From 859d215b7e006e5fc60f686ec976e997e35d169b Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Wed, 21 Feb 2018 21:40:01 +0100 Subject: [PATCH] Cygwin: split out fhandler_socket into inet and local classes First cut, still incomplete * fhandler_socket is now base class for other socket classes * fhandler_socket_inet handles AF_INET and AF_INET6 sockets * fhandler_socket_local handles AF_LOCAL/AF_UNIX sockets * finally get rid of fdsock by using set_socket_handle in accept4 * align file-related calls (fstat, fstatvfs, fchown, fchmod, facl) to Linux. Signed-off-by: Corinna Vinschen --- winsup/cygwin/Makefile.in | 2 + winsup/cygwin/dtable.cc | 8 +- winsup/cygwin/fhandler.h | 237 ++- winsup/cygwin/fhandler_socket.cc | 2072 +----------------------- winsup/cygwin/fhandler_socket_inet.cc | 1163 +++++++++++++ winsup/cygwin/fhandler_socket_local.cc | 1844 +++++++++++++++++++++ winsup/cygwin/net.cc | 140 -- winsup/cygwin/security.h | 1 + winsup/cygwin/syslog.cc | 40 +- winsup/cygwin/uinfo.cc | 4 +- 10 files changed, 3293 insertions(+), 2218 deletions(-) create mode 100644 winsup/cygwin/fhandler_socket_inet.cc create mode 100644 winsup/cygwin/fhandler_socket_local.cc diff --git a/winsup/cygwin/Makefile.in b/winsup/cygwin/Makefile.in index b75774ace..75ec29707 100644 --- a/winsup/cygwin/Makefile.in +++ b/winsup/cygwin/Makefile.in @@ -296,6 +296,8 @@ DLL_OFILES:= \ fhandler_registry.o \ fhandler_serial.o \ fhandler_socket.o \ + fhandler_socket_inet.o \ + fhandler_socket_local.o \ fhandler_tape.o \ fhandler_termios.o \ fhandler_tty.o \ diff --git a/winsup/cygwin/dtable.cc b/winsup/cygwin/dtable.cc index f46461823..eb3081e49 100644 --- a/winsup/cygwin/dtable.cc +++ b/winsup/cygwin/dtable.cc @@ -304,8 +304,8 @@ dtable::init_std_file_from_handle (int fd, HANDLE handle) dev.parse (name); else if (strcmp (name, ":sock:") == 0 /* NtQueryObject returns an error when called on an LSP socket - handle. fdsock tries to fetch the underlying base socket, - but this might fail. 
*/ + handle. fhandler_socket::set_socket_handle tries to fetch + the underlying base socket, but this might fail. */ || (strcmp (name, unknown_file) == 0 && !::getsockopt ((SOCKET) handle, SOL_SOCKET, SO_RCVBUF, (char *) &rcv, &len))) @@ -517,10 +517,12 @@ fh_alloc (path_conv& pc) case FH_TCP: case FH_UDP: case FH_ICMP: + fh = cnew (fhandler_socket_inet); + break; case FH_UNIX: case FH_STREAM: case FH_DGRAM: - fh = cnew (fhandler_socket); + fh = cnew (fhandler_socket_local); break; case FH_FS: fh = cnew (fhandler_disk_file); diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h index ce9d9246e..d912e1cb7 100644 --- a/winsup/cygwin/fhandler.h +++ b/winsup/cygwin/fhandler.h @@ -479,9 +479,16 @@ struct wsa_event class fhandler_socket: public fhandler_base { private: + /* permission fake following Linux rules */ + uid_t uid; + gid_t gid; + mode_t mode; + + protected: int addr_family; int type; - int connect_secret[4]; + virtual int af_local_connect () = 0; + int get_socket_flags (); wsa_event *wsock_events; HANDLE wsock_mtx; @@ -491,32 +498,11 @@ class fhandler_socket: public fhandler_base int evaluate_events (const long event_mask, long &events, const bool erase); const HANDLE wsock_event () const { return wsock_evt; } const LONG serial_number () const { return wsock_events->serial_number; } - private: + protected: int wait_for_events (const long event_mask, const DWORD flags); void release_events (); - pid_t sec_pid; - uid_t sec_uid; - gid_t sec_gid; - pid_t sec_peer_pid; - uid_t sec_peer_uid; - gid_t sec_peer_gid; - void af_local_set_secret (char *); - void af_local_setblocking (bool &, bool &); - void af_local_unsetblocking (bool, bool); - void af_local_set_cred (); - void af_local_copy (fhandler_socket *); - bool af_local_recv_secret (); - bool af_local_send_secret (); - bool af_local_recv_cred (); - bool af_local_send_cred (); - int af_local_accept (); - public: - int af_local_connect (); - int af_local_set_no_getpeereid (); - void
af_local_set_sockpair_cred (); - - private: + protected: int _rmem; int _wmem; public: @@ -525,22 +511,20 @@ class fhandler_socket: public fhandler_base void rmem (int nrmem) { _rmem = nrmem; } void wmem (int nwmem) { _wmem = nwmem; } - private: + protected: DWORD _rcvtimeo; /* msecs */ DWORD _sndtimeo; /* msecs */ public: DWORD &rcvtimeo () { return _rcvtimeo; } DWORD &sndtimeo () { return _sndtimeo; } - private: + protected: struct _WSAPROTOCOL_INFOW *prot_info_ptr; public: void init_fixup_before (); bool need_fixup_before () const {return prot_info_ptr != NULL;} - private: - char *sun_path; - char *peer_sun_path; + protected: struct status_flags { unsigned async_io : 1; /* async I/O */ @@ -580,35 +564,34 @@ class fhandler_socket: public fhandler_base IMPLEMENT_STATUS_FLAG (conn_state, connect_state) IMPLEMENT_STATUS_FLAG (bool, no_getpeereid) - int socket (int af, int type, int protocol, int flags); - int bind (const struct sockaddr *name, int namelen); - int connect (const struct sockaddr *name, int namelen); - int listen (int backlog); - int accept4 (struct sockaddr *peer, int *len, int flags); - int getsockname (struct sockaddr *name, int *namelen); - int getpeername (struct sockaddr *name, int *namelen); - int getpeereid (pid_t *pid, uid_t *euid, gid_t *egid); - int socketpair (int af, int type, int protocol, int flags, - fhandler_socket *fh_out); - int setsockopt (int level, int optname, const void *optval, - __socklen_t optlen); - int getsockopt (int level, int optname, const void *optval, - __socklen_t *optlen); + virtual int socket (int af, int type, int protocol, int flags) = 0; + virtual int socketpair (int af, int type, int protocol, int flags, + fhandler_socket *fh_out) = 0; + virtual int bind (const struct sockaddr *name, int namelen) = 0; + virtual int listen (int backlog) = 0; + virtual int accept4 (struct sockaddr *peer, int *len, int flags) = 0; + virtual int connect (const struct sockaddr *name, int namelen) = 0; + virtual int getsockname 
(struct sockaddr *name, int *namelen) = 0; + virtual int getpeername (struct sockaddr *name, int *namelen) = 0; + virtual int getpeereid (pid_t *pid, uid_t *euid, gid_t *egid); + virtual int setsockopt (int level, int optname, const void *optval, + __socklen_t optlen) = 0; + virtual int getsockopt (int level, int optname, const void *optval, + __socklen_t *optlen) = 0; int open (int flags, mode_t mode = 0); - void __reg3 read (void *ptr, size_t& len); - ssize_t __stdcall readv (const struct iovec *, int iovcnt, ssize_t tot = -1); - inline ssize_t __reg3 recv_internal (struct _WSAMSG *wsamsg, bool use_recvmsg); - ssize_t recvfrom (void *ptr, size_t len, int flags, - struct sockaddr *from, int *fromlen); - ssize_t recvmsg (struct msghdr *msg, int flags); + virtual ssize_t recvfrom (void *ptr, size_t len, int flags, + struct sockaddr *from, int *fromlen) = 0; + virtual ssize_t recvmsg (struct msghdr *msg, int flags) = 0; + virtual void __reg3 read (void *ptr, size_t& len) = 0; + virtual ssize_t __stdcall readv (const struct iovec *, int iovcnt, + ssize_t tot = -1) = 0; - ssize_t __stdcall write (const void *ptr, size_t len); - ssize_t __stdcall writev (const struct iovec *, int iovcnt, ssize_t tot = -1); - inline ssize_t send_internal (struct _WSAMSG *wsamsg, int flags); - ssize_t sendto (const void *ptr, size_t len, int flags, - const struct sockaddr *to, int tolen); - ssize_t sendmsg (const struct msghdr *msg, int flags); + virtual ssize_t sendto (const void *ptr, size_t len, int flags, + const struct sockaddr *to, int tolen) = 0; + virtual ssize_t sendmsg (const struct msghdr *msg, int flags) = 0; + virtual ssize_t __stdcall write (const void *ptr, size_t len) = 0; + virtual ssize_t __stdcall writev (const struct iovec *, int iovcnt, ssize_t tot = -1) = 0; int ioctl (unsigned int cmd, void *); int fcntl (int cmd, intptr_t); @@ -635,31 +618,159 @@ class fhandler_socket: public fhandler_base int get_addr_family () {return addr_family;} void set_socket_type (int st) { 
type = st;} int get_socket_type () {return type;} + + int __reg2 fstat (struct stat *buf); + int __reg2 fstatvfs (struct statvfs *buf); + int __reg1 fchmod (mode_t newmode); + int __reg2 fchown (uid_t newuid, gid_t newgid); + int __reg3 facl (int, int, struct acl *); + int __reg2 link (const char *); +}; + +class fhandler_socket_inet: public fhandler_socket +{ + protected: + int af_local_connect () { return 0; } + + private: + inline ssize_t recv_internal (struct _WSAMSG *wsamsg, bool use_recvmsg); + inline ssize_t send_internal (struct _WSAMSG *wsamsg, int flags); + + public: + fhandler_socket_inet (); + ~fhandler_socket_inet (); + + int socket (int af, int type, int protocol, int flags); + int socketpair (int af, int type, int protocol, int flags, + fhandler_socket *fh_out); + int bind (const struct sockaddr *name, int namelen); + int listen (int backlog); + int accept4 (struct sockaddr *peer, int *len, int flags); + int connect (const struct sockaddr *name, int namelen); + int getsockname (struct sockaddr *name, int *namelen); + int getpeername (struct sockaddr *name, int *namelen); + int setsockopt (int level, int optname, const void *optval, + __socklen_t optlen); + int getsockopt (int level, int optname, const void *optval, + __socklen_t *optlen); + ssize_t recvfrom (void *ptr, size_t len, int flags, + struct sockaddr *from, int *fromlen); + ssize_t recvmsg (struct msghdr *msg, int flags); + void __reg3 read (void *ptr, size_t& len); + ssize_t __stdcall readv (const struct iovec *, int iovcnt, ssize_t tot = -1); + ssize_t sendto (const void *ptr, size_t len, int flags, + const struct sockaddr *to, int tolen); + ssize_t sendmsg (const struct msghdr *msg, int flags); + ssize_t __stdcall write (const void *ptr, size_t len); + ssize_t __stdcall writev (const struct iovec *, int iovcnt, ssize_t tot = -1); + + /* from here on: CLONING */ + fhandler_socket_inet (void *) {} + + void copyto (fhandler_base *x) + { + x->pc.free_strings (); + *reinterpret_cast<fhandler_socket_inet *> (x) = 
*this; + x->reset (this); + } + + fhandler_socket_inet *clone (cygheap_types malloc_type = HEAP_FHANDLER) + { + void *ptr = (void *) ccalloc (malloc_type, 1, sizeof (fhandler_socket_inet)); + fhandler_socket_inet *fh = new (ptr) fhandler_socket_inet (ptr); + copyto (fh); + return fh; + } +}; + +class fhandler_socket_local: public fhandler_socket +{ + protected: + char *sun_path; + char *peer_sun_path; void set_sun_path (const char *path); char *get_sun_path () {return sun_path;} void set_peer_sun_path (const char *path); char *get_peer_sun_path () {return peer_sun_path;} + protected: + int connect_secret[4]; + pid_t sec_pid; + uid_t sec_uid; + gid_t sec_gid; + pid_t sec_peer_pid; + uid_t sec_peer_uid; + gid_t sec_peer_gid; + void af_local_set_secret (char *); + void af_local_setblocking (bool &, bool &); + void af_local_unsetblocking (bool, bool); + void af_local_set_cred (); + void af_local_copy (fhandler_socket_local *); + bool af_local_recv_secret (); + bool af_local_send_secret (); + bool af_local_recv_cred (); + bool af_local_send_cred (); + int af_local_accept (); + int af_local_connect (); + int af_local_set_no_getpeereid (); + void af_local_set_sockpair_cred (); + + private: + inline ssize_t recv_internal (struct _WSAMSG *wsamsg, bool use_recvmsg); + inline ssize_t send_internal (struct _WSAMSG *wsamsg, int flags); + + public: + fhandler_socket_local (); + ~fhandler_socket_local (); + + int dup (fhandler_base *child, int); + + int socket (int af, int type, int protocol, int flags); + int socketpair (int af, int type, int protocol, int flags, + fhandler_socket *fh_out); + int bind (const struct sockaddr *name, int namelen); + int listen (int backlog); + int accept4 (struct sockaddr *peer, int *len, int flags); + int connect (const struct sockaddr *name, int namelen); + int getsockname (struct sockaddr *name, int *namelen); + int getpeername (struct sockaddr *name, int *namelen); + int getpeereid (pid_t *pid, uid_t *euid, gid_t *egid); + int setsockopt (int 
level, int optname, const void *optval, + __socklen_t optlen); + int getsockopt (int level, int optname, const void *optval, + __socklen_t *optlen); + ssize_t recvfrom (void *ptr, size_t len, int flags, + struct sockaddr *from, int *fromlen); + ssize_t recvmsg (struct msghdr *msg, int flags); + void __reg3 read (void *ptr, size_t& len); + ssize_t __stdcall readv (const struct iovec *, int iovcnt, ssize_t tot = -1); + ssize_t sendto (const void *ptr, size_t len, int flags, + const struct sockaddr *to, int tolen); + ssize_t sendmsg (const struct msghdr *msg, int flags); + ssize_t __stdcall write (const void *ptr, size_t len); + ssize_t __stdcall writev (const struct iovec *, int iovcnt, ssize_t tot = -1); + int __reg2 fstat (struct stat *buf); int __reg2 fstatvfs (struct statvfs *buf); - int __reg1 fchmod (mode_t mode); - int __reg2 fchown (uid_t uid, gid_t gid); + int __reg1 fchmod (mode_t newmode); + int __reg2 fchown (uid_t newuid, gid_t newgid); int __reg3 facl (int, int, struct acl *); int __reg2 link (const char *); - fhandler_socket (void *) {} + /* from here on: CLONING */ + fhandler_socket_local (void *) {} void copyto (fhandler_base *x) { x->pc.free_strings (); - *reinterpret_cast<fhandler_socket *> (x) = *this; + *reinterpret_cast<fhandler_socket_local *> (x) = *this; x->reset (this); } - fhandler_socket *clone (cygheap_types malloc_type = HEAP_FHANDLER) + fhandler_socket_local *clone (cygheap_types malloc_type = HEAP_FHANDLER) { - void *ptr = (void *) ccalloc (malloc_type, 1, sizeof (fhandler_socket)); - fhandler_socket *fh = new (ptr) fhandler_socket (ptr); + void *ptr = (void *) ccalloc (malloc_type, 1, sizeof (fhandler_socket_local)); + fhandler_socket_local *fh = new (ptr) fhandler_socket_local (ptr); copyto (fh); return fh; } @@ -2223,6 +2334,8 @@ typedef union char __registry[sizeof (fhandler_registry)]; char __serial[sizeof (fhandler_serial)]; char __socket[sizeof (fhandler_socket)]; + char __socket_inet[sizeof (fhandler_socket_inet)]; + char __socket_local[sizeof (fhandler_socket_local)]; 
char __termios[sizeof (fhandler_termios)]; char __pty_common[sizeof (fhandler_pty_common)]; char __pty_slave[sizeof (fhandler_pty_slave)]; diff --git a/winsup/cygwin/fhandler_socket.cc b/winsup/cygwin/fhandler_socket.cc index 26d4716b4..371cc398c 100644 --- a/winsup/cygwin/fhandler_socket.cc +++ b/winsup/cygwin/fhandler_socket.cc @@ -6,8 +6,6 @@ Cygwin license. Please consult the file "CYGWIN_LICENSE" for details. */ -/* #define DEBUG_NEST_ON 1 */ - #define __INSIDE_CYGWIN_NET__ #define USE_SYS_TYPES_FD_SET @@ -45,194 +43,29 @@ #include #include "ntdll.h" #include "miscfuncs.h" +#include "tls_pbuf.h" #define ASYNC_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT) #define EVENT_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT|FD_CLOSE) -extern bool fdsock (cygheap_fdmanip& fd, const device *, SOCKET soc); extern "C" { -int sscanf (const char *, const char *, ...); + int sscanf (const char *, const char *, ...); } /* End of "C" section */ -static inline mode_t -adjust_socket_file_mode (mode_t mode) -{ - /* Kludge: Don't allow to remove read bit on socket files for - user/group/other, if the accompanying write bit is set. It would - be nice to have exact permissions on a socket file, but it's - necessary that somebody able to access the socket can always read - the contents of the socket file to avoid spurious "permission - denied" messages. */ - return mode | ((mode & (S_IWUSR | S_IWGRP | S_IWOTH)) << 1); -} - -/* cygwin internal: map sockaddr into internet domain address */ -int -get_inet_addr (const struct sockaddr *in, int inlen, - struct sockaddr_storage *out, int *outlen, - int *type = NULL, int *secret = NULL) -{ - int secret_buf [4]; - int* secret_ptr = (secret ? : secret_buf); - - switch (in->sa_family) - { - case AF_LOCAL: - /* Check for abstract socket. These are generated for AF_LOCAL datagram - sockets in recv_internal, to allow a datagram server to use sendto - after recvfrom. 
*/ - if (inlen >= (int) sizeof (in->sa_family) + 7 - && in->sa_data[0] == '\0' && in->sa_data[1] == 'd' - && in->sa_data[6] == '\0') - { - struct sockaddr_in addr; - addr.sin_family = AF_INET; - sscanf (in->sa_data + 2, "%04hx", &addr.sin_port); - addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); - *outlen = sizeof addr; - memcpy (out, &addr, *outlen); - return 0; - } - break; - case AF_INET: - memcpy (out, in, inlen); - *outlen = inlen; - /* If the peer address given in connect or sendto is the ANY address, - Winsock fails with WSAEADDRNOTAVAIL, while Linux converts that into - a connection/send attempt to LOOPBACK. We're doing the same here. */ - if (((struct sockaddr_in *) out)->sin_addr.s_addr == htonl (INADDR_ANY)) - ((struct sockaddr_in *) out)->sin_addr.s_addr = htonl (INADDR_LOOPBACK); - return 0; - case AF_INET6: - memcpy (out, in, inlen); - *outlen = inlen; - /* See comment in AF_INET case. */ - if (IN6_IS_ADDR_UNSPECIFIED (&((struct sockaddr_in6 *) out)->sin6_addr)) - ((struct sockaddr_in6 *) out)->sin6_addr = in6addr_loopback; - return 0; - default: - set_errno (EAFNOSUPPORT); - return SOCKET_ERROR; - } - /* AF_LOCAL/AF_UNIX only */ - path_conv pc (in->sa_data, PC_SYM_FOLLOW); - if (pc.error) - { - set_errno (pc.error); - return SOCKET_ERROR; - } - if (!pc.exists ()) - { - set_errno (ENOENT); - return SOCKET_ERROR; - } - /* Do NOT test for the file being a socket file here. The socket file - creation is not an atomic operation, so there is a chance that socket - files which are just in the process of being created are recognized - as non-socket files. To work around this problem we now create the - file with all sharing disabled. If the below NtOpenFile fails - with STATUS_SHARING_VIOLATION we know that the file already exists, - but the creating process isn't finished yet. So we yield and try - again, until we can either open the file successfully, or some error - other than STATUS_SHARING_VIOLATION occurs. 
- Since we now don't know if the file is actually a socket file, we - perform this check here explicitely. */ - NTSTATUS status; - HANDLE fh; - OBJECT_ATTRIBUTES attr; - IO_STATUS_BLOCK io; - - pc.get_object_attr (attr, sec_none_nih); - do - { - status = NtOpenFile (&fh, GENERIC_READ | SYNCHRONIZE, &attr, &io, - FILE_SHARE_VALID_FLAGS, - FILE_SYNCHRONOUS_IO_NONALERT - | FILE_OPEN_FOR_BACKUP_INTENT - | FILE_NON_DIRECTORY_FILE); - if (status == STATUS_SHARING_VIOLATION) - { - /* While we hope that the sharing violation is only temporary, we - also could easily get stuck here, waiting for a file in use by - some greedy Win32 application. Therefore we should never wait - endlessly without checking for signals and thread cancel event. */ - pthread_testcancel (); - if (cygwait (NULL, cw_nowait, cw_sig_eintr) == WAIT_SIGNALED - && !_my_tls.call_signal_handler ()) - { - set_errno (EINTR); - return SOCKET_ERROR; - } - yield (); - } - else if (!NT_SUCCESS (status)) - { - __seterrno_from_nt_status (status); - return SOCKET_ERROR; - } - } - while (status == STATUS_SHARING_VIOLATION); - /* Now test for the SYSTEM bit. */ - FILE_BASIC_INFORMATION fbi; - status = NtQueryInformationFile (fh, &io, &fbi, sizeof fbi, - FileBasicInformation); - if (!NT_SUCCESS (status)) - { - __seterrno_from_nt_status (status); - return SOCKET_ERROR; - } - if (!(fbi.FileAttributes & FILE_ATTRIBUTE_SYSTEM)) - { - NtClose (fh); - set_errno (EBADF); - return SOCKET_ERROR; - } - /* Eventually check the content and fetch the required information. 
*/ - char buf[128]; - memset (buf, 0, sizeof buf); - status = NtReadFile (fh, NULL, NULL, NULL, &io, buf, 128, NULL, NULL); - NtClose (fh); - if (NT_SUCCESS (status)) - { - struct sockaddr_in sin; - char ctype; - sin.sin_family = AF_INET; - if (strncmp (buf, SOCKET_COOKIE, strlen (SOCKET_COOKIE))) - { - set_errno (EBADF); - return SOCKET_ERROR; - } - sscanf (buf + strlen (SOCKET_COOKIE), "%hu %c %08x-%08x-%08x-%08x", - &sin.sin_port, - &ctype, - secret_ptr, secret_ptr + 1, secret_ptr + 2, secret_ptr + 3); - sin.sin_port = htons (sin.sin_port); - sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); - memcpy (out, &sin, sizeof sin); - *outlen = sizeof sin; - if (type) - *type = (ctype == 's' ? SOCK_STREAM : - ctype == 'd' ? SOCK_DGRAM - : 0); - return 0; - } - __seterrno_from_nt_status (status); - return SOCKET_ERROR; -} - /**********************************************************************/ /* fhandler_socket */ fhandler_socket::fhandler_socket () : fhandler_base (), + uid (myself->uid), + gid (myself->gid), + mode (S_IFSOCK | S_IRWXU | S_IRWXG | S_IRWXO), wsock_events (NULL), wsock_mtx (NULL), wsock_evt (NULL), _rcvtimeo (INFINITE), _sndtimeo (INFINITE), prot_info_ptr (NULL), - sun_path (NULL), - peer_sun_path (NULL), status () { need_fork_fixup (true); @@ -242,10 +75,6 @@ fhandler_socket::~fhandler_socket () { if (prot_info_ptr) cfree (prot_info_ptr); - if (sun_path) - cfree (sun_path); - if (peer_sun_path) - cfree (peer_sun_path); } char * @@ -359,384 +188,6 @@ fhandler_socket::set_socket_handle (SOCKET sock, int af, int type, int flags) return 0; } -int -fhandler_socket::socket (int af, int type, int protocol, int flags) -{ - SOCKET sock; - int ret; - - sock = ::socket (af == AF_LOCAL ? 
AF_INET : af, type, protocol); - if (sock == INVALID_SOCKET) - { - set_winsock_errno (); - return -1; - } - ret = set_socket_handle (sock, af, type, flags); - if (ret < 0) - ::closesocket (sock); - return ret; -} - -/* fhandler_socket::socketpair is called on the fhandler handling the - accepting socket, fh_out is the fhandler for the connecting socket. */ -int -fhandler_socket::socketpair (int af, int type, int protocol, int flags, - fhandler_socket *fh_out) -{ - SOCKET insock = INVALID_SOCKET; - SOCKET outsock = INVALID_SOCKET; - SOCKET sock = INVALID_SOCKET; - struct sockaddr_in sock_in, sock_out; - int len; - - /* create listening socket */ - sock = ::socket (AF_INET, type, 0); - if (sock == INVALID_SOCKET) - { - set_winsock_errno (); - goto err; - } - /* bind to unused port */ - sock_in.sin_family = AF_INET; - sock_in.sin_port = 0; - sock_in.sin_addr.s_addr = htonl (INADDR_LOOPBACK); - if (::bind (sock, (struct sockaddr *) &sock_in, sizeof (sock_in)) < 0) - { - set_winsock_errno (); - goto err; - } - /* fetch socket name */ - len = sizeof (sock_in); - if (::getsockname (sock, (struct sockaddr *) &sock_in, &len) < 0) - { - set_winsock_errno (); - goto err; - } - /* on stream sockets, create listener */ - if (type == SOCK_STREAM && ::listen (sock, 2) < 0) - { - set_winsock_errno (); - goto err; - } - /* create connecting socket */ - outsock = ::socket (AF_INET, type, 0); - if (outsock == INVALID_SOCKET) - { - set_winsock_errno (); - goto err; - } - /* on datagram sockets, bind connecting socket */ - if (type == SOCK_DGRAM) - { - sock_out.sin_family = AF_INET; - sock_out.sin_port = 0; - sock_out.sin_addr.s_addr = htonl (INADDR_LOOPBACK); - if (::bind (outsock, (struct sockaddr *) &sock_out, - sizeof (sock_out)) < 0) - { - set_winsock_errno (); - goto err; - } - /* ...and fetch name */ - len = sizeof (sock_out); - if (::getsockname (outsock, (struct sockaddr *) &sock_out, &len) < 0) - { - set_winsock_errno (); - goto err; - } - } - sock_in.sin_addr.s_addr = htonl 
(INADDR_LOOPBACK); - if (type == SOCK_DGRAM) - sock_out.sin_addr.s_addr = htonl (INADDR_LOOPBACK); - /* connect */ - if (::connect (outsock, (struct sockaddr *) &sock_in, sizeof (sock_in)) < 0) - { - set_winsock_errno (); - goto err; - } - if (type == SOCK_STREAM) - { - /* on stream sockets, accept connection and close listener */ - len = sizeof (sock_in); - insock = ::accept (sock, (struct sockaddr *) &sock_in, &len); - if (insock == INVALID_SOCKET) - { - set_winsock_errno (); - goto err; - } - ::closesocket (sock); - } - else - { - /* on datagram sockets, connect vice versa */ - if (::connect (sock, (struct sockaddr *) &sock_out, - sizeof (sock_out)) < 0) - { - set_winsock_errno (); - goto err; - } - insock = sock; - } - sock = INVALID_SOCKET; - - /* postprocessing */ - connect_state (connected); - fh_out->connect_state (connected); - if (af == AF_LOCAL && type == SOCK_STREAM) - { - af_local_set_sockpair_cred (); - fh_out->af_local_set_sockpair_cred (); - } - if (set_socket_handle (insock, af, type, flags) < 0 - || fh_out->set_socket_handle (outsock, af, type, flags) < 0) - goto err; - - return 0; - -err: - if (sock != INVALID_SOCKET) - ::closesocket (sock); - if (insock != INVALID_SOCKET) - ::closesocket (insock); - if (outsock != INVALID_SOCKET) - ::closesocket (outsock); - return -1; -} - -void -fhandler_socket::af_local_set_sockpair_cred () -{ - sec_pid = sec_peer_pid = getpid (); - sec_uid = sec_peer_uid = geteuid32 (); - sec_gid = sec_peer_gid = getegid32 (); -} - -void -fhandler_socket::af_local_setblocking (bool &async, bool &nonblocking) -{ - async = async_io (); - nonblocking = is_nonblocking (); - if (async) - { - WSAAsyncSelect (get_socket (), winmsg, 0, 0); - WSAEventSelect (get_socket (), wsock_evt, EVENT_MASK); - } - set_nonblocking (false); - async_io (false); -} - -void -fhandler_socket::af_local_unsetblocking (bool async, bool nonblocking) -{ - if (nonblocking) - set_nonblocking (true); - if (async) - { - WSAAsyncSelect (get_socket (), winmsg, 
WM_ASYNCIO, ASYNC_MASK); - async_io (true); - } -} - -bool -fhandler_socket::af_local_recv_secret () -{ - int out[4] = { 0, 0, 0, 0 }; - int rest = sizeof out; - char *ptr = (char *) out; - while (rest > 0) - { - int ret = recvfrom (ptr, rest, 0, NULL, NULL); - if (ret <= 0) - break; - rest -= ret; - ptr += ret; - } - if (rest == 0) - { - debug_printf ("Received af_local secret: %08x-%08x-%08x-%08x", - out[0], out[1], out[2], out[3]); - if (out[0] != connect_secret[0] || out[1] != connect_secret[1] - || out[2] != connect_secret[2] || out[3] != connect_secret[3]) - { - debug_printf ("Receiving af_local secret mismatch"); - return false; - } - } - else - debug_printf ("Receiving af_local secret failed"); - return rest == 0; -} - -bool -fhandler_socket::af_local_send_secret () -{ - int rest = sizeof connect_secret; - char *ptr = (char *) connect_secret; - while (rest > 0) - { - int ret = sendto (ptr, rest, 0, NULL, 0); - if (ret <= 0) - break; - rest -= ret; - ptr += ret; - } - debug_printf ("Sending af_local secret %s", rest == 0 ? 
"succeeded" - : "failed"); - return rest == 0; -} - -bool -fhandler_socket::af_local_recv_cred () -{ - struct ucred out = { (pid_t) 0, (uid_t) -1, (gid_t) -1 }; - int rest = sizeof out; - char *ptr = (char *) &out; - while (rest > 0) - { - int ret = recvfrom (ptr, rest, 0, NULL, NULL); - if (ret <= 0) - break; - rest -= ret; - ptr += ret; - } - if (rest == 0) - { - debug_printf ("Received eid credentials: pid: %d, uid: %d, gid: %d", - out.pid, out.uid, out.gid); - sec_peer_pid = out.pid; - sec_peer_uid = out.uid; - sec_peer_gid = out.gid; - } - else - debug_printf ("Receiving eid credentials failed"); - return rest == 0; -} - -bool -fhandler_socket::af_local_send_cred () -{ - struct ucred in = { sec_pid, sec_uid, sec_gid }; - int rest = sizeof in; - char *ptr = (char *) ∈ - while (rest > 0) - { - int ret = sendto (ptr, rest, 0, NULL, 0); - if (ret <= 0) - break; - rest -= ret; - ptr += ret; - } - if (rest == 0) - debug_printf ("Sending eid credentials succeeded"); - else - debug_printf ("Sending eid credentials failed"); - return rest == 0; -} - -int -fhandler_socket::af_local_connect () -{ - bool orig_async_io, orig_is_nonblocking; - - if (get_addr_family () != AF_LOCAL || get_socket_type () != SOCK_STREAM) - return 0; - - debug_printf ("af_local_connect called, no_getpeereid=%d", no_getpeereid ()); - if (no_getpeereid ()) - return 0; - - af_local_setblocking (orig_async_io, orig_is_nonblocking); - if (!af_local_send_secret () || !af_local_recv_secret () - || !af_local_send_cred () || !af_local_recv_cred ()) - { - debug_printf ("accept from unauthorized server"); - ::shutdown (get_socket (), SD_BOTH); - WSASetLastError (WSAECONNREFUSED); - return -1; - } - af_local_unsetblocking (orig_async_io, orig_is_nonblocking); - return 0; -} - -int -fhandler_socket::af_local_accept () -{ - bool orig_async_io, orig_is_nonblocking; - - debug_printf ("af_local_accept called, no_getpeereid=%d", no_getpeereid ()); - if (no_getpeereid ()) - return 0; - - af_local_setblocking 
(orig_async_io, orig_is_nonblocking); - if (!af_local_recv_secret () || !af_local_send_secret () - || !af_local_recv_cred () || !af_local_send_cred ()) - { - debug_printf ("connect from unauthorized client"); - ::shutdown (get_socket (), SD_BOTH); - ::closesocket (get_socket ()); - WSASetLastError (WSAECONNABORTED); - return -1; - } - af_local_unsetblocking (orig_async_io, orig_is_nonblocking); - return 0; -} - -int -fhandler_socket::af_local_set_no_getpeereid () -{ - if (get_addr_family () != AF_LOCAL || get_socket_type () != SOCK_STREAM) - { - set_errno (EINVAL); - return -1; - } - if (connect_state () != unconnected) - { - set_errno (EALREADY); - return -1; - } - - debug_printf ("no_getpeereid set"); - no_getpeereid (true); - return 0; -} - -void -fhandler_socket::af_local_set_cred () -{ - sec_pid = getpid (); - sec_uid = geteuid32 (); - sec_gid = getegid32 (); - sec_peer_pid = (pid_t) 0; - sec_peer_uid = (uid_t) -1; - sec_peer_gid = (gid_t) -1; -} - -void -fhandler_socket::af_local_copy (fhandler_socket *sock) -{ - sock->connect_secret[0] = connect_secret[0]; - sock->connect_secret[1] = connect_secret[1]; - sock->connect_secret[2] = connect_secret[2]; - sock->connect_secret[3] = connect_secret[3]; - sock->sec_pid = sec_pid; - sock->sec_uid = sec_uid; - sock->sec_gid = sec_gid; - sock->sec_peer_pid = sec_peer_pid; - sock->sec_peer_uid = sec_peer_uid; - sock->sec_peer_gid = sec_peer_gid; - sock->no_getpeereid (no_getpeereid ()); -} - -void -fhandler_socket::af_local_set_secret (char *buf) -{ - if (!RtlGenRandom (connect_secret, sizeof (connect_secret))) - bzero ((char*) connect_secret, sizeof (connect_secret)); - __small_sprintf (buf, "%08x-%08x-%08x-%08x", - connect_secret [0], connect_secret [1], - connect_secret [2], connect_secret [3]); -} - /* Maximum number of concurrently opened sockets from all Cygwin processes per session. Note that shared sockets (through dup/fork/exec) are counted as one socket. 
*/ @@ -874,12 +325,7 @@ fhandler_socket::init_events () NtClose (wsock_mtx); return false; } - - /* sock type not yet set here. */ - /* FIXME: as soon as we switch to socket method, we're good to use - get_socket_type (). */ - - if (pc.dev == FH_UDP || pc.dev == FH_DGRAM) + if (get_socket_type () == SOCK_DGRAM) wsock_events->events = FD_WRITE; return true; } @@ -1075,9 +521,9 @@ fhandler_socket::release_events () } } -/* Called from net.cc:fdsock() if a freshly created socket is not - inheritable. In that case we use fixup_before_fork_exec. See - the comment in fdsock() for a description of the problem. */ +/* Called if a freshly created socket is not inheritable. In that case we + have to use fixup_before_fork_exec. See comment in set_socket_handle for + a description of the problem. */ void fhandler_socket::init_fixup_before () { @@ -1155,11 +601,6 @@ fhandler_socket::dup (fhandler_base *child, int flags) NtClose (fhs->wsock_mtx); return -1; } - if (get_addr_family () == AF_LOCAL) - { - fhs->set_sun_path (get_sun_path ()); - fhs->set_peer_sun_path (get_peer_sun_path ()); - } if (!need_fixup_before ()) { int ret = fhandler_base::dup (child, flags); @@ -1192,40 +633,18 @@ int __reg2 fhandler_socket::fstat (struct stat *buf) { int res; - if (get_addr_family () == AF_LOCAL) + + res = fhandler_base::fstat (buf); /* base class; a self-call would recurse forever */ + if (!res) { - if (!get_sun_path () || get_sun_path ()[0] == '\0') - { - memset (buf, 0, sizeof *buf); - buf->st_dev = FH_UNIX; - buf->st_ino = get_plain_ino (); - buf->st_mode = S_IFSOCK | S_IRWXU | S_IRWXG | S_IRWXO; - buf->st_nlink = 1; - buf->st_uid = myself->uid; - buf->st_gid = myself->gid; - time_as_timestruc_t (&buf->st_ctim); - buf->st_blksize = 4096; - return 0; - } - res = fhandler_base::fstat_fs (buf); - if (!res) - { - buf->st_mode = (buf->st_mode & ~S_IFMT) | S_IFSOCK; - buf->st_size = 0; - } - } - else - { - res = fhandler_base::fstat (buf); - if (!res) - { - buf->st_dev = FHDEV (DEV_TCP_MAJOR, 0); - if (!(buf->st_ino = get_plain_ino ())) - 
sscanf (get_name (), "/proc/%*d/fd/socket:[%lld]", - (long long *) &buf->st_ino); - buf->st_mode = S_IFSOCK | S_IRWXU | S_IRWXG | S_IRWXO; - buf->st_size = 0; - } + buf->st_dev = FHDEV (DEV_TCP_MAJOR, 0); + if (!(buf->st_ino = get_plain_ino ())) + sscanf (get_name (), "/proc/%*d/fd/socket:[%lld]", + (long long *) &buf->st_ino); + buf->st_uid = uid; + buf->st_gid = gid; + buf->st_mode = mode; + buf->st_size = 0; } return res; } @@ -1233,1059 +652,85 @@ fhandler_socket::fstat (struct stat *buf) int __reg2 fhandler_socket::fstatvfs (struct statvfs *sfs) { - if (get_addr_family () == AF_LOCAL) + memset (sfs, 0, sizeof (*sfs)); + sfs->f_bsize = sfs->f_frsize = 4096; + sfs->f_namemax = NAME_MAX; + return 0; +} + +int +fhandler_socket::fchmod (mode_t newmode) +{ + mode = (newmode & ~S_IFMT) | S_IFSOCK; + return 0; +} + +int +fhandler_socket::fchown (uid_t newuid, gid_t newgid) +{ + bool perms = check_token_membership (&well_known_admins_sid); + + /* Admin rulez */ + if (!perms) { - if (!get_sun_path () || get_sun_path ()[0] == '\0') + /* Otherwise, new uid == old uid or current uid is fine */ + if (newuid == ILLEGAL_UID || newuid == uid || newuid == myself->uid) + perms = true; + /* Otherwise, new gid == old gid or current gid is fine */ + else if (newgid == ILLEGAL_GID || newgid == gid || newgid == myself->gid) + perms = true; + else { - memset (sfs, 0, sizeof (*sfs)); - sfs->f_bsize = sfs->f_frsize = 4096; - sfs->f_namemax = NAME_MAX; - return 0; + /* Last but not least, newgid in supplementary group list is fine */ + tmp_pathbuf tp; + gid_t *gids = (gid_t *) tp.w_get (); + int num = getgroups (65536 / sizeof (*gids), gids); + + for (int idx = 0; idx < num; ++idx) + if (newgid == gids[idx]) + { + perms = true; + break; + } } - fhandler_disk_file fh (pc); - fh.get_device () = FH_FS; - return fh.fstatvfs (sfs); - } - set_errno (EBADF); - return -1; -} + } -int -fhandler_socket::fchmod (mode_t mode) -{ - if (get_addr_family () == AF_LOCAL) + if (perms) { - if 
(!get_sun_path () || get_sun_path ()[0] == '\0') - return 0; - fhandler_disk_file fh (pc); - fh.get_device () = FH_FS; - int ret = fh.fchmod (S_IFSOCK | adjust_socket_file_mode (mode)); - return ret; + if (newuid != ILLEGAL_UID) + uid = newuid; + if (newgid != ILLEGAL_GID) + gid = newgid; + return 0; } - set_errno (EBADF); - return -1; -} - -int -fhandler_socket::fchown (uid_t uid, gid_t gid) -{ - if (get_addr_family () == AF_LOCAL) - { - if (!get_sun_path () || get_sun_path ()[0] == '\0') - return 0; - fhandler_disk_file fh (pc); - return fh.fchown (uid, gid); - } - set_errno (EBADF); + set_errno (EPERM); return -1; } int fhandler_socket::facl (int cmd, int nentries, aclent_t *aclbufp) { - if (get_addr_family () == AF_LOCAL) - { - if (!get_sun_path () || get_sun_path ()[0] == '\0') - return fhandler_base::facl (cmd, nentries, aclbufp); - fhandler_disk_file fh (pc); - return fh.facl (cmd, nentries, aclbufp); - } - set_errno (EBADF); + set_errno (EOPNOTSUPP); return -1; } int fhandler_socket::link (const char *newpath) { - if (get_addr_family () == AF_LOCAL) - { - fhandler_disk_file fh (pc); - return fh.link (newpath); - } return fhandler_base::link (newpath); } int -fhandler_socket::bind (const struct sockaddr *name, int namelen) +fhandler_socket::get_socket_flags () { - int res = -1; - - if (name->sa_family == AF_LOCAL) - { -#define un_addr ((struct sockaddr_un *) name) - struct sockaddr_in sin; - int len = namelen - offsetof (struct sockaddr_un, sun_path); - - /* Check that name is within bounds. Don't check if the string is - NUL-terminated, because there are projects out there which set - namelen to a value which doesn't cover the trailing NUL. */ - if (len <= 1 || (len = strnlen (un_addr->sun_path, len)) > UNIX_PATH_MAX) - { - set_errno (len <= 1 ? (len == 1 ? ENOENT : EINVAL) : ENAMETOOLONG); - goto out; - } - /* Copy over the sun_path string into a buffer big enough to add a - trailing NUL. 
*/ - char sun_path[len + 1]; - strncpy (sun_path, un_addr->sun_path, len); - sun_path[len] = '\0'; - - /* This isn't entirely foolproof, but we check first if the file exists - so we can return with EADDRINUSE before having bound the socket. - This allows an application to call bind again on the same socket using - another filename. If we bind first, the application will not be able - to call bind successfully ever again. */ - path_conv pc (sun_path, PC_SYM_FOLLOW); - if (pc.error) - { - set_errno (pc.error); - goto out; - } - if (pc.exists ()) - { - set_errno (EADDRINUSE); - goto out; - } - - sin.sin_family = AF_INET; - sin.sin_port = 0; - sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); - if (::bind (get_socket (), (sockaddr *) &sin, len = sizeof sin)) - { - syscall_printf ("AF_LOCAL: bind failed"); - set_winsock_errno (); - goto out; - } - if (::getsockname (get_socket (), (sockaddr *) &sin, &len)) - { - syscall_printf ("AF_LOCAL: getsockname failed"); - set_winsock_errno (); - goto out; - } - - sin.sin_port = ntohs (sin.sin_port); - debug_printf ("AF_LOCAL: socket bound to port %u", sin.sin_port); - - mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - DWORD fattr = FILE_ATTRIBUTE_SYSTEM; - if (!pc.has_acls () - && !(mode & ~cygheap->umask & (S_IWUSR | S_IWGRP | S_IWOTH))) - fattr |= FILE_ATTRIBUTE_READONLY; - SECURITY_ATTRIBUTES sa = sec_none_nih; - NTSTATUS status; - HANDLE fh; - OBJECT_ATTRIBUTES attr; - IO_STATUS_BLOCK io; - ULONG access = DELETE | FILE_GENERIC_WRITE; - - /* If the filesystem supports ACLs, we will overwrite the DACL after the - call to NtCreateFile. This requires a handle with READ_CONTROL and - WRITE_DAC access, otherwise get_file_sd and set_file_sd both have to - open the file again. - FIXME: On remote NTFS shares open sometimes fails because even the - creator of the file doesn't have the right to change the DACL. 
- I don't know what setting that is or how to recognize such a share, - so for now we don't request WRITE_DAC on remote drives. */ - if (pc.has_acls () && !pc.isremote ()) - access |= READ_CONTROL | WRITE_DAC | WRITE_OWNER; - - status = NtCreateFile (&fh, access, pc.get_object_attr (attr, sa), &io, - NULL, fattr, 0, FILE_CREATE, - FILE_NON_DIRECTORY_FILE - | FILE_SYNCHRONOUS_IO_NONALERT - | FILE_OPEN_FOR_BACKUP_INTENT, - NULL, 0); - if (!NT_SUCCESS (status)) - { - if (io.Information == FILE_EXISTS) - set_errno (EADDRINUSE); - else - __seterrno_from_nt_status (status); - } - else - { - if (pc.has_acls ()) - set_created_file_access (fh, pc, mode); - char buf[sizeof (SOCKET_COOKIE) + 80]; - __small_sprintf (buf, "%s%u %c ", SOCKET_COOKIE, sin.sin_port, - get_socket_type () == SOCK_STREAM ? 's' - : get_socket_type () == SOCK_DGRAM ? 'd' : '-'); - af_local_set_secret (strchr (buf, '\0')); - DWORD blen = strlen (buf) + 1; - status = NtWriteFile (fh, NULL, NULL, NULL, &io, buf, blen, NULL, 0); - if (!NT_SUCCESS (status)) - { - __seterrno_from_nt_status (status); - FILE_DISPOSITION_INFORMATION fdi = { TRUE }; - status = NtSetInformationFile (fh, &io, &fdi, sizeof fdi, - FileDispositionInformation); - if (!NT_SUCCESS (status)) - debug_printf ("Setting delete dispostion failed, status = %y", - status); - } - else - { - set_sun_path (sun_path); - res = 0; - } - NtClose (fh); - } -#undef un_addr - } - else - { - if (!saw_reuseaddr ()) - { - /* If the application didn't explicitely request SO_REUSEADDR, - enforce POSIX standard socket binding behaviour by setting the - SO_EXCLUSIVEADDRUSE socket option. See cygwin_setsockopt() - for a more detailed description. 
*/ - int on = 1; - int ret = ::setsockopt (get_socket (), SOL_SOCKET, - SO_EXCLUSIVEADDRUSE, - (const char *) &on, sizeof on); - debug_printf ("%d = setsockopt(SO_EXCLUSIVEADDRUSE), %E", ret); - } - if (::bind (get_socket (), name, namelen)) - set_winsock_errno (); - else - res = 0; - } - -out: - return res; -} - -int -fhandler_socket::connect (const struct sockaddr *name, int namelen) -{ - struct sockaddr_storage sst; - int type = 0; - - if (get_inet_addr (name, namelen, &sst, &namelen, &type, connect_secret) - == SOCKET_ERROR) - return SOCKET_ERROR; - - if (get_addr_family () == AF_LOCAL) - { - if (get_socket_type () != type) - { - WSASetLastError (WSAEPROTOTYPE); - set_winsock_errno (); - return SOCKET_ERROR; - } - - set_peer_sun_path (name->sa_data); - - /* Don't move af_local_set_cred into af_local_connect which may be called - via select, possibly running under another identity. Call early here, - because af_local_connect is called in wait_for_events. */ - if (get_socket_type () == SOCK_STREAM) - af_local_set_cred (); - } - - /* Initialize connect state to "connect_pending". State is ultimately set - to "connected" or "connect_failed" in wait_for_events when the FD_CONNECT - event occurs. Note that the underlying OS sockets are always non-blocking - and a successfully initiated non-blocking Winsock connect always returns - WSAEWOULDBLOCK. Thus it's safe to rely on event handling. - - Check for either unconnected or connect_failed since in both cases it's - allowed to retry connecting the socket. It's also ok (albeit ugly) to - call connect to check if a previous non-blocking connect finished. - - Set connect_state before calling connect, otherwise a race condition with - an already running select or poll might occur. 
*/ - if (connect_state () == unconnected || connect_state () == connect_failed) - connect_state (connect_pending); - - int res = ::connect (get_socket (), (struct sockaddr *) &sst, namelen); - if (!is_nonblocking () - && res == SOCKET_ERROR - && WSAGetLastError () == WSAEWOULDBLOCK) - res = wait_for_events (FD_CONNECT | FD_CLOSE, 0); - - if (res) - { - DWORD err = WSAGetLastError (); - - /* Some applications use the ugly technique to check if a non-blocking - connect succeeded by calling connect again, until it returns EISCONN. - This circumvents the event handling and connect_state is never set. - Thus we check for this situation here. */ - if (err == WSAEISCONN) - connect_state (connected); - /* Winsock returns WSAEWOULDBLOCK if the non-blocking socket cannot be - conected immediately. Convert to POSIX/Linux compliant EINPROGRESS. */ - else if (is_nonblocking () && err == WSAEWOULDBLOCK) - WSASetLastError (WSAEINPROGRESS); - /* Winsock returns WSAEINVAL if the socket is already a listener. - Convert to POSIX/Linux compliant EISCONN. */ - else if (err == WSAEINVAL && connect_state () == listener) - WSASetLastError (WSAEISCONN); - /* Any other error except WSAEALREADY during connect_pending means the - connect failed. */ - else if (connect_state () == connect_pending && err != WSAEALREADY) - connect_state (connect_failed); - set_winsock_errno (); - } - - return res; -} - -int -fhandler_socket::listen (int backlog) -{ - int res = ::listen (get_socket (), backlog); - if (res && WSAGetLastError () == WSAEINVAL) - { - /* It's perfectly valid to call listen on an unbound INET socket. - In this case the socket is automatically bound to an unused - port number, listening on all interfaces. On WinSock, listen - fails with WSAEINVAL when it's called on an unbound socket. - So we have to bind manually here to have POSIX semantics. 
*/ - if (get_addr_family () == AF_INET) - { - struct sockaddr_in sin; - sin.sin_family = AF_INET; - sin.sin_port = 0; - sin.sin_addr.s_addr = INADDR_ANY; - if (!::bind (get_socket (), (struct sockaddr *) &sin, sizeof sin)) - res = ::listen (get_socket (), backlog); - } - else if (get_addr_family () == AF_INET6) - { - struct sockaddr_in6 sin6; - memset (&sin6, 0, sizeof sin6); - sin6.sin6_family = AF_INET6; - if (!::bind (get_socket (), (struct sockaddr *) &sin6, sizeof sin6)) - res = ::listen (get_socket (), backlog); - } - } - if (!res) - { - if (get_addr_family () == AF_LOCAL && get_socket_type () == SOCK_STREAM) - af_local_set_cred (); - connect_state (listener); /* gets set to connected on accepted socket. */ - } - else - set_winsock_errno (); - return res; -} - -int -fhandler_socket::accept4 (struct sockaddr *peer, int *len, int flags) -{ - /* Allows NULL peer and len parameters. */ - struct sockaddr_storage lpeer; - int llen = sizeof (struct sockaddr_storage); - - int res = (int) INVALID_SOCKET; - - /* Windows event handling does not check for the validity of the desired - flags so we have to do it here. */ - if (connect_state () != listener) - { - WSASetLastError (WSAEINVAL); - set_winsock_errno (); - goto out; - } - - while (!(res = wait_for_events (FD_ACCEPT | FD_CLOSE, 0)) - && (res = ::accept (get_socket (), (struct sockaddr *) &lpeer, &llen)) - == SOCKET_ERROR - && WSAGetLastError () == WSAEWOULDBLOCK) - ; - if (res == (int) INVALID_SOCKET) - set_winsock_errno (); - else - { - cygheap_fdnew res_fd; - if (res_fd >= 0 && fdsock (res_fd, &dev (), res)) - { - fhandler_socket *sock = (fhandler_socket *) res_fd; - sock->set_addr_family (get_addr_family ()); - sock->set_socket_type (get_socket_type ()); - sock->async_io (false); /* fdsock switches async mode off. 
*/ - if (get_addr_family () == AF_LOCAL) - { - sock->set_sun_path (get_sun_path ()); - sock->set_peer_sun_path (get_peer_sun_path ()); - if (get_socket_type () == SOCK_STREAM) - { - /* Don't forget to copy credentials from accepting - socket to accepted socket and start transaction - on accepted socket! */ - af_local_copy (sock); - res = sock->af_local_accept (); - if (res == -1) - { - res_fd.release (); - set_winsock_errno (); - goto out; - } - } - } - sock->set_nonblocking (flags & SOCK_NONBLOCK); - if (flags & SOCK_CLOEXEC) - sock->set_close_on_exec (true); - /* No locking necessary at this point. */ - sock->wsock_events->events = wsock_events->events | FD_WRITE; - sock->wsock_events->owner = wsock_events->owner; - sock->connect_state (connected); - res = res_fd; - if (peer) - { - if (get_addr_family () == AF_LOCAL) - { - /* FIXME: Right now we have no way to determine the - bound socket name of the peer's socket. For now - we just fake an unbound socket on the other side. */ - static struct sockaddr_un un = { AF_LOCAL, "" }; - memcpy (peer, &un, MIN (*len, (int) sizeof (un.sun_family))); - *len = (int) sizeof (un.sun_family); - } - else - { - memcpy (peer, &lpeer, MIN (*len, llen)); - *len = llen; - } - } - } - else - { - ::closesocket (res); - res = -1; - } - } - -out: - debug_printf ("res %d", res); - return res; -} - -int -fhandler_socket::getsockname (struct sockaddr *name, int *namelen) -{ - int res = -1; - - if (get_addr_family () == AF_LOCAL) - { - struct sockaddr_un sun; - sun.sun_family = AF_LOCAL; - sun.sun_path[0] = '\0'; - if (get_sun_path ()) - strncat (sun.sun_path, get_sun_path (), UNIX_PATH_MAX - 1); - memcpy (name, &sun, MIN (*namelen, (int) SUN_LEN (&sun) + 1)); - *namelen = (int) SUN_LEN (&sun) + (get_sun_path () ? 1 : 0); - res = 0; - } - else - { - /* WinSock just returns WSAEFAULT if the buffer is too small. Use a - big enough local buffer and truncate later as necessary, per POSIX. 
*/ - struct sockaddr_storage sock; - int len = sizeof sock; - res = ::getsockname (get_socket (), (struct sockaddr *) &sock, &len); - if (!res) - { - memcpy (name, &sock, MIN (*namelen, len)); - *namelen = len; - } - else - { - if (WSAGetLastError () == WSAEINVAL) - { - /* WinSock returns WSAEINVAL if the socket is locally - unbound. Per SUSv3 this is not an error condition. - We're faking a valid return value here by creating the - same content in the sockaddr structure as on Linux. */ - memset (&sock, 0, sizeof sock); - sock.ss_family = get_addr_family (); - switch (get_addr_family ()) - { - case AF_INET: - res = 0; - len = (int) sizeof (struct sockaddr_in); - break; - case AF_INET6: - res = 0; - len = (int) sizeof (struct sockaddr_in6); - break; - default: - WSASetLastError (WSAEOPNOTSUPP); - break; - } - if (!res) - { - memcpy (name, &sock, MIN (*namelen, len)); - *namelen = len; - } - } - if (res) - set_winsock_errno (); - } - } - - return res; -} - -int -fhandler_socket::getpeername (struct sockaddr *name, int *namelen) -{ - /* Always use a local big enough buffer and truncate later as necessary - per POSIX. WinSock unfortunately only returns WSAEFAULT if the buffer - is too small. */ - struct sockaddr_storage sock; - int len = sizeof sock; - int res = ::getpeername (get_socket (), (struct sockaddr *) &sock, &len); - if (res) - set_winsock_errno (); - else if (get_addr_family () == AF_LOCAL) - { - struct sockaddr_un sun; - memset (&sun, 0, sizeof sun); - sun.sun_family = AF_LOCAL; - sun.sun_path[0] = '\0'; - if (get_peer_sun_path ()) - strncat (sun.sun_path, get_peer_sun_path (), UNIX_PATH_MAX - 1); - memcpy (name, &sun, MIN (*namelen, (int) SUN_LEN (&sun) + 1)); - *namelen = (int) SUN_LEN (&sun) + (get_peer_sun_path () ? 1 : 0); - } - else - { - memcpy (name, &sock, MIN (*namelen, len)); - *namelen = len; - } - - return res; -} - -/* There's no DLL which exports the symbol WSARecvMsg. One has to call - WSAIoctl as below to fetch the function pointer. 
Why on earth did the - MS developers decide not to export a normal symbol for these extension - functions? */ -inline int -get_ext_funcptr (SOCKET sock, void *funcptr) -{ - DWORD bret; - const GUID guid = WSAID_WSARECVMSG; - return WSAIoctl (sock, SIO_GET_EXTENSION_FUNCTION_POINTER, - (void *) &guid, sizeof (GUID), funcptr, sizeof (void *), - &bret, NULL, NULL); -} - -inline ssize_t -fhandler_socket::recv_internal (LPWSAMSG wsamsg, bool use_recvmsg) -{ - ssize_t res = 0; - DWORD ret = 0, wret; - int evt_mask = FD_READ | ((wsamsg->dwFlags & MSG_OOB) ? FD_OOB : 0); - LPWSABUF &wsabuf = wsamsg->lpBuffers; - ULONG &wsacnt = wsamsg->dwBufferCount; - static NO_COPY LPFN_WSARECVMSG WSARecvMsg; - int orig_namelen = wsamsg->namelen; - - /* CV 2014-10-26: Do not check for the connect_state at this point. In - certain scenarios there's no way to check the connect state reliably. - Example (hexchat): Parent process creates socket, forks, child process - calls connect, parent process calls read. Even if the event handling - allows to check for FD_CONNECT in the parent, there is always yet another - scenario we can easily break. */ - - DWORD wait_flags = wsamsg->dwFlags; - bool waitall = !!(wait_flags & MSG_WAITALL); - wsamsg->dwFlags &= (MSG_OOB | MSG_PEEK | MSG_DONTROUTE); - if (use_recvmsg) - { - if (!WSARecvMsg - && get_ext_funcptr (get_socket (), &WSARecvMsg) == SOCKET_ERROR) - { - if (wsamsg->Control.len > 0) - { - set_winsock_errno (); - return SOCKET_ERROR; - } - use_recvmsg = false; - } - else /* Only MSG_PEEK is supported by WSARecvMsg. 
*/ - wsamsg->dwFlags &= MSG_PEEK; - } - if (waitall) - { - if (get_socket_type () != SOCK_STREAM) - { - WSASetLastError (WSAEOPNOTSUPP); - set_winsock_errno (); - return SOCKET_ERROR; - } - if (is_nonblocking () || (wsamsg->dwFlags & (MSG_OOB | MSG_PEEK))) - waitall = false; - } - - /* Note: Don't call WSARecvFrom(MSG_PEEK) without actually having data - waiting in the buffers, otherwise the event handling gets messed up - for some reason. */ - while (!(res = wait_for_events (evt_mask | FD_CLOSE, wait_flags)) - || saw_shutdown_read ()) - { - if (use_recvmsg) - res = WSARecvMsg (get_socket (), wsamsg, &wret, NULL, NULL); - /* This is working around a really weird problem in WinSock. - - Assume you create a socket, fork the process (thus duplicating - the socket), connect the socket in the child, then call recv - on the original socket handle in the parent process. - In this scenario, calls to WinSock's recvfrom and WSARecvFrom - in the parent will fail with WSAEINVAL, regardless whether both - address parameters, name and namelen, are NULL or point to valid - storage. However, calls to recv and WSARecv succeed as expected. - Per MSDN, WSAEINVAL in the context of recv means "The socket has not - been bound". It is as if the recvfrom functions test if the socket - is bound locally, but in the parent process, WinSock doesn't know - about that and fails, while the same test is omitted in the recv - functions. - - This also covers another weird case: WinSock returns WSAEFAULT if - namelen is a valid pointer while name is NULL. Both parameters are - ignored for TCP sockets, so this only occurs when using UDP socket. 
*/ - else if (!wsamsg->name || get_socket_type () == SOCK_STREAM) - res = WSARecv (get_socket (), wsabuf, wsacnt, &wret, &wsamsg->dwFlags, - NULL, NULL); - else - res = WSARecvFrom (get_socket (), wsabuf, wsacnt, &wret, - &wsamsg->dwFlags, wsamsg->name, &wsamsg->namelen, - NULL, NULL); - if (!res) - { - ret += wret; - if (!waitall) - break; - while (wret && wsacnt) - { - if (wsabuf->len > wret) - { - wsabuf->len -= wret; - wsabuf->buf += wret; - wret = 0; - } - else - { - wret -= wsabuf->len; - ++wsabuf; - --wsacnt; - } - } - if (!wret) - break; - } - else if (WSAGetLastError () != WSAEWOULDBLOCK) - break; - } - - if (res) - { - /* According to SUSv3, errno isn't set in that case and no error - condition is returned. */ - if (WSAGetLastError () == WSAEMSGSIZE) - ret += wret; - else if (!ret) - { - /* ESHUTDOWN isn't defined for recv in SUSv3. Simply EOF is returned - in this case. */ - if (WSAGetLastError () == WSAESHUTDOWN) - ret = 0; - else - { - set_winsock_errno (); - return SOCKET_ERROR; - } - } - } - - if (get_addr_family () == AF_LOCAL && wsamsg->name != NULL - && orig_namelen >= (int) sizeof (sa_family_t)) - { - /* WSARecvFrom copied the sockaddr_in block to wsamsg->name. We have to - overwrite it with a sockaddr_un block. For datagram sockets we - generate a sockaddr_un with a filename analogue to abstract socket - names under Linux. See `man 7 unix' under Linux for a description. 
*/ - sockaddr_un *un = (sockaddr_un *) wsamsg->name; - un->sun_family = AF_LOCAL; - int len = orig_namelen - offsetof (struct sockaddr_un, sun_path); - if (len > 0) - { - if (get_socket_type () == SOCK_DGRAM) - { - if (len >= 7) - { - __small_sprintf (un->sun_path + 1, "d%04x", - ((struct sockaddr_in *) wsamsg->name)->sin_port); - wsamsg->namelen = offsetof (struct sockaddr_un, sun_path) + 7; - } - else - wsamsg->namelen = offsetof (struct sockaddr_un, sun_path) + 1; - un->sun_path[0] = '\0'; - } - else if (!get_peer_sun_path ()) - wsamsg->namelen = sizeof (sa_family_t); - else - { - memset (un->sun_path, 0, len); - strncpy (un->sun_path, get_peer_sun_path (), len); - if (un->sun_path[len - 1] == '\0') - len = strlen (un->sun_path) + 1; - if (len > UNIX_PATH_MAX) - len = UNIX_PATH_MAX; - wsamsg->namelen = offsetof (struct sockaddr_un, sun_path) + len; - } - } - } - + int ret = 0; + if (is_nonblocking ()) + ret |= SOCK_NONBLOCK; + if (close_on_exec ()) + ret |= SOCK_CLOEXEC; return ret; } -void __reg3 -fhandler_socket::read (void *in_ptr, size_t& len) -{ - char *ptr = (char *) in_ptr; - -#ifdef __x86_64__ - /* size_t is 64 bit, but the len member in WSABUF is 32 bit. - Split buffer if necessary. */ - DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); - WSABUF wsabuf[bufcnt]; - WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 }; - /* Don't use len as loop condition, it could be 0. 
*/ - for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) - { - wsaptr->len = MIN (len, UINT32_MAX); - wsaptr->buf = ptr; - len -= wsaptr->len; - ptr += wsaptr->len; - } -#else - WSABUF wsabuf = { len, ptr }; - WSAMSG wsamsg = { NULL, 0, &wsabuf, 1, { 0, NULL }, 0 }; -#endif - - len = recv_internal (&wsamsg, false); -} - -ssize_t -fhandler_socket::readv (const struct iovec *const iov, const int iovcnt, - ssize_t tot) -{ - WSABUF wsabuf[iovcnt]; - WSABUF *wsaptr = wsabuf + iovcnt; - const struct iovec *iovptr = iov + iovcnt; - while (--wsaptr >= wsabuf) - { - wsaptr->len = (--iovptr)->iov_len; - wsaptr->buf = (char *) iovptr->iov_base; - } - WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 }; - return recv_internal (&wsamsg, false); -} - -ssize_t -fhandler_socket::recvfrom (void *in_ptr, size_t len, int flags, - struct sockaddr *from, int *fromlen) -{ - char *ptr = (char *) in_ptr; - -#ifdef __x86_64__ - /* size_t is 64 bit, but the len member in WSABUF is 32 bit. - Split buffer if necessary. */ - DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); - WSABUF wsabuf[bufcnt]; - WSAMSG wsamsg = { from, from && fromlen ? *fromlen : 0, - wsabuf, bufcnt, - { 0, NULL }, - (DWORD) flags }; - /* Don't use len as loop condition, it could be 0. */ - for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) - { - wsaptr->len = MIN (len, UINT32_MAX); - wsaptr->buf = ptr; - len -= wsaptr->len; - ptr += wsaptr->len; - } -#else - WSABUF wsabuf = { len, ptr }; - WSAMSG wsamsg = { from, from && fromlen ? *fromlen : 0, - &wsabuf, 1, - { 0, NULL}, - (DWORD) flags }; -#endif - ssize_t ret = recv_internal (&wsamsg, false); - if (fromlen) - *fromlen = wsamsg.namelen; - return ret; -} - -ssize_t -fhandler_socket::recvmsg (struct msghdr *msg, int flags) -{ - /* TODO: Descriptor passing on AF_LOCAL sockets. */ - - /* Disappointing but true: Even if WSARecvMsg is supported, it's only - supported for datagram and raw sockets. 
*/ - bool use_recvmsg = true; - if (get_socket_type () == SOCK_STREAM || get_addr_family () == AF_LOCAL) - { - use_recvmsg = false; - msg->msg_controllen = 0; - } - - WSABUF wsabuf[msg->msg_iovlen]; - WSABUF *wsaptr = wsabuf + msg->msg_iovlen; - const struct iovec *iovptr = msg->msg_iov + msg->msg_iovlen; - while (--wsaptr >= wsabuf) - { - wsaptr->len = (--iovptr)->iov_len; - wsaptr->buf = (char *) iovptr->iov_base; - } - WSAMSG wsamsg = { (struct sockaddr *) msg->msg_name, msg->msg_namelen, - wsabuf, (DWORD) msg->msg_iovlen, - { (DWORD) msg->msg_controllen, (char *) msg->msg_control }, - (DWORD) flags }; - ssize_t ret = recv_internal (&wsamsg, use_recvmsg); - if (ret >= 0) - { - msg->msg_namelen = wsamsg.namelen; - msg->msg_controllen = wsamsg.Control.len; - if (!CYGWIN_VERSION_CHECK_FOR_USING_ANCIENT_MSGHDR) - msg->msg_flags = wsamsg.dwFlags; - } - return ret; -} - -inline ssize_t -fhandler_socket::send_internal (struct _WSAMSG *wsamsg, int flags) -{ - ssize_t res = 0; - DWORD ret = 0, sum = 0; - WSABUF out_buf[wsamsg->dwBufferCount]; - bool use_sendmsg = false; - DWORD wait_flags = flags & MSG_DONTWAIT; - bool nosignal = !!(flags & MSG_NOSIGNAL); - - flags &= (MSG_OOB | MSG_DONTROUTE); - if (wsamsg->Control.len > 0) - use_sendmsg = true; - /* Workaround for MSDN KB 823764: Split a message into chunks <= SO_SNDBUF. - in_idx is the index of the current lpBuffers from the input wsamsg buffer. - in_off is used to keep track of the next byte to write from a wsamsg - buffer which only gets partially written. */ - for (DWORD in_idx = 0, in_off = 0; - in_idx < wsamsg->dwBufferCount; - in_off >= wsamsg->lpBuffers[in_idx].len && (++in_idx, in_off = 0)) - { - /* Split a message into the least number of pieces to minimize the - number of WsaSendTo calls. Don't split datagram messages (bad idea). - out_idx is the index of the next buffer in the out_buf WSABUF, - also the number of buffers given to WSASendTo. - out_len is the number of bytes in the buffers given to WSASendTo. 
- Don't split datagram messages (very bad idea). */ - DWORD out_idx = 0; - DWORD out_len = 0; - if (get_socket_type () == SOCK_STREAM) - { - do - { - out_buf[out_idx].buf = wsamsg->lpBuffers[in_idx].buf + in_off; - out_buf[out_idx].len = wsamsg->lpBuffers[in_idx].len - in_off; - out_len += out_buf[out_idx].len; - out_idx++; - } - while (out_len < (unsigned) wmem () - && (in_off = 0, ++in_idx < wsamsg->dwBufferCount)); - /* Tweak len of the last out_buf buffer so the entire number of bytes - is (less than or) equal to wmem (). Fix out_len as well since it's - used in a subsequent test expression. */ - if (out_len > (unsigned) wmem ()) - { - out_buf[out_idx - 1].len -= out_len - (unsigned) wmem (); - out_len = (unsigned) wmem (); - } - /* Add the bytes written from the current last buffer to in_off, - so in_off points to the next byte to be written from that buffer, - or beyond which lets the outper loop skip to the next buffer. */ - in_off += out_buf[out_idx - 1].len; - } - - do - { - if (use_sendmsg) - res = WSASendMsg (get_socket (), wsamsg, flags, &ret, NULL, NULL); - else if (get_socket_type () == SOCK_STREAM) - res = WSASendTo (get_socket (), out_buf, out_idx, &ret, flags, - wsamsg->name, wsamsg->namelen, NULL, NULL); - else - res = WSASendTo (get_socket (), wsamsg->lpBuffers, - wsamsg->dwBufferCount, &ret, flags, - wsamsg->name, wsamsg->namelen, NULL, NULL); - if (res && (WSAGetLastError () == WSAEWOULDBLOCK)) - { - LOCK_EVENTS; - wsock_events->events &= ~FD_WRITE; - UNLOCK_EVENTS; - } - } - while (res && (WSAGetLastError () == WSAEWOULDBLOCK) - && !(res = wait_for_events (FD_WRITE | FD_CLOSE, wait_flags))); - - if (!res) - { - sum += ret; - /* For streams, return to application if the number of bytes written - is less than the number of bytes we intended to write in a single - call to WSASendTo. Otherwise we would have to add code to - backtrack in the input buffers, which is questionable. There was - probably a good reason we couldn't write more. 
*/ - if (get_socket_type () != SOCK_STREAM || ret < out_len) - break; - } - else if (is_nonblocking () || WSAGetLastError() != WSAEWOULDBLOCK) - break; - } - - if (sum) - res = sum; - else if (res == SOCKET_ERROR) - { - set_winsock_errno (); - - /* Special handling for EPIPE and SIGPIPE. - - EPIPE is generated if the local end has been shut down on a connection - oriented socket. In this case the process will also receive a SIGPIPE - unless MSG_NOSIGNAL is set. */ - if ((get_errno () == ECONNABORTED || get_errno () == ESHUTDOWN) - && get_socket_type () == SOCK_STREAM) - { - set_errno (EPIPE); - if (!nosignal) - raise (SIGPIPE); - } - } - - return res; -} - -ssize_t -fhandler_socket::write (const void *in_ptr, size_t len) -{ - char *ptr = (char *) in_ptr; - -#ifdef __x86_64__ - /* size_t is 64 bit, but the len member in WSABUF is 32 bit. - Split buffer if necessary. */ - DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); - WSABUF wsabuf[bufcnt]; - WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 }; - /* Don't use len as loop condition, it could be 0. 
*/ - for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) - { - wsaptr->len = MIN (len, UINT32_MAX); - wsaptr->buf = ptr; - len -= wsaptr->len; - ptr += wsaptr->len; - } -#else - WSABUF wsabuf = { len, ptr }; - WSAMSG wsamsg = { NULL, 0, &wsabuf, 1, { 0, NULL }, 0 }; -#endif - return send_internal (&wsamsg, 0); -} - -ssize_t -fhandler_socket::writev (const struct iovec *const iov, const int iovcnt, - ssize_t tot) -{ - WSABUF wsabuf[iovcnt]; - WSABUF *wsaptr = wsabuf; - const struct iovec *iovptr = iov; - for (int i = 0; i < iovcnt; ++i) - { - wsaptr->len = iovptr->iov_len; - (wsaptr++)->buf = (char *) (iovptr++)->iov_base; - } - WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 }; - return send_internal (&wsamsg, 0); -} - -ssize_t -fhandler_socket::sendto (const void *in_ptr, size_t len, int flags, - const struct sockaddr *to, int tolen) -{ - char *ptr = (char *) in_ptr; - struct sockaddr_storage sst; - - if (to && get_inet_addr (to, tolen, &sst, &tolen) == SOCKET_ERROR) - return SOCKET_ERROR; - -#ifdef __x86_64__ - /* size_t is 64 bit, but the len member in WSABUF is 32 bit. - Split buffer if necessary. */ - DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); - WSABUF wsabuf[bufcnt]; - WSAMSG wsamsg = { to ? (struct sockaddr *) &sst : NULL, tolen, - wsabuf, bufcnt, - { 0, NULL }, - 0 }; - /* Don't use len as loop condition, it could be 0. */ - for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) - { - wsaptr->len = MIN (len, UINT32_MAX); - wsaptr->buf = ptr; - len -= wsaptr->len; - ptr += wsaptr->len; - } -#else - WSABUF wsabuf = { len, ptr }; - WSAMSG wsamsg = { to ? (struct sockaddr *) &sst : NULL, tolen, - &wsabuf, 1, - { 0, NULL}, - 0 }; -#endif - return send_internal (&wsamsg, flags); -} - -ssize_t -fhandler_socket::sendmsg (const struct msghdr *msg, int flags) -{ - /* TODO: Descriptor passing on AF_LOCAL sockets. 
*/ - - struct sockaddr_storage sst; - int len = 0; - - if (msg->msg_name - && get_inet_addr ((struct sockaddr *) msg->msg_name, msg->msg_namelen, - &sst, &len) == SOCKET_ERROR) - return SOCKET_ERROR; - - WSABUF wsabuf[msg->msg_iovlen]; - WSABUF *wsaptr = wsabuf; - const struct iovec *iovptr = msg->msg_iov; - for (int i = 0; i < msg->msg_iovlen; ++i) - { - wsaptr->len = iovptr->iov_len; - (wsaptr++)->buf = (char *) (iovptr++)->iov_base; - } - /* Disappointing but true: Even if WSASendMsg is supported, it's only - supported for datagram and raw sockets. */ - DWORD controllen = (DWORD) (get_socket_type () == SOCK_STREAM - || get_addr_family () == AF_LOCAL - ? 0 : msg->msg_controllen); - WSAMSG wsamsg = { msg->msg_name ? (struct sockaddr *) &sst : NULL, len, - wsabuf, (DWORD) msg->msg_iovlen, - { controllen, (char *) msg->msg_control }, - 0 }; - return send_internal (&wsamsg, flags); -} - int fhandler_socket::shutdown (int how) { @@ -2640,378 +1085,9 @@ fhandler_socket::set_close_on_exec (bool val) fhandler_base::set_close_on_exec (val); } -void -fhandler_socket::set_sun_path (const char *path) -{ - sun_path = path ? cstrdup (path) : NULL; -} - -void -fhandler_socket::set_peer_sun_path (const char *path) -{ - peer_sun_path = path ? 
cstrdup (path) : NULL; -} - int fhandler_socket::getpeereid (pid_t *pid, uid_t *euid, gid_t *egid) { - if (get_addr_family () != AF_LOCAL || get_socket_type () != SOCK_STREAM) - { - set_errno (EINVAL); - return -1; - } - if (no_getpeereid ()) - { - set_errno (ENOTSUP); - return -1; - } - if (connect_state () != connected) - { - set_errno (ENOTCONN); - return -1; - } - - __try - { - if (pid) - *pid = sec_peer_pid; - if (euid) - *euid = sec_peer_uid; - if (egid) - *egid = sec_peer_gid; - return 0; - } - __except (EFAULT) {} - __endtry + set_errno (EINVAL); return -1; } - -static int -convert_ws1_ip_optname (int optname) -{ - static int ws2_optname[] = - { - 0, - IP_OPTIONS, - IP_MULTICAST_IF, - IP_MULTICAST_TTL, - IP_MULTICAST_LOOP, - IP_ADD_MEMBERSHIP, - IP_DROP_MEMBERSHIP, - IP_TTL, - IP_TOS, - IP_DONTFRAGMENT - }; - return (optname < 1 || optname > _WS1_IP_DONTFRAGMENT) - ? optname - : ws2_optname[optname]; -} - -int -fhandler_socket::setsockopt (int level, int optname, const void *optval, - socklen_t optlen) -{ - bool ignore = false; - int ret = -1; - - /* Preprocessing setsockopt. Set ignore to true if setsockopt call should - get skipped entirely. */ - switch (level) - { - case SOL_SOCKET: - switch (optname) - { - case SO_PEERCRED: - /* Switch off the AF_LOCAL handshake and thus SO_PEERCRED handling - for AF_LOCAL/SOCK_STREAM sockets. This allows to handle special - situations in which connect is called before a listening socket - accepts connections. - FIXME: In the long run we should find a more generic solution - which doesn't require a blocking handshake in accept/connect - to exchange SO_PEERCRED credentials. */ - if (optval || optlen) - set_errno (EINVAL); - else - ret = af_local_set_no_getpeereid (); - return ret; - - case SO_REUSEADDR: - /* Per POSIX we must not be able to reuse a complete duplicate of a - local TCP address (same IP, same port), even if SO_REUSEADDR has - been set. 
This behaviour is maintained in WinSock for backward - compatibility, while the WinSock standard behaviour of stream - socket binding is equivalent to the POSIX behaviour as if - SO_REUSEADDR has been set. The SO_EXCLUSIVEADDRUSE option has - been added to allow an application to request POSIX standard - behaviour in the non-SO_REUSEADDR case. - - To emulate POSIX socket binding behaviour, note that SO_REUSEADDR - has been set but don't call setsockopt. Instead - fhandler_socket::bind sets SO_EXCLUSIVEADDRUSE if the application - did not set SO_REUSEADDR. */ - if (optlen < (socklen_t) sizeof (int)) - { - set_errno (EINVAL); - return ret; - } - if (get_socket_type () == SOCK_STREAM) - ignore = true; - break; - - case SO_RCVTIMEO: - case SO_SNDTIMEO: - if (optlen < (socklen_t) sizeof (struct timeval)) - { - set_errno (EINVAL); - return ret; - } - if (timeval_to_ms ((struct timeval *) optval, - (optname == SO_RCVTIMEO) ? rcvtimeo () - : sndtimeo ())) - ret = 0; - else - set_errno (EDOM); - return ret; - - default: - break; - } - break; - - case IPPROTO_IP: - /* Old applications still use the old WinSock1 IPPROTO_IP values. */ - if (CYGWIN_VERSION_CHECK_FOR_USING_WINSOCK1_VALUES) - optname = convert_ws1_ip_optname (optname); - switch (optname) - { - case IP_TOS: - /* Winsock doesn't support setting the IP_TOS field with setsockopt - and TOS was never implemented for TCP anyway. setsockopt returns - WinSock error 10022, WSAEINVAL when trying to set the IP_TOS - field. We just return 0 instead. 
*/ - ignore = true; - break; - - default: - break; - } - break; - - case IPPROTO_IPV6: - { - switch (optname) - { - case IPV6_TCLASS: - /* Unsupported */ - ignore = true; - break; - - default: - break; - } - } - default: - break; - } - - /* Call Winsock setsockopt (or not) */ - if (ignore) - ret = 0; - else - { - ret = ::setsockopt (get_socket (), level, optname, (const char *) optval, - optlen); - if (ret == SOCKET_ERROR) - { - set_winsock_errno (); - return ret; - } - } - - if (optlen == (socklen_t) sizeof (int)) - debug_printf ("setsockopt optval=%x", *(int *) optval); - - /* Postprocessing setsockopt, setting fhandler_socket members, etc. */ - switch (level) - { - case SOL_SOCKET: - switch (optname) - { - case SO_REUSEADDR: - saw_reuseaddr (*(int *) optval); - break; - - case SO_RCVBUF: - rmem (*(int *) optval); - break; - - case SO_SNDBUF: - wmem (*(int *) optval); - break; - - default: - break; - } - break; - - default: - break; - } - - return ret; -} - -int -fhandler_socket::getsockopt (int level, int optname, const void *optval, - socklen_t *optlen) -{ - bool ignore = false; - bool onebyte = false; - int ret = -1; - - /* Preprocessing getsockopt. Set ignore to true if getsockopt call should - get skipped entirely. 
*/ - switch (level) - { - case SOL_SOCKET: - switch (optname) - { - case SO_PEERCRED: - { - struct ucred *cred = (struct ucred *) optval; - - if (*optlen < (socklen_t) sizeof *cred) - { - set_errno (EINVAL); - return ret; - } - ret = getpeereid (&cred->pid, &cred->uid, &cred->gid); - if (!ret) - *optlen = (socklen_t) sizeof *cred; - return ret; - } - break; - - case SO_REUSEADDR: - { - unsigned int *reuseaddr = (unsigned int *) optval; - - if (*optlen < (socklen_t) sizeof *reuseaddr) - { - set_errno (EINVAL); - return ret; - } - *reuseaddr = saw_reuseaddr(); - *optlen = (socklen_t) sizeof *reuseaddr; - ignore = true; - } - break; - - case SO_RCVTIMEO: - case SO_SNDTIMEO: - { - struct timeval *time_out = (struct timeval *) optval; - - if (*optlen < (socklen_t) sizeof *time_out) - { - set_errno (EINVAL); - return ret; - } - DWORD ms = (optname == SO_RCVTIMEO) ? rcvtimeo () : sndtimeo (); - if (ms == 0 || ms == INFINITE) - { - time_out->tv_sec = 0; - time_out->tv_usec = 0; - } - else - { - time_out->tv_sec = ms / MSPERSEC; - time_out->tv_usec = ((ms % MSPERSEC) * USPERSEC) / MSPERSEC; - } - *optlen = (socklen_t) sizeof *time_out; - ret = 0; - return ret; - } - - default: - break; - } - break; - - case IPPROTO_IP: - /* Old applications still use the old WinSock1 IPPROTO_IP values. */ - if (CYGWIN_VERSION_CHECK_FOR_USING_WINSOCK1_VALUES) - optname = convert_ws1_ip_optname (optname); - break; - - default: - break; - } - - /* Call Winsock getsockopt (or not) */ - if (ignore) - ret = 0; - else - { - ret = ::getsockopt (get_socket (), level, optname, (char *) optval, - (int *) optlen); - if (ret == SOCKET_ERROR) - { - set_winsock_errno (); - return ret; - } - } - - /* Postprocessing getsockopt, setting fhandler_socket members, etc. Set - onebyte true for options returning BOOLEAN instead of a boolean DWORD. 
*/ - switch (level) - { - case SOL_SOCKET: - switch (optname) - { - case SO_ERROR: - { - int *e = (int *) optval; - debug_printf ("WinSock SO_ERROR = %d", *e); - *e = find_winsock_errno (*e); - } - break; - - case SO_KEEPALIVE: - case SO_DONTROUTE: - onebyte = true; - break; - - default: - break; - } - break; - case IPPROTO_TCP: - switch (optname) - { - case TCP_NODELAY: - onebyte = true; - break; - - default: - break; - } - default: - break; - } - - if (onebyte) - { - /* Regression in Vista and later: instead of a 4 byte BOOL value, a - 1 byte BOOLEAN value is returned, in contrast to older systems and - the documentation. Since an int type is expected by the calling - application, we convert the result here. For some reason only three - BSD-compatible socket options seem to be affected. */ - BOOLEAN *in = (BOOLEAN *) optval; - int *out = (int *) optval; - *out = *in; - *optlen = 4; - } - - return ret; -} diff --git a/winsup/cygwin/fhandler_socket_inet.cc b/winsup/cygwin/fhandler_socket_inet.cc new file mode 100644 index 000000000..91da47cd1 --- /dev/null +++ b/winsup/cygwin/fhandler_socket_inet.cc @@ -0,0 +1,1163 @@ +/* fhandler_socket_inet.cc. + + See fhandler.h for a description of the fhandler classes. + + This file is part of Cygwin. + + This software is a copyrighted work licensed under the terms of the + Cygwin license. Please consult the file "CYGWIN_LICENSE" for + details. */ + +#define __INSIDE_CYGWIN_NET__ +#define USE_SYS_TYPES_FD_SET + +#include "winsup.h" +#ifdef __x86_64__ +/* 2014-04-24: Current Mingw headers define sockaddr_in6 using u_long (8 byte) + because a redefinition for LP64 systems is missing. This leads to a wrong + definition and size of sockaddr_in6 when building with winsock headers. + This definition is also required to use the right u_long type in subsequent + function calls. 
*/ +#undef u_long +#define u_long __ms_u_long +#endif +#include +#include +#include +#include +#include "cygerrno.h" +#include "security.h" +#include "path.h" +#include "fhandler.h" +#include "dtable.h" +#include "cygheap.h" +#include +#include "cygwin/version.h" +#include "perprocess.h" +#include "shared_info.h" +#include "sigproc.h" +#include "wininfo.h" +#include +#include +#include +#include +#include "cygtls.h" +#include +#include "ntdll.h" +#include "miscfuncs.h" +#include "tls_pbuf.h" + +#define ASYNC_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT) +#define EVENT_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT|FD_CLOSE) + +#define LOCK_EVENTS \ + if (wsock_mtx && \ + WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) \ + { + +#define UNLOCK_EVENTS \ + ReleaseMutex (wsock_mtx); \ + } + +/* cygwin internal: map sockaddr into internet domain address */ +static int +get_inet_addr_inet (const struct sockaddr *in, int inlen, + struct sockaddr_storage *out, int *outlen) +{ + switch (in->sa_family) + { + case AF_INET: + memcpy (out, in, inlen); + *outlen = inlen; + /* If the peer address given in connect or sendto is the ANY address, + Winsock fails with WSAEADDRNOTAVAIL, while Linux converts that into + a connection/send attempt to LOOPBACK. We're doing the same here. */ + if (((struct sockaddr_in *) out)->sin_addr.s_addr == htonl (INADDR_ANY)) + ((struct sockaddr_in *) out)->sin_addr.s_addr = htonl (INADDR_LOOPBACK); + return 0; + case AF_INET6: + memcpy (out, in, inlen); + *outlen = inlen; + /* See comment in AF_INET case. 
*/ + if (IN6_IS_ADDR_UNSPECIFIED (&((struct sockaddr_in6 *) out)->sin6_addr)) + ((struct sockaddr_in6 *) out)->sin6_addr = in6addr_loopback; + return 0; + default: + set_errno (EAFNOSUPPORT); + return SOCKET_ERROR; + } +} + +static int +convert_ws1_ip_optname (int optname) +{ + static int ws2_optname[] = + { + 0, + IP_OPTIONS, + IP_MULTICAST_IF, + IP_MULTICAST_TTL, + IP_MULTICAST_LOOP, + IP_ADD_MEMBERSHIP, + IP_DROP_MEMBERSHIP, + IP_TTL, + IP_TOS, + IP_DONTFRAGMENT + }; + return (optname < 1 || optname > _WS1_IP_DONTFRAGMENT) + ? optname + : ws2_optname[optname]; +} + +fhandler_socket_inet::fhandler_socket_inet () : + fhandler_socket () +{ +} + +fhandler_socket_inet::~fhandler_socket_inet () +{ +} + +int +fhandler_socket_inet::socket (int af, int type, int protocol, int flags) +{ + SOCKET sock; + int ret; + + sock = ::socket (af, type, protocol); + if (sock == INVALID_SOCKET) + { + set_winsock_errno (); + return -1; + } + ret = set_socket_handle (sock, af, type, flags); + if (ret < 0) + ::closesocket (sock); + return ret; +} + +/* socketpair is called on the fhandler handling the accepting socket, + fh_out is the fhandler for the connecting socket. */ +int +fhandler_socket_inet::socketpair (int af, int type, int protocol, int flags, + fhandler_socket *fh_out) +{ + set_errno (EAFNOSUPPORT); + return -1; +} + +int +fhandler_socket_inet::bind (const struct sockaddr *name, int namelen) +{ + int res = -1; + + if (!saw_reuseaddr ()) + { + /* If the application didn't explicitly request SO_REUSEADDR, + enforce POSIX standard socket binding behaviour by setting the + SO_EXCLUSIVEADDRUSE socket option. See cygwin_setsockopt() + for a more detailed description. 
*/ + int on = 1; + int ret = ::setsockopt (get_socket (), SOL_SOCKET, + SO_EXCLUSIVEADDRUSE, + (const char *) &on, sizeof on); + debug_printf ("%d = setsockopt(SO_EXCLUSIVEADDRUSE), %E", ret); + } + if (::bind (get_socket (), name, namelen)) + set_winsock_errno (); + else + res = 0; + + return res; +} + +int +fhandler_socket_inet::connect (const struct sockaddr *name, int namelen) +{ + struct sockaddr_storage sst; + + if (get_inet_addr_inet (name, namelen, &sst, &namelen) == SOCKET_ERROR) + return SOCKET_ERROR; + + /* Initialize connect state to "connect_pending". State is ultimately set + to "connected" or "connect_failed" in wait_for_events when the FD_CONNECT + event occurs. Note that the underlying OS sockets are always non-blocking + and a successfully initiated non-blocking Winsock connect always returns + WSAEWOULDBLOCK. Thus it's safe to rely on event handling. + + Check for either unconnected or connect_failed since in both cases it's + allowed to retry connecting the socket. It's also ok (albeit ugly) to + call connect to check if a previous non-blocking connect finished. + + Set connect_state before calling connect, otherwise a race condition with + an already running select or poll might occur. */ + if (connect_state () == unconnected || connect_state () == connect_failed) + connect_state (connect_pending); + + int res = ::connect (get_socket (), (struct sockaddr *) &sst, namelen); + if (!is_nonblocking () + && res == SOCKET_ERROR + && WSAGetLastError () == WSAEWOULDBLOCK) + res = wait_for_events (FD_CONNECT | FD_CLOSE, 0); + + if (res) + { + DWORD err = WSAGetLastError (); + + /* Some applications use the ugly technique to check if a non-blocking + connect succeeded by calling connect again, until it returns EISCONN. + This circumvents the event handling and connect_state is never set. + Thus we check for this situation here. 
*/ + if (err == WSAEISCONN) + connect_state (connected); + /* Winsock returns WSAEWOULDBLOCK if the non-blocking socket cannot be + connected immediately. Convert to POSIX/Linux compliant EINPROGRESS. */ + else if (is_nonblocking () && err == WSAEWOULDBLOCK) + WSASetLastError (WSAEINPROGRESS); + /* Winsock returns WSAEINVAL if the socket is already a listener. + Convert to POSIX/Linux compliant EISCONN. */ + else if (err == WSAEINVAL && connect_state () == listener) + WSASetLastError (WSAEISCONN); + /* Any other error except WSAEALREADY during connect_pending means the + connect failed. */ + else if (connect_state () == connect_pending && err != WSAEALREADY) + connect_state (connect_failed); + set_winsock_errno (); + } + + return res; +} + +int +fhandler_socket_inet::listen (int backlog) +{ + int res = ::listen (get_socket (), backlog); + if (res && WSAGetLastError () == WSAEINVAL) + { + /* It's perfectly valid to call listen on an unbound INET socket. + In this case the socket is automatically bound to an unused + port number, listening on all interfaces. On WinSock, listen + fails with WSAEINVAL when it's called on an unbound socket. + So we have to bind manually here to have POSIX semantics. */ + if (get_addr_family () == AF_INET) + { + struct sockaddr_in sin; + sin.sin_family = AF_INET; + sin.sin_port = 0; + sin.sin_addr.s_addr = INADDR_ANY; + if (!::bind (get_socket (), (struct sockaddr *) &sin, sizeof sin)) + res = ::listen (get_socket (), backlog); + } + else if (get_addr_family () == AF_INET6) + { + struct sockaddr_in6 sin6; + memset (&sin6, 0, sizeof sin6); + sin6.sin6_family = AF_INET6; + if (!::bind (get_socket (), (struct sockaddr *) &sin6, sizeof sin6)) + res = ::listen (get_socket (), backlog); + } + } + if (!res) + connect_state (listener); /* gets set to connected on accepted socket. 
*/ + else + set_winsock_errno (); + return res; +} + +int +fhandler_socket_inet::accept4 (struct sockaddr *peer, int *len, int flags) +{ + int ret = -1; + /* Allows NULL peer and len parameters. */ + struct sockaddr_storage lpeer; + int llen = sizeof (struct sockaddr_storage); + + /* Windows event handling does not check for the validity of the desired + flags so we have to do it here. */ + if (connect_state () != listener) + { + WSASetLastError (WSAEINVAL); + set_winsock_errno (); + return -1; + } + + SOCKET res = INVALID_SOCKET; + while (!(res = wait_for_events (FD_ACCEPT | FD_CLOSE, 0)) + && (res = ::accept (get_socket (), (struct sockaddr *) &lpeer, &llen)) + == INVALID_SOCKET + && WSAGetLastError () == WSAEWOULDBLOCK) + ; + if (res == INVALID_SOCKET) + set_winsock_errno (); + else + { + cygheap_fdnew fd; + + if (fd >= 0) + { + fhandler_socket_inet *sock = (fhandler_socket_inet *) + build_fh_dev (dev ()); + if (sock && sock->set_socket_handle (res, get_addr_family (), + get_socket_type (), + get_socket_flags ()) == 0) + { + sock->async_io (false); /* set_socket_handle disables async. */ + /* No locking necessary at this point. */ + sock->wsock_events->events = wsock_events->events | FD_WRITE; + sock->wsock_events->owner = wsock_events->owner; + sock->connect_state (connected); + fd = sock; + if (fd <= 2) + set_std_handle (fd); + ret = fd; + if (peer) + { + memcpy (peer, &lpeer, MIN (*len, llen)); + *len = llen; + } + } + } + if (ret == -1) + ::closesocket (res); + } + return ret; +} + +int +fhandler_socket_inet::getsockname (struct sockaddr *name, int *namelen) +{ + int res = -1; + + /* WinSock just returns WSAEFAULT if the buffer is too small. Use a + big enough local buffer and truncate later as necessary, per POSIX. 
*/ + struct sockaddr_storage sock; + int len = sizeof sock; + res = ::getsockname (get_socket (), (struct sockaddr *) &sock, &len); + if (!res) + { + memcpy (name, &sock, MIN (*namelen, len)); + *namelen = len; + } + else + { + if (WSAGetLastError () == WSAEINVAL) + { + /* WinSock returns WSAEINVAL if the socket is locally + unbound. Per SUSv3 this is not an error condition. + We're faking a valid return value here by creating the + same content in the sockaddr structure as on Linux. */ + memset (&sock, 0, sizeof sock); + sock.ss_family = get_addr_family (); + switch (get_addr_family ()) + { + case AF_INET: + res = 0; + len = (int) sizeof (struct sockaddr_in); + break; + case AF_INET6: + res = 0; + len = (int) sizeof (struct sockaddr_in6); + break; + default: + WSASetLastError (WSAEOPNOTSUPP); + break; + } + if (!res) + { + memcpy (name, &sock, MIN (*namelen, len)); + *namelen = len; + } + } + if (res) + set_winsock_errno (); + } + return res; +} + +int +fhandler_socket_inet::getpeername (struct sockaddr *name, int *namelen) +{ + /* Always use a local big enough buffer and truncate later as necessary + per POSIX. WinSock unfortunately only returns WSAEFAULT if the buffer + is too small. */ + struct sockaddr_storage sock; + int len = sizeof sock; + int res = ::getpeername (get_socket (), (struct sockaddr *) &sock, &len); + if (res) + set_winsock_errno (); + else + { + memcpy (name, &sock, MIN (*namelen, len)); + *namelen = len; + } + return res; +} + +/* There's no DLL which exports the symbol WSARecvMsg. One has to call + WSAIoctl as below to fetch the function pointer. Why on earth did the + MS developers decide not to export a normal symbol for these extension + functions? 
*/ +inline int +get_ext_funcptr (SOCKET sock, void *funcptr) +{ + DWORD bret; + const GUID guid = WSAID_WSARECVMSG; + return WSAIoctl (sock, SIO_GET_EXTENSION_FUNCTION_POINTER, + (void *) &guid, sizeof (GUID), funcptr, sizeof (void *), + &bret, NULL, NULL); +} + +inline ssize_t +fhandler_socket_inet::recv_internal (LPWSAMSG wsamsg, bool use_recvmsg) +{ + ssize_t res = 0; + DWORD ret = 0, wret; + int evt_mask = FD_READ | ((wsamsg->dwFlags & MSG_OOB) ? FD_OOB : 0); + LPWSABUF &wsabuf = wsamsg->lpBuffers; + ULONG &wsacnt = wsamsg->dwBufferCount; + static NO_COPY LPFN_WSARECVMSG WSARecvMsg; + + /* CV 2014-10-26: Do not check for the connect_state at this point. In + certain scenarios there's no way to check the connect state reliably. + Example (hexchat): Parent process creates socket, forks, child process + calls connect, parent process calls read. Even if the event handling + allows to check for FD_CONNECT in the parent, there is always yet another + scenario we can easily break. */ + + DWORD wait_flags = wsamsg->dwFlags; + bool waitall = !!(wait_flags & MSG_WAITALL); + wsamsg->dwFlags &= (MSG_OOB | MSG_PEEK | MSG_DONTROUTE); + if (use_recvmsg) + { + if (!WSARecvMsg + && get_ext_funcptr (get_socket (), &WSARecvMsg) == SOCKET_ERROR) + { + if (wsamsg->Control.len > 0) + { + set_winsock_errno (); + return SOCKET_ERROR; + } + use_recvmsg = false; + } + else /* Only MSG_PEEK is supported by WSARecvMsg. */ + wsamsg->dwFlags &= MSG_PEEK; + } + if (waitall) + { + if (get_socket_type () != SOCK_STREAM) + { + WSASetLastError (WSAEOPNOTSUPP); + set_winsock_errno (); + return SOCKET_ERROR; + } + if (is_nonblocking () || (wsamsg->dwFlags & (MSG_OOB | MSG_PEEK))) + waitall = false; + } + + /* Note: Don't call WSARecvFrom(MSG_PEEK) without actually having data + waiting in the buffers, otherwise the event handling gets messed up + for some reason. 
*/ + while (!(res = wait_for_events (evt_mask | FD_CLOSE, wait_flags)) + || saw_shutdown_read ()) + { + if (use_recvmsg) + res = WSARecvMsg (get_socket (), wsamsg, &wret, NULL, NULL); + /* This is working around a really weird problem in WinSock. + + Assume you create a socket, fork the process (thus duplicating + the socket), connect the socket in the child, then call recv + on the original socket handle in the parent process. + In this scenario, calls to WinSock's recvfrom and WSARecvFrom + in the parent will fail with WSAEINVAL, regardless whether both + address parameters, name and namelen, are NULL or point to valid + storage. However, calls to recv and WSARecv succeed as expected. + Per MSDN, WSAEINVAL in the context of recv means "The socket has not + been bound". It is as if the recvfrom functions test if the socket + is bound locally, but in the parent process, WinSock doesn't know + about that and fails, while the same test is omitted in the recv + functions. + + This also covers another weird case: WinSock returns WSAEFAULT if + namelen is a valid pointer while name is NULL. Both parameters are + ignored for TCP sockets, so this only occurs when using UDP socket. */ + else if (!wsamsg->name || get_socket_type () == SOCK_STREAM) + res = WSARecv (get_socket (), wsabuf, wsacnt, &wret, &wsamsg->dwFlags, + NULL, NULL); + else + res = WSARecvFrom (get_socket (), wsabuf, wsacnt, &wret, + &wsamsg->dwFlags, wsamsg->name, &wsamsg->namelen, + NULL, NULL); + if (!res) + { + ret += wret; + if (!waitall) + break; + while (wret && wsacnt) + { + if (wsabuf->len > wret) + { + wsabuf->len -= wret; + wsabuf->buf += wret; + wret = 0; + } + else + { + wret -= wsabuf->len; + ++wsabuf; + --wsacnt; + } + } + if (!wret) + break; + } + else if (WSAGetLastError () != WSAEWOULDBLOCK) + break; + } + + if (res) + { + /* According to SUSv3, errno isn't set in that case and no error + condition is returned. 
*/ + if (WSAGetLastError () == WSAEMSGSIZE) + ret += wret; + else if (!ret) + { + /* ESHUTDOWN isn't defined for recv in SUSv3. Simply EOF is returned + in this case. */ + if (WSAGetLastError () == WSAESHUTDOWN) + ret = 0; + else + { + set_winsock_errno (); + return SOCKET_ERROR; + } + } + } + + return ret; +} + +ssize_t +fhandler_socket_inet::recvfrom (void *in_ptr, size_t len, int flags, + struct sockaddr *from, int *fromlen) +{ + char *ptr = (char *) in_ptr; + +#ifdef __x86_64__ + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { from, from && fromlen ? *fromlen : 0, + wsabuf, bufcnt, + { 0, NULL }, + (DWORD) flags }; + /* Don't use len as loop condition, it could be 0. */ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } +#else + WSABUF wsabuf = { len, ptr }; + WSAMSG wsamsg = { from, from && fromlen ? *fromlen : 0, + &wsabuf, 1, + { 0, NULL}, + (DWORD) flags }; +#endif + ssize_t ret = recv_internal (&wsamsg, false); + if (fromlen) + *fromlen = wsamsg.namelen; + return ret; +} + +ssize_t +fhandler_socket_inet::recvmsg (struct msghdr *msg, int flags) +{ + /* Disappointing but true: Even if WSARecvMsg is supported, it's only + supported for datagram and raw sockets. 
*/ + bool use_recvmsg = true; + if (get_socket_type () == SOCK_STREAM || get_addr_family () == AF_LOCAL) + { + use_recvmsg = false; + msg->msg_controllen = 0; + } + + WSABUF wsabuf[msg->msg_iovlen]; + WSABUF *wsaptr = wsabuf + msg->msg_iovlen; + const struct iovec *iovptr = msg->msg_iov + msg->msg_iovlen; + while (--wsaptr >= wsabuf) + { + wsaptr->len = (--iovptr)->iov_len; + wsaptr->buf = (char *) iovptr->iov_base; + } + WSAMSG wsamsg = { (struct sockaddr *) msg->msg_name, msg->msg_namelen, + wsabuf, (DWORD) msg->msg_iovlen, + { (DWORD) msg->msg_controllen, (char *) msg->msg_control }, + (DWORD) flags }; + ssize_t ret = recv_internal (&wsamsg, use_recvmsg); + if (ret >= 0) + { + msg->msg_namelen = wsamsg.namelen; + msg->msg_controllen = wsamsg.Control.len; + if (!CYGWIN_VERSION_CHECK_FOR_USING_ANCIENT_MSGHDR) + msg->msg_flags = wsamsg.dwFlags; + } + return ret; +} + +void __reg3 +fhandler_socket_inet::read (void *in_ptr, size_t& len) +{ + char *ptr = (char *) in_ptr; + +#ifdef __x86_64__ + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 }; + /* Don't use len as loop condition, it could be 0. 
*/ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } +#else + WSABUF wsabuf = { len, ptr }; + WSAMSG wsamsg = { NULL, 0, &wsabuf, 1, { 0, NULL }, 0 }; +#endif + + len = recv_internal (&wsamsg, false); +} + +ssize_t +fhandler_socket_inet::readv (const struct iovec *const iov, const int iovcnt, + ssize_t tot) +{ + WSABUF wsabuf[iovcnt]; + WSABUF *wsaptr = wsabuf + iovcnt; + const struct iovec *iovptr = iov + iovcnt; + while (--wsaptr >= wsabuf) + { + wsaptr->len = (--iovptr)->iov_len; + wsaptr->buf = (char *) iovptr->iov_base; + } + WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 }; + return recv_internal (&wsamsg, false); +} + +inline ssize_t +fhandler_socket_inet::send_internal (struct _WSAMSG *wsamsg, int flags) +{ + ssize_t res = 0; + DWORD ret = 0, sum = 0; + WSABUF out_buf[wsamsg->dwBufferCount]; + bool use_sendmsg = false; + DWORD wait_flags = flags & MSG_DONTWAIT; + bool nosignal = !!(flags & MSG_NOSIGNAL); + + flags &= (MSG_OOB | MSG_DONTROUTE); + if (wsamsg->Control.len > 0) + use_sendmsg = true; + /* Workaround for MSDN KB 823764: Split a message into chunks <= SO_SNDBUF. + in_idx is the index of the current lpBuffers from the input wsamsg buffer. + in_off is used to keep track of the next byte to write from a wsamsg + buffer which only gets partially written. */ + for (DWORD in_idx = 0, in_off = 0; + in_idx < wsamsg->dwBufferCount; + in_off >= wsamsg->lpBuffers[in_idx].len && (++in_idx, in_off = 0)) + { + /* Split a message into the least number of pieces to minimize the + number of WsaSendTo calls. Don't split datagram messages (bad idea). + out_idx is the index of the next buffer in the out_buf WSABUF, + also the number of buffers given to WSASendTo. + out_len is the number of bytes in the buffers given to WSASendTo. + Don't split datagram messages (very bad idea). 
*/ + DWORD out_idx = 0; + DWORD out_len = 0; + if (get_socket_type () == SOCK_STREAM) + { + do + { + out_buf[out_idx].buf = wsamsg->lpBuffers[in_idx].buf + in_off; + out_buf[out_idx].len = wsamsg->lpBuffers[in_idx].len - in_off; + out_len += out_buf[out_idx].len; + out_idx++; + } + while (out_len < (unsigned) wmem () + && (in_off = 0, ++in_idx < wsamsg->dwBufferCount)); + /* Tweak len of the last out_buf buffer so the entire number of bytes + is (less than or) equal to wmem (). Fix out_len as well since it's + used in a subsequent test expression. */ + if (out_len > (unsigned) wmem ()) + { + out_buf[out_idx - 1].len -= out_len - (unsigned) wmem (); + out_len = (unsigned) wmem (); + } + /* Add the bytes written from the current last buffer to in_off, + so in_off points to the next byte to be written from that buffer, + or beyond which lets the outer loop skip to the next buffer. */ + in_off += out_buf[out_idx - 1].len; + } + + do + { + if (use_sendmsg) + res = WSASendMsg (get_socket (), wsamsg, flags, &ret, NULL, NULL); + else if (get_socket_type () == SOCK_STREAM) + res = WSASendTo (get_socket (), out_buf, out_idx, &ret, flags, + wsamsg->name, wsamsg->namelen, NULL, NULL); + else + res = WSASendTo (get_socket (), wsamsg->lpBuffers, + wsamsg->dwBufferCount, &ret, flags, + wsamsg->name, wsamsg->namelen, NULL, NULL); + if (res && (WSAGetLastError () == WSAEWOULDBLOCK)) + { + LOCK_EVENTS; + wsock_events->events &= ~FD_WRITE; + UNLOCK_EVENTS; + } + } + while (res && (WSAGetLastError () == WSAEWOULDBLOCK) + && !(res = wait_for_events (FD_WRITE | FD_CLOSE, wait_flags))); + + if (!res) + { + sum += ret; + /* For streams, return to application if the number of bytes written + is less than the number of bytes we intended to write in a single + call to WSASendTo. Otherwise we would have to add code to + backtrack in the input buffers, which is questionable. There was + probably a good reason we couldn't write more. 
*/ + if (get_socket_type () != SOCK_STREAM || ret < out_len) + break; + } + else if (is_nonblocking () || WSAGetLastError() != WSAEWOULDBLOCK) + break; + } + + if (sum) + res = sum; + else if (res == SOCKET_ERROR) + { + set_winsock_errno (); + + /* Special handling for EPIPE and SIGPIPE. + + EPIPE is generated if the local end has been shut down on a connection + oriented socket. In this case the process will also receive a SIGPIPE + unless MSG_NOSIGNAL is set. */ + if ((get_errno () == ECONNABORTED || get_errno () == ESHUTDOWN) + && get_socket_type () == SOCK_STREAM) + { + set_errno (EPIPE); + if (!nosignal) + raise (SIGPIPE); + } + } + + return res; +} + +ssize_t +fhandler_socket_inet::sendto (const void *in_ptr, size_t len, int flags, + const struct sockaddr *to, int tolen) +{ + char *ptr = (char *) in_ptr; + struct sockaddr_storage sst; + + if (to && get_inet_addr_inet (to, tolen, &sst, &tolen) == SOCKET_ERROR) + return SOCKET_ERROR; + +#ifdef __x86_64__ + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { to ? (struct sockaddr *) &sst : NULL, tolen, + wsabuf, bufcnt, + { 0, NULL }, + 0 }; + /* Don't use len as loop condition, it could be 0. */ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } +#else + WSABUF wsabuf = { len, ptr }; + WSAMSG wsamsg = { to ? (struct sockaddr *) &sst : NULL, tolen, + &wsabuf, 1, + { 0, NULL}, + 0 }; +#endif + return send_internal (&wsamsg, flags); +} + +ssize_t +fhandler_socket_inet::sendmsg (const struct msghdr *msg, int flags) +{ + /* TODO: Descriptor passing on AF_LOCAL sockets. 
*/ + + struct sockaddr_storage sst; + int len = 0; + + if (msg->msg_name + && get_inet_addr_inet ((struct sockaddr *) msg->msg_name, + msg->msg_namelen, &sst, &len) == SOCKET_ERROR) + return SOCKET_ERROR; + + WSABUF wsabuf[msg->msg_iovlen]; + WSABUF *wsaptr = wsabuf; + const struct iovec *iovptr = msg->msg_iov; + for (int i = 0; i < msg->msg_iovlen; ++i) + { + wsaptr->len = iovptr->iov_len; + (wsaptr++)->buf = (char *) (iovptr++)->iov_base; + } + /* Disappointing but true: Even if WSASendMsg is supported, it's only + supported for datagram and raw sockets. */ + DWORD controllen = (DWORD) (get_socket_type () == SOCK_STREAM + || get_addr_family () == AF_LOCAL + ? 0 : msg->msg_controllen); + WSAMSG wsamsg = { msg->msg_name ? (struct sockaddr *) &sst : NULL, len, + wsabuf, (DWORD) msg->msg_iovlen, + { controllen, (char *) msg->msg_control }, + 0 }; + return send_internal (&wsamsg, flags); +} + +ssize_t +fhandler_socket_inet::write (const void *in_ptr, size_t len) +{ + char *ptr = (char *) in_ptr; + +#ifdef __x86_64__ + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 }; + /* Don't use len as loop condition, it could be 0. 
*/ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } +#else + WSABUF wsabuf = { len, ptr }; + WSAMSG wsamsg = { NULL, 0, &wsabuf, 1, { 0, NULL }, 0 }; +#endif + return send_internal (&wsamsg, 0); +} + +ssize_t +fhandler_socket_inet::writev (const struct iovec *const iov, const int iovcnt, + ssize_t tot) +{ + WSABUF wsabuf[iovcnt]; + WSABUF *wsaptr = wsabuf; + const struct iovec *iovptr = iov; + for (int i = 0; i < iovcnt; ++i) + { + wsaptr->len = iovptr->iov_len; + (wsaptr++)->buf = (char *) (iovptr++)->iov_base; + } + WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 }; + return send_internal (&wsamsg, 0); +} + +int +fhandler_socket_inet::setsockopt (int level, int optname, const void *optval, + socklen_t optlen) +{ + bool ignore = false; + int ret = -1; + + /* Preprocessing setsockopt. Set ignore to true if setsockopt call should + get skipped entirely. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_PEERCRED: + set_errno (ENOPROTOOPT); + return -1; + + case SO_REUSEADDR: + /* Per POSIX we must not be able to reuse a complete duplicate of a + local TCP address (same IP, same port), even if SO_REUSEADDR has + been set. This behaviour is maintained in WinSock for backward + compatibility, while the WinSock standard behaviour of stream + socket binding is equivalent to the POSIX behaviour as if + SO_REUSEADDR has been set. The SO_EXCLUSIVEADDRUSE option has + been added to allow an application to request POSIX standard + behaviour in the non-SO_REUSEADDR case. + + To emulate POSIX socket binding behaviour, note that SO_REUSEADDR + has been set but don't call setsockopt. Instead + fhandler_socket::bind sets SO_EXCLUSIVEADDRUSE if the application + did not set SO_REUSEADDR. 
*/ + if (optlen < (socklen_t) sizeof (int)) + { + set_errno (EINVAL); + return ret; + } + if (get_socket_type () == SOCK_STREAM) + ignore = true; + break; + + case SO_RCVTIMEO: + case SO_SNDTIMEO: + if (optlen < (socklen_t) sizeof (struct timeval)) + { + set_errno (EINVAL); + return ret; + } + if (timeval_to_ms ((struct timeval *) optval, + (optname == SO_RCVTIMEO) ? rcvtimeo () + : sndtimeo ())) + ret = 0; + else + set_errno (EDOM); + return ret; + + default: + break; + } + break; + + case IPPROTO_IP: + /* Old applications still use the old WinSock1 IPPROTO_IP values. */ + if (CYGWIN_VERSION_CHECK_FOR_USING_WINSOCK1_VALUES) + optname = convert_ws1_ip_optname (optname); + switch (optname) + { + case IP_TOS: + /* Winsock doesn't support setting the IP_TOS field with setsockopt + and TOS was never implemented for TCP anyway. setsockopt returns + WinSock error 10022, WSAEINVAL when trying to set the IP_TOS + field. We just return 0 instead. */ + ignore = true; + break; + + default: + break; + } + break; + + case IPPROTO_IPV6: + { + switch (optname) + { + case IPV6_TCLASS: + /* Unsupported */ + ignore = true; + break; + + default: + break; + } + } + default: + break; + } + + /* Call Winsock setsockopt (or not) */ + if (ignore) + ret = 0; + else + { + ret = ::setsockopt (get_socket (), level, optname, (const char *) optval, + optlen); + if (ret == SOCKET_ERROR) + { + set_winsock_errno (); + return ret; + } + } + + if (optlen == (socklen_t) sizeof (int)) + debug_printf ("setsockopt optval=%x", *(int *) optval); + + /* Postprocessing setsockopt, setting fhandler_socket members, etc. 
*/ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_REUSEADDR: + saw_reuseaddr (*(int *) optval); + break; + + case SO_RCVBUF: + rmem (*(int *) optval); + break; + + case SO_SNDBUF: + wmem (*(int *) optval); + break; + + default: + break; + } + break; + + default: + break; + } + + return ret; +} + +int +fhandler_socket_inet::getsockopt (int level, int optname, const void *optval, + socklen_t *optlen) +{ + bool onebyte = false; + int ret = -1; + + /* Preprocessing getsockopt. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_PEERCRED: + set_errno (ENOPROTOOPT); + return -1; + + case SO_REUSEADDR: + { + unsigned int *reuseaddr = (unsigned int *) optval; + + if (*optlen < (socklen_t) sizeof *reuseaddr) + { + set_errno (EINVAL); + return -1; + } + *reuseaddr = saw_reuseaddr(); + *optlen = (socklen_t) sizeof *reuseaddr; + return 0; + } + + case SO_RCVTIMEO: + case SO_SNDTIMEO: + { + struct timeval *time_out = (struct timeval *) optval; + + if (*optlen < (socklen_t) sizeof *time_out) + { + set_errno (EINVAL); + return -1; + } + DWORD ms = (optname == SO_RCVTIMEO) ? rcvtimeo () : sndtimeo (); + if (ms == 0 || ms == INFINITE) + { + time_out->tv_sec = 0; + time_out->tv_usec = 0; + } + else + { + time_out->tv_sec = ms / MSPERSEC; + time_out->tv_usec = ((ms % MSPERSEC) * USPERSEC) / MSPERSEC; + } + *optlen = (socklen_t) sizeof *time_out; + return 0; + } + + case SO_TYPE: + { + unsigned int *type = (unsigned int *) optval; + *type = get_socket_type (); + *optlen = (socklen_t) sizeof *type; + return 0; + } + + default: + break; + } + break; + + case IPPROTO_IP: + /* Old applications still use the old WinSock1 IPPROTO_IP values. 
*/ + if (CYGWIN_VERSION_CHECK_FOR_USING_WINSOCK1_VALUES) + optname = convert_ws1_ip_optname (optname); + break; + + default: + break; + } + + /* Call Winsock getsockopt */ + ret = ::getsockopt (get_socket (), level, optname, (char *) optval, + (int *) optlen); + if (ret == SOCKET_ERROR) + { + set_winsock_errno (); + return ret; + } + + /* Postprocessing getsockopt, setting fhandler_socket members, etc. Set + onebyte true for options returning BOOLEAN instead of a boolean DWORD. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_ERROR: + { + int *e = (int *) optval; + debug_printf ("WinSock SO_ERROR = %d", *e); + *e = find_winsock_errno (*e); + } + break; + + case SO_KEEPALIVE: + case SO_DONTROUTE: + onebyte = true; + break; + + default: + break; + } + break; + case IPPROTO_TCP: + switch (optname) + { + case TCP_NODELAY: + onebyte = true; + break; + + default: + break; + } + default: + break; + } + + if (onebyte) + { + /* Regression in Vista and later: instead of a 4 byte BOOL value, a + 1 byte BOOLEAN value is returned, in contrast to older systems and + the documentation. Since an int type is expected by the calling + application, we convert the result here. For some reason only three + BSD-compatible socket options seem to be affected. */ + BOOLEAN *in = (BOOLEAN *) optval; + int *out = (int *) optval; + *out = *in; + *optlen = 4; + } + + return ret; +} diff --git a/winsup/cygwin/fhandler_socket_local.cc b/winsup/cygwin/fhandler_socket_local.cc new file mode 100644 index 000000000..3d48a8159 --- /dev/null +++ b/winsup/cygwin/fhandler_socket_local.cc @@ -0,0 +1,1844 @@ +/* fhandler_socket_local.cc. + + See fhandler.h for a description of the fhandler classes. + + This file is part of Cygwin. + + This software is a copyrighted work licensed under the terms of the + Cygwin license. Please consult the file "CYGWIN_LICENSE" for + details. 
*/ + +#define __INSIDE_CYGWIN_NET__ +#define USE_SYS_TYPES_FD_SET + +#include "winsup.h" +#ifdef __x86_64__ +/* 2014-04-24: Current Mingw headers define sockaddr_in6 using u_long (8 byte) + because a redefinition for LP64 systems is missing. This leads to a wrong + definition and size of sockaddr_in6 when building with winsock headers. + This definition is also required to use the right u_long type in subsequent + function calls. */ +#undef u_long +#define u_long __ms_u_long +#endif +#include +#include +#include +#include +#include "cygerrno.h" +#include "security.h" +#include "path.h" +#include "fhandler.h" +#include "dtable.h" +#include "cygheap.h" +#include +#include "cygwin/version.h" +#include "perprocess.h" +#include "shared_info.h" +#include "sigproc.h" +#include "wininfo.h" +#include +#include +#include +#include +#include "cygtls.h" +#include +#include "ntdll.h" +#include "miscfuncs.h" +#include "tls_pbuf.h" + +extern "C" { + int sscanf (const char *, const char *, ...); +} /* End of "C" section */ + +#define ASYNC_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT) +#define EVENT_MASK (FD_READ|FD_WRITE|FD_OOB|FD_ACCEPT|FD_CONNECT|FD_CLOSE) + +#define LOCK_EVENTS \ + if (wsock_mtx && \ + WaitForSingleObject (wsock_mtx, INFINITE) != WAIT_FAILED) \ + { + +#define UNLOCK_EVENTS \ + ReleaseMutex (wsock_mtx); \ + } + +static inline mode_t +adjust_socket_file_mode (mode_t mode) +{ + /* Kludge: Don't allow to remove read bit on socket files for + user/group/other, if the accompanying write bit is set. It would + be nice to have exact permissions on a socket file, but it's + necessary that somebody able to access the socket can always read + the contents of the socket file to avoid spurious "permission + denied" messages. 
*/
+  return mode | ((mode & (S_IWUSR | S_IWGRP | S_IWOTH)) << 1);
+}
+
+/* cygwin internal: map sockaddr into internet domain address.
+
+   IN/INLEN is the AF_LOCAL address handed in by the caller.  On success
+   the matching loopback AF_INET address is copied to OUT, its size is
+   stored in *OUTLEN and 0 is returned.  On failure errno is set and
+   SOCKET_ERROR is returned.
+
+   Two address forms are handled:
+
+   - An abstract name of the form "\0d....\0"; the port is parsed as four
+     hex digits starting at sa_data + 2.  TYPE and SECRET are not touched
+     on this path.
+
+   - Anything else is treated as a path.  The file must exist, carry
+     FILE_ATTRIBUTE_SYSTEM and start with SOCKET_COOKIE.  Port, socket
+     type character and the four secret words are parsed from its
+     content.
+
+   TYPE, if non-NULL, receives SOCK_STREAM, SOCK_DGRAM or 0.  SECRET, if
+   non-NULL, must have room for four ints; if NULL a local scratch buffer
+   is used instead.  */
+static int
+get_inet_addr_local (const struct sockaddr *in, int inlen,
+                     struct sockaddr_storage *out, int *outlen,
+                     int *type = NULL, int *secret = NULL)
+{
+  int secret_buf [4];
+  int* secret_ptr = (secret ? : secret_buf);
+
+  /* Check for abstract socket.  These are generated for AF_LOCAL datagram
+     sockets in recv_internal, to allow a datagram server to use sendto
+     after recvfrom. */
+  if (inlen >= (int) sizeof (in->sa_family) + 7
+      && in->sa_data[0] == '\0' && in->sa_data[1] == 'd'
+      && in->sa_data[6] == '\0')
+    {
+      struct sockaddr_in addr;
+      addr.sin_family = AF_INET;
+      sscanf (in->sa_data + 2, "%04hx", &addr.sin_port);
+      addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
+      *outlen = sizeof addr;
+      memcpy (out, &addr, *outlen);
+      return 0;
+    }
+
+  /* AF_LOCAL/AF_UNIX only */
+  path_conv pc (in->sa_data, PC_SYM_FOLLOW);
+  if (pc.error)
+    {
+      set_errno (pc.error);
+      return SOCKET_ERROR;
+    }
+  if (!pc.exists ())
+    {
+      set_errno (ENOENT);
+      return SOCKET_ERROR;
+    }
+  /* Do NOT test for the file being a socket file here.  The socket file
+     creation is not an atomic operation, so there is a chance that socket
+     files which are just in the process of being created are recognized
+     as non-socket files.  To work around this problem we now create the
+     file with all sharing disabled.  If the below NtOpenFile fails
+     with STATUS_SHARING_VIOLATION we know that the file already exists,
+     but the creating process isn't finished yet.  So we yield and try
+     again, until we can either open the file successfully, or some error
+     other than STATUS_SHARING_VIOLATION occurs.
+     Since we now don't know if the file is actually a socket file, we
+     perform this check here explicitly.  */
+  NTSTATUS status;
+  HANDLE fh;
+  OBJECT_ATTRIBUTES attr;
+  IO_STATUS_BLOCK io;
+
+  pc.get_object_attr (attr, sec_none_nih);
+  do
+    {
+      status = NtOpenFile (&fh, GENERIC_READ | SYNCHRONIZE, &attr, &io,
+                           FILE_SHARE_VALID_FLAGS,
+                           FILE_SYNCHRONOUS_IO_NONALERT
+                           | FILE_OPEN_FOR_BACKUP_INTENT
+                           | FILE_NON_DIRECTORY_FILE);
+      if (status == STATUS_SHARING_VIOLATION)
+        {
+          /* While we hope that the sharing violation is only temporary, we
+             also could easily get stuck here, waiting for a file in use by
+             some greedy Win32 application.  Therefore we should never wait
+             endlessly without checking for signals and thread cancel event. */
+          pthread_testcancel ();
+          if (cygwait (NULL, cw_nowait, cw_sig_eintr) == WAIT_SIGNALED
+              && !_my_tls.call_signal_handler ())
+            {
+              set_errno (EINTR);
+              return SOCKET_ERROR;
+            }
+          yield ();
+        }
+      else if (!NT_SUCCESS (status))
+        {
+          __seterrno_from_nt_status (status);
+          return SOCKET_ERROR;
+        }
+    }
+  while (status == STATUS_SHARING_VIOLATION);
+  /* Now test for the SYSTEM bit. */
+  FILE_BASIC_INFORMATION fbi;
+  status = NtQueryInformationFile (fh, &io, &fbi, sizeof fbi,
+                                   FileBasicInformation);
+  if (!NT_SUCCESS (status))
+    {
+      /* The file has been opened successfully at this point, so the
+         handle must be closed on this error path as well. */
+      NtClose (fh);
+      __seterrno_from_nt_status (status);
+      return SOCKET_ERROR;
+    }
+  if (!(fbi.FileAttributes & FILE_ATTRIBUTE_SYSTEM))
+    {
+      NtClose (fh);
+      set_errno (EBADF);
+      return SOCKET_ERROR;
+    }
+  /* Eventually check the content and fetch the required information.
*/ + char buf[128]; + memset (buf, 0, sizeof buf); + status = NtReadFile (fh, NULL, NULL, NULL, &io, buf, 128, NULL, NULL); + NtClose (fh); + if (NT_SUCCESS (status)) + { + struct sockaddr_in sin; + char ctype; + sin.sin_family = AF_INET; + if (strncmp (buf, SOCKET_COOKIE, strlen (SOCKET_COOKIE))) + { + set_errno (EBADF); + return SOCKET_ERROR; + } + sscanf (buf + strlen (SOCKET_COOKIE), "%hu %c %08x-%08x-%08x-%08x", + &sin.sin_port, + &ctype, + secret_ptr, secret_ptr + 1, secret_ptr + 2, secret_ptr + 3); + sin.sin_port = htons (sin.sin_port); + sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); + memcpy (out, &sin, sizeof sin); + *outlen = sizeof sin; + if (type) + *type = (ctype == 's' ? SOCK_STREAM : + ctype == 'd' ? SOCK_DGRAM + : 0); + return 0; + } + __seterrno_from_nt_status (status); + return SOCKET_ERROR; +} + +fhandler_socket_local::fhandler_socket_local () : + fhandler_socket (), + sun_path (NULL), + peer_sun_path (NULL) +{ +} + +fhandler_socket_local::~fhandler_socket_local () +{ + if (sun_path) + cfree (sun_path); + if (peer_sun_path) + cfree (peer_sun_path); +} + +int +fhandler_socket_local::socket (int af, int type, int protocol, int flags) +{ + SOCKET sock; + int ret; + + sock = ::socket (AF_INET, type, protocol); + if (sock == INVALID_SOCKET) + { + set_winsock_errno (); + return -1; + } + ret = set_socket_handle (sock, af, type, flags); + if (ret < 0) + ::closesocket (sock); + return ret; +} + +int +fhandler_socket_local::socketpair (int af, int type, int protocol, int flags, + fhandler_socket *_fh_out) +{ + SOCKET insock = INVALID_SOCKET; + SOCKET outsock = INVALID_SOCKET; + SOCKET sock = INVALID_SOCKET; + struct sockaddr_in sock_in, sock_out; + int len; + + fhandler_socket_local *fh_out = (fhandler_socket_local *) _fh_out; + /* create listening socket */ + sock = ::socket (AF_INET, type, 0); + if (sock == INVALID_SOCKET) + { + set_winsock_errno (); + goto err; + } + /* bind to unused port */ + sock_in.sin_family = AF_INET; + sock_in.sin_port = 0; + 
sock_in.sin_addr.s_addr = htonl (INADDR_LOOPBACK); + if (::bind (sock, (struct sockaddr *) &sock_in, sizeof (sock_in)) < 0) + { + set_winsock_errno (); + goto err; + } + /* fetch socket name */ + len = sizeof (sock_in); + if (::getsockname (sock, (struct sockaddr *) &sock_in, &len) < 0) + { + set_winsock_errno (); + goto err; + } + /* on stream sockets, create listener */ + if (type == SOCK_STREAM && ::listen (sock, 2) < 0) + { + set_winsock_errno (); + goto err; + } + /* create connecting socket */ + outsock = ::socket (AF_INET, type, 0); + if (outsock == INVALID_SOCKET) + { + set_winsock_errno (); + goto err; + } + /* on datagram sockets, bind connecting socket */ + if (type == SOCK_DGRAM) + { + sock_out.sin_family = AF_INET; + sock_out.sin_port = 0; + sock_out.sin_addr.s_addr = htonl (INADDR_LOOPBACK); + if (::bind (outsock, (struct sockaddr *) &sock_out, + sizeof (sock_out)) < 0) + { + set_winsock_errno (); + goto err; + } + /* ...and fetch name */ + len = sizeof (sock_out); + if (::getsockname (outsock, (struct sockaddr *) &sock_out, &len) < 0) + { + set_winsock_errno (); + goto err; + } + } + sock_in.sin_addr.s_addr = htonl (INADDR_LOOPBACK); + if (type == SOCK_DGRAM) + sock_out.sin_addr.s_addr = htonl (INADDR_LOOPBACK); + /* connect */ + if (::connect (outsock, (struct sockaddr *) &sock_in, sizeof (sock_in)) < 0) + { + set_winsock_errno (); + goto err; + } + if (type == SOCK_STREAM) + { + /* on stream sockets, accept connection and close listener */ + len = sizeof (sock_in); + insock = ::accept (sock, (struct sockaddr *) &sock_in, &len); + if (insock == INVALID_SOCKET) + { + set_winsock_errno (); + goto err; + } + ::closesocket (sock); + } + else + { + /* on datagram sockets, connect vice versa */ + if (::connect (sock, (struct sockaddr *) &sock_out, + sizeof (sock_out)) < 0) + { + set_winsock_errno (); + goto err; + } + insock = sock; + } + sock = INVALID_SOCKET; + + /* postprocessing */ + connect_state (connected); + fh_out->connect_state (connected); + if 
(af == AF_LOCAL && type == SOCK_STREAM) + { + af_local_set_sockpair_cred (); + fh_out->af_local_set_sockpair_cred (); + } + if (set_socket_handle (insock, af, type, flags) < 0 + || fh_out->set_socket_handle (outsock, af, type, flags) < 0) + goto err; + + return 0; + +err: + if (sock != INVALID_SOCKET) + ::closesocket (sock); + if (insock != INVALID_SOCKET) + ::closesocket (insock); + if (outsock != INVALID_SOCKET) + ::closesocket (outsock); + return -1; +} + +void +fhandler_socket_local::af_local_set_sockpair_cred () +{ + sec_pid = sec_peer_pid = getpid (); + sec_uid = sec_peer_uid = geteuid32 (); + sec_gid = sec_peer_gid = getegid32 (); +} + +void +fhandler_socket_local::af_local_setblocking (bool &async, bool &nonblocking) +{ + async = async_io (); + nonblocking = is_nonblocking (); + if (async) + { + WSAAsyncSelect (get_socket (), winmsg, 0, 0); + WSAEventSelect (get_socket (), wsock_evt, EVENT_MASK); + } + set_nonblocking (false); + async_io (false); +} + +void +fhandler_socket_local::af_local_unsetblocking (bool async, bool nonblocking) +{ + if (nonblocking) + set_nonblocking (true); + if (async) + { + WSAAsyncSelect (get_socket (), winmsg, WM_ASYNCIO, ASYNC_MASK); + async_io (true); + } +} + +bool +fhandler_socket_local::af_local_recv_secret () +{ + int out[4] = { 0, 0, 0, 0 }; + int rest = sizeof out; + char *ptr = (char *) out; + while (rest > 0) + { + int ret = recvfrom (ptr, rest, 0, NULL, NULL); + if (ret <= 0) + break; + rest -= ret; + ptr += ret; + } + if (rest == 0) + { + debug_printf ("Received af_local secret: %08x-%08x-%08x-%08x", + out[0], out[1], out[2], out[3]); + if (out[0] != connect_secret[0] || out[1] != connect_secret[1] + || out[2] != connect_secret[2] || out[3] != connect_secret[3]) + { + debug_printf ("Receiving af_local secret mismatch"); + return false; + } + } + else + debug_printf ("Receiving af_local secret failed"); + return rest == 0; +} + +bool +fhandler_socket_local::af_local_send_secret () +{ + int rest = sizeof connect_secret; 
+  char *ptr = (char *) connect_secret;
+  while (rest > 0)
+    {
+      int ret = sendto (ptr, rest, 0, NULL, 0);
+      if (ret <= 0)
+        break;
+      rest -= ret;
+      ptr += ret;
+    }
+  debug_printf ("Sending af_local secret %s", rest == 0 ? "succeeded"
+                                                        : "failed");
+  return rest == 0;
+}
+
+/* Receive the peer's struct ucred, calling recvfrom until sizeof (struct
+   ucred) bytes have arrived or recvfrom returns <= 0.  On complete
+   reception the values are stored in sec_peer_pid, sec_peer_uid and
+   sec_peer_gid.  Returns true only if the whole structure was received. */
+bool
+fhandler_socket_local::af_local_recv_cred ()
+{
+  struct ucred out = { (pid_t) 0, (uid_t) -1, (gid_t) -1 };
+  int rest = sizeof out;
+  char *ptr = (char *) &out;
+  while (rest > 0)
+    {
+      int ret = recvfrom (ptr, rest, 0, NULL, NULL);
+      if (ret <= 0)
+        break;
+      rest -= ret;
+      ptr += ret;
+    }
+  if (rest == 0)
+    {
+      debug_printf ("Received eid credentials: pid: %d, uid: %d, gid: %d",
+                    out.pid, out.uid, out.gid);
+      sec_peer_pid = out.pid;
+      sec_peer_uid = out.uid;
+      sec_peer_gid = out.gid;
+    }
+  else
+    debug_printf ("Receiving eid credentials failed");
+  return rest == 0;
+}
+
+/* Send sec_pid, sec_uid and sec_gid to the peer as a struct ucred,
+   calling sendto until all bytes are written or sendto returns <= 0.
+   Returns true only if the whole structure was sent. */
+bool
+fhandler_socket_local::af_local_send_cred ()
+{
+  struct ucred in = { sec_pid, sec_uid, sec_gid };
+  int rest = sizeof in;
+  /* Byte-wise view of the credentials for the partial-send loop below. */
+  char *ptr = (char *) &in;
+  while (rest > 0)
+    {
+      int ret = sendto (ptr, rest, 0, NULL, 0);
+      if (ret <= 0)
+        break;
+      rest -= ret;
+      ptr += ret;
+    }
+  if (rest == 0)
+    debug_printf ("Sending eid credentials succeeded");
+  else
+    debug_printf ("Sending eid credentials failed");
+  return rest == 0;
+}
+
+/* Connecting side of the AF_LOCAL handshake.
+
+   Returns 0 without doing anything if the socket is not SOCK_STREAM or
+   if no_getpeereid () is set.  Otherwise the socket is switched to
+   blocking mode and the exchange is: send secret, receive secret, send
+   credentials, receive credentials.  If any step fails the socket is shut
+   down, the WinSock error is set to WSAECONNREFUSED and -1 is returned.
+   On success the previous async/nonblocking state is restored and 0 is
+   returned. */
+int
+fhandler_socket_local::af_local_connect ()
+{
+  bool orig_async_io, orig_is_nonblocking;
+
+  if (get_socket_type () != SOCK_STREAM)
+    return 0;
+
+  debug_printf ("af_local_connect called, no_getpeereid=%d", no_getpeereid ());
+  if (no_getpeereid ())
+    return 0;
+
+  af_local_setblocking (orig_async_io, orig_is_nonblocking);
+  if (!af_local_send_secret () || !af_local_recv_secret ()
+      || !af_local_send_cred () || !af_local_recv_cred ())
+    {
+      /* NOTE(review): this path returns without calling
+         af_local_unsetblocking, so the async/nonblocking state changed
+         above is not restored -- confirm this is intended. */
+      debug_printf ("accept from unauthorized server");
+      ::shutdown (get_socket (), SD_BOTH);
+      WSASetLastError (WSAECONNREFUSED);
+      return -1;
+    }
+  af_local_unsetblocking (orig_async_io, orig_is_nonblocking);
+  return 0;
+}
+
+int
+fhandler_socket_local::af_local_accept () +{ + bool orig_async_io, orig_is_nonblocking; + + debug_printf ("af_local_accept called, no_getpeereid=%d", no_getpeereid ()); + if (no_getpeereid ()) + return 0; + + af_local_setblocking (orig_async_io, orig_is_nonblocking); + if (!af_local_recv_secret () || !af_local_send_secret () + || !af_local_recv_cred () || !af_local_send_cred ()) + { + debug_printf ("connect from unauthorized client"); + ::shutdown (get_socket (), SD_BOTH); + ::closesocket (get_socket ()); + WSASetLastError (WSAECONNABORTED); + return -1; + } + af_local_unsetblocking (orig_async_io, orig_is_nonblocking); + return 0; +} + +int +fhandler_socket_local::af_local_set_no_getpeereid () +{ + if (get_addr_family () != AF_LOCAL || get_socket_type () != SOCK_STREAM) + { + set_errno (EINVAL); + return -1; + } + if (connect_state () != unconnected) + { + set_errno (EALREADY); + return -1; + } + + debug_printf ("no_getpeereid set"); + no_getpeereid (true); + return 0; +} + +void +fhandler_socket_local::af_local_set_cred () +{ + sec_pid = getpid (); + sec_uid = geteuid32 (); + sec_gid = getegid32 (); + sec_peer_pid = (pid_t) 0; + sec_peer_uid = (uid_t) -1; + sec_peer_gid = (gid_t) -1; +} + +void +fhandler_socket_local::af_local_copy (fhandler_socket_local *sock) +{ + sock->connect_secret[0] = connect_secret[0]; + sock->connect_secret[1] = connect_secret[1]; + sock->connect_secret[2] = connect_secret[2]; + sock->connect_secret[3] = connect_secret[3]; + sock->sec_pid = sec_pid; + sock->sec_uid = sec_uid; + sock->sec_gid = sec_gid; + sock->sec_peer_pid = sec_peer_pid; + sock->sec_peer_uid = sec_peer_uid; + sock->sec_peer_gid = sec_peer_gid; + sock->no_getpeereid (no_getpeereid ()); +} + +void +fhandler_socket_local::af_local_set_secret (char *buf) +{ + if (!RtlGenRandom (connect_secret, sizeof (connect_secret))) + bzero ((char*) connect_secret, sizeof (connect_secret)); + __small_sprintf (buf, "%08x-%08x-%08x-%08x", + connect_secret [0], connect_secret [1], + 
connect_secret [2], connect_secret [3]); +} + +int +fhandler_socket_local::dup (fhandler_base *child, int flags) +{ + fhandler_socket_local *fhs = (fhandler_socket_local *) child; + fhs->set_sun_path (get_sun_path ()); + fhs->set_peer_sun_path (get_peer_sun_path ()); + return fhandler_socket::dup (child, flags); +} + +int __reg2 +fhandler_socket_local::fstat (struct stat *buf) +{ + int res; + + if (!get_sun_path () || get_sun_path ()[0] == '\0') + return fhandler_socket::fstat (buf); + res = fhandler_base::fstat_fs (buf); + if (!res) + { + buf->st_mode = (buf->st_mode & ~S_IFMT) | S_IFSOCK; + buf->st_size = 0; + } + return res; +} + +int __reg2 +fhandler_socket_local::fstatvfs (struct statvfs *sfs) +{ + if (!get_sun_path () || get_sun_path ()[0] == '\0') + return fhandler_socket::fstatvfs (sfs); + fhandler_disk_file fh (pc); + fh.get_device () = FH_FS; + return fh.fstatvfs (sfs); +} + +int +fhandler_socket_local::fchmod (mode_t newmode) +{ + if (!get_sun_path () || get_sun_path ()[0] == '\0') + return 0; + fhandler_disk_file fh (pc); + fh.get_device () = FH_FS; + return fh.fchmod (S_IFSOCK | adjust_socket_file_mode (newmode)); +} + +int +fhandler_socket_local::fchown (uid_t uid, gid_t gid) +{ + if (!get_sun_path () || get_sun_path ()[0] == '\0') + return fhandler_socket::fchown (uid, gid); + fhandler_disk_file fh (pc); + return fh.fchown (uid, gid); +} + +int +fhandler_socket_local::facl (int cmd, int nentries, aclent_t *aclbufp) +{ + if (!get_sun_path () || get_sun_path ()[0] == '\0') + return fhandler_socket::facl (cmd, nentries, aclbufp); + fhandler_disk_file fh (pc); + return fh.facl (cmd, nentries, aclbufp); +} + +int +fhandler_socket_local::link (const char *newpath) +{ + if (!get_sun_path () || get_sun_path ()[0] == '\0') + return fhandler_socket::link (newpath); + fhandler_disk_file fh (pc); + return fh.link (newpath); +} + +int +fhandler_socket_local::bind (const struct sockaddr *name, int namelen) +{ + int res = -1; + +#define un_addr ((struct sockaddr_un 
*) name) + struct sockaddr_in sin; + int len = namelen - offsetof (struct sockaddr_un, sun_path); + + /* Check that name is within bounds. Don't check if the string is + NUL-terminated, because there are projects out there which set + namelen to a value which doesn't cover the trailing NUL. */ + if (len <= 1 || (len = strnlen (un_addr->sun_path, len)) > UNIX_PATH_MAX) + { + set_errno (len <= 1 ? (len == 1 ? ENOENT : EINVAL) : ENAMETOOLONG); + return -1; + } + /* Copy over the sun_path string into a buffer big enough to add a + trailing NUL. */ + char sun_path[len + 1]; + strncpy (sun_path, un_addr->sun_path, len); + sun_path[len] = '\0'; + + /* This isn't entirely foolproof, but we check first if the file exists + so we can return with EADDRINUSE before having bound the socket. + This allows an application to call bind again on the same socket using + another filename. If we bind first, the application will not be able + to call bind successfully ever again. */ + path_conv pc (sun_path, PC_SYM_FOLLOW); + if (pc.error) + { + set_errno (pc.error); + return -1; + } + if (pc.exists ()) + { + set_errno (EADDRINUSE); + return -1; + } + + sin.sin_family = AF_INET; + sin.sin_port = 0; + sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); + if (::bind (get_socket (), (sockaddr *) &sin, len = sizeof sin)) + { + syscall_printf ("AF_LOCAL: bind failed"); + set_winsock_errno (); + return -1; + } + if (::getsockname (get_socket (), (sockaddr *) &sin, &len)) + { + syscall_printf ("AF_LOCAL: getsockname failed"); + set_winsock_errno (); + return -1; + } + + sin.sin_port = ntohs (sin.sin_port); + debug_printf ("AF_LOCAL: socket bound to port %u", sin.sin_port); + + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + DWORD fattr = FILE_ATTRIBUTE_SYSTEM; + if (!pc.has_acls () + && !(mode & ~cygheap->umask & (S_IWUSR | S_IWGRP | S_IWOTH))) + fattr |= FILE_ATTRIBUTE_READONLY; + SECURITY_ATTRIBUTES sa = sec_none_nih; + NTSTATUS status; + HANDLE fh; + OBJECT_ATTRIBUTES 
attr; + IO_STATUS_BLOCK io; + ULONG access = DELETE | FILE_GENERIC_WRITE; + + /* If the filesystem supports ACLs, we will overwrite the DACL after the + call to NtCreateFile. This requires a handle with READ_CONTROL and + WRITE_DAC access, otherwise get_file_sd and set_file_sd both have to + open the file again. + FIXME: On remote NTFS shares open sometimes fails because even the + creator of the file doesn't have the right to change the DACL. + I don't know what setting that is or how to recognize such a share, + so for now we don't request WRITE_DAC on remote drives. */ + if (pc.has_acls () && !pc.isremote ()) + access |= READ_CONTROL | WRITE_DAC | WRITE_OWNER; + + status = NtCreateFile (&fh, access, pc.get_object_attr (attr, sa), &io, + NULL, fattr, 0, FILE_CREATE, + FILE_NON_DIRECTORY_FILE + | FILE_SYNCHRONOUS_IO_NONALERT + | FILE_OPEN_FOR_BACKUP_INTENT, + NULL, 0); + if (!NT_SUCCESS (status)) + { + if (io.Information == FILE_EXISTS) + set_errno (EADDRINUSE); + else + __seterrno_from_nt_status (status); + } + else + { + if (pc.has_acls ()) + set_created_file_access (fh, pc, mode); + char buf[sizeof (SOCKET_COOKIE) + 80]; + __small_sprintf (buf, "%s%u %c ", SOCKET_COOKIE, sin.sin_port, + get_socket_type () == SOCK_STREAM ? 's' + : get_socket_type () == SOCK_DGRAM ? 
'd' : '-'); + af_local_set_secret (strchr (buf, '\0')); + DWORD blen = strlen (buf) + 1; + status = NtWriteFile (fh, NULL, NULL, NULL, &io, buf, blen, NULL, 0); + if (!NT_SUCCESS (status)) + { + __seterrno_from_nt_status (status); + FILE_DISPOSITION_INFORMATION fdi = { TRUE }; + status = NtSetInformationFile (fh, &io, &fdi, sizeof fdi, + FileDispositionInformation); + if (!NT_SUCCESS (status)) + debug_printf ("Setting delete dispostion failed, status = %y", + status); + } + else + { + set_sun_path (sun_path); + res = 0; + } + NtClose (fh); + } +#undef un_addr + + return res; +} + +int +fhandler_socket_local::connect (const struct sockaddr *name, int namelen) +{ + struct sockaddr_storage sst; + int type = 0; + + if (get_inet_addr_local (name, namelen, &sst, &namelen, &type, connect_secret) + == SOCKET_ERROR) + return SOCKET_ERROR; + + if (get_socket_type () != type) + { + WSASetLastError (WSAEPROTOTYPE); + set_winsock_errno (); + return SOCKET_ERROR; + } + + set_peer_sun_path (name->sa_data); + + /* Don't move af_local_set_cred into af_local_connect which may be called + via select, possibly running under another identity. Call early here, + because af_local_connect is called in wait_for_events. */ + if (get_socket_type () == SOCK_STREAM) + af_local_set_cred (); + + /* Initialize connect state to "connect_pending". State is ultimately set + to "connected" or "connect_failed" in wait_for_events when the FD_CONNECT + event occurs. Note that the underlying OS sockets are always non-blocking + and a successfully initiated non-blocking Winsock connect always returns + WSAEWOULDBLOCK. Thus it's safe to rely on event handling. + + Check for either unconnected or connect_failed since in both cases it's + allowed to retry connecting the socket. It's also ok (albeit ugly) to + call connect to check if a previous non-blocking connect finished. + + Set connect_state before calling connect, otherwise a race condition with + an already running select or poll might occur. 
*/ + if (connect_state () == unconnected || connect_state () == connect_failed) + connect_state (connect_pending); + + int res = ::connect (get_socket (), (struct sockaddr *) &sst, namelen); + if (!is_nonblocking () + && res == SOCKET_ERROR + && WSAGetLastError () == WSAEWOULDBLOCK) + res = wait_for_events (FD_CONNECT | FD_CLOSE, 0); + + if (res) + { + DWORD err = WSAGetLastError (); + + /* Some applications use the ugly technique to check if a non-blocking + connect succeeded by calling connect again, until it returns EISCONN. + This circumvents the event handling and connect_state is never set. + Thus we check for this situation here. */ + if (err == WSAEISCONN) + connect_state (connected); + /* Winsock returns WSAEWOULDBLOCK if the non-blocking socket cannot be + conected immediately. Convert to POSIX/Linux compliant EINPROGRESS. */ + else if (is_nonblocking () && err == WSAEWOULDBLOCK) + WSASetLastError (WSAEINPROGRESS); + /* Winsock returns WSAEINVAL if the socket is already a listener. + Convert to POSIX/Linux compliant EISCONN. */ + else if (err == WSAEINVAL && connect_state () == listener) + WSASetLastError (WSAEISCONN); + /* Any other error except WSAEALREADY during connect_pending means the + connect failed. */ + else if (connect_state () == connect_pending && err != WSAEALREADY) + connect_state (connect_failed); + set_winsock_errno (); + } + + return res; +} + +int +fhandler_socket_local::listen (int backlog) +{ + int res = ::listen (get_socket (), backlog); + if (res && WSAGetLastError () == WSAEINVAL) + { + /* It's perfectly valid to call listen on an unbound INET socket. + In this case the socket is automatically bound to an unused + port number, listening on all interfaces. On WinSock, listen + fails with WSAEINVAL when it's called on an unbound socket. + So we have to bind manually here to have POSIX semantics. 
*/
+      if (get_addr_family () == AF_INET)
+        {
+          struct sockaddr_in sin;
+          sin.sin_family = AF_INET;
+          sin.sin_port = 0;
+          sin.sin_addr.s_addr = INADDR_ANY;
+          if (!::bind (get_socket (), (struct sockaddr *) &sin, sizeof sin))
+            res = ::listen (get_socket (), backlog);
+        }
+      else if (get_addr_family () == AF_INET6)
+        {
+          struct sockaddr_in6 sin6;
+          memset (&sin6, 0, sizeof sin6);
+          sin6.sin6_family = AF_INET6;
+          if (!::bind (get_socket (), (struct sockaddr *) &sin6, sizeof sin6))
+            res = ::listen (get_socket (), backlog);
+        }
+    }
+  if (!res)
+    {
+      if (get_addr_family () == AF_LOCAL && get_socket_type () == SOCK_STREAM)
+        af_local_set_cred ();
+      connect_state (listener);	/* gets set to connected on accepted socket. */
+    }
+  else
+    set_winsock_errno ();
+  return res;
+}
+
+/* Accept a connection on a listening AF_LOCAL socket.
+
+   Fails with WSAEINVAL (converted via set_winsock_errno) and returns -1
+   if the socket is not in the listener state.  Otherwise waits for
+   FD_ACCEPT/FD_CLOSE and calls ::accept, retrying while WinSock reports
+   WSAEWOULDBLOCK.  For the accepted socket a new descriptor and a new
+   fhandler_socket_local are created, the sun paths are copied over, and
+   for SOCK_STREAM sockets the credentials are copied and the
+   af_local_accept handshake is run on the new socket.
+
+   PEER, if non-NULL, only receives the address family (a fake unbound
+   peer) and *LEN is set to sizeof sun_family.  FLAGS is not referenced
+   in this body; the new socket gets get_socket_flags () of the listener.
+
+   Returns the new descriptor, or -1 on failure. */
+int
+fhandler_socket_local::accept4 (struct sockaddr *peer, int *len, int flags)
+{
+  int ret = -1;
+  /* Allows NULL peer and len parameters. */
+  struct sockaddr_storage lpeer;
+  int llen = sizeof (struct sockaddr_storage);
+
+  /* Windows event handling does not check for the validity of the desired
+     flags so we have to do it here. */
+  if (connect_state () != listener)
+    {
+      WSASetLastError (WSAEINVAL);
+      set_winsock_errno ();
+      return -1;
+    }
+
+  SOCKET res = INVALID_SOCKET;
+  while (!(res = wait_for_events (FD_ACCEPT | FD_CLOSE, 0))
+         && (res = ::accept (get_socket (), (struct sockaddr *) &lpeer, &llen))
+            == INVALID_SOCKET
+         && WSAGetLastError () == WSAEWOULDBLOCK)
+    ;
+  if (res == INVALID_SOCKET)
+    set_winsock_errno ();
+  else
+    {
+      cygheap_fdnew fd;
+
+      if (fd >= 0)
+        {
+          fhandler_socket_local *sock = (fhandler_socket_local *)
+                                        build_fh_dev (dev ());
+          /* socket () and socketpair () in this file treat a negative
+             return value of set_socket_handle as failure, so only
+             continue if the call did not fail.
+             NOTE(review): set_socket_handle itself is defined elsewhere;
+             confirm its return convention. */
+          if (sock && sock->set_socket_handle (res, get_addr_family (),
+                                               get_socket_type (),
+                                               get_socket_flags ()) >= 0)
+            {
+              sock->async_io (false); /* set_socket_handle disables async. */
+              sock->set_sun_path (get_sun_path ());
+              sock->set_peer_sun_path (get_peer_sun_path ());
+              if (get_socket_type () == SOCK_STREAM)
+                {
+                  /* Don't forget to copy credentials from accepting
+                     socket to accepted socket and start transaction
+                     on accepted socket! */
+                  af_local_copy (sock);
+                  ret = sock->af_local_accept ();
+                  if (ret == -1)
+                    {
+                      fd.release ();
+                      delete sock;
+                      set_winsock_errno ();
+                      return -1;
+                    }
+                }
+              /* No locking necessary at this point. */
+              sock->wsock_events->events = wsock_events->events | FD_WRITE;
+              sock->wsock_events->owner = wsock_events->owner;
+              sock->connect_state (connected);
+              fd = sock;
+              if (fd <= 2)
+                set_std_handle (fd);
+              ret = fd;
+              if (peer)
+                {
+                  /* FIXME: Right now we have no way to determine the
+                     bound socket name of the peer's socket.  For now
+                     we just fake an unbound socket on the other side. */
+                  static struct sockaddr_un un = { AF_LOCAL, "" };
+                  memcpy (peer, &un, MIN (*len, (int) sizeof (un.sun_family)));
+                  *len = (int) sizeof (un.sun_family);
+                }
+            }
+        }
+      if (ret == -1)
+        ::closesocket (res);
+    }
+  return ret;
+}
+
+/* Return the local AF_LOCAL address.  A sockaddr_un holding
+   get_sun_path () (or an empty path if none is set) is copied to NAME,
+   truncated to *NAMELEN bytes.  *NAMELEN is then set to SUN_LEN of that
+   address, plus one if a sun path is set.  Always returns 0. */
+int
+fhandler_socket_local::getsockname (struct sockaddr *name, int *namelen)
+{
+  struct sockaddr_un sun;
+
+  sun.sun_family = AF_LOCAL;
+  sun.sun_path[0] = '\0';
+  if (get_sun_path ())
+    strncat (sun.sun_path, get_sun_path (), UNIX_PATH_MAX - 1);
+  memcpy (name, &sun, MIN (*namelen, (int) SUN_LEN (&sun) + 1));
+  *namelen = (int) SUN_LEN (&sun) + (get_sun_path () ? 1 : 0);
+  return 0;
+}
+
+int
+fhandler_socket_local::getpeername (struct sockaddr *name, int *namelen)
+{
+  /* Always use a local big enough buffer and truncate later as necessary
+     per POSIX.  WinSock unfortunately only returns WSAEFAULT if the buffer
+     is too small.
*/ + struct sockaddr_storage sock; + int len = sizeof sock; + int res = ::getpeername (get_socket (), (struct sockaddr *) &sock, &len); + if (res) + set_winsock_errno (); + else + { + struct sockaddr_un sun; + memset (&sun, 0, sizeof sun); + sun.sun_family = AF_LOCAL; + sun.sun_path[0] = '\0'; + if (get_peer_sun_path ()) + strncat (sun.sun_path, get_peer_sun_path (), UNIX_PATH_MAX - 1); + memcpy (name, &sun, MIN (*namelen, (int) SUN_LEN (&sun) + 1)); + *namelen = (int) SUN_LEN (&sun) + (get_peer_sun_path () ? 1 : 0); + } + return res; +} + +/* There's no DLL which exports the symbol WSARecvMsg. One has to call + WSAIoctl as below to fetch the function pointer. Why on earth did the + MS developers decide not to export a normal symbol for these extension + functions? */ +inline int +get_ext_funcptr (SOCKET sock, void *funcptr) +{ + DWORD bret; + const GUID guid = WSAID_WSARECVMSG; + return WSAIoctl (sock, SIO_GET_EXTENSION_FUNCTION_POINTER, + (void *) &guid, sizeof (GUID), funcptr, sizeof (void *), + &bret, NULL, NULL); +} + +inline ssize_t +fhandler_socket_local::recv_internal (LPWSAMSG wsamsg, bool use_recvmsg) +{ + ssize_t res = 0; + DWORD ret = 0, wret; + int evt_mask = FD_READ | ((wsamsg->dwFlags & MSG_OOB) ? FD_OOB : 0); + LPWSABUF &wsabuf = wsamsg->lpBuffers; + ULONG &wsacnt = wsamsg->dwBufferCount; + static NO_COPY LPFN_WSARECVMSG WSARecvMsg; + int orig_namelen = wsamsg->namelen; + + /* CV 2014-10-26: Do not check for the connect_state at this point. In + certain scenarios there's no way to check the connect state reliably. + Example (hexchat): Parent process creates socket, forks, child process + calls connect, parent process calls read. Even if the event handling + allows to check for FD_CONNECT in the parent, there is always yet another + scenario we can easily break. 
*/ + + DWORD wait_flags = wsamsg->dwFlags; + bool waitall = !!(wait_flags & MSG_WAITALL); + wsamsg->dwFlags &= (MSG_OOB | MSG_PEEK | MSG_DONTROUTE); + if (use_recvmsg) + { + if (!WSARecvMsg + && get_ext_funcptr (get_socket (), &WSARecvMsg) == SOCKET_ERROR) + { + if (wsamsg->Control.len > 0) + { + set_winsock_errno (); + return SOCKET_ERROR; + } + use_recvmsg = false; + } + else /* Only MSG_PEEK is supported by WSARecvMsg. */ + wsamsg->dwFlags &= MSG_PEEK; + } + if (waitall) + { + if (get_socket_type () != SOCK_STREAM) + { + WSASetLastError (WSAEOPNOTSUPP); + set_winsock_errno (); + return SOCKET_ERROR; + } + if (is_nonblocking () || (wsamsg->dwFlags & (MSG_OOB | MSG_PEEK))) + waitall = false; + } + + /* Note: Don't call WSARecvFrom(MSG_PEEK) without actually having data + waiting in the buffers, otherwise the event handling gets messed up + for some reason. */ + while (!(res = wait_for_events (evt_mask | FD_CLOSE, wait_flags)) + || saw_shutdown_read ()) + { + if (use_recvmsg) + res = WSARecvMsg (get_socket (), wsamsg, &wret, NULL, NULL); + /* This is working around a really weird problem in WinSock. + + Assume you create a socket, fork the process (thus duplicating + the socket), connect the socket in the child, then call recv + on the original socket handle in the parent process. + In this scenario, calls to WinSock's recvfrom and WSARecvFrom + in the parent will fail with WSAEINVAL, regardless whether both + address parameters, name and namelen, are NULL or point to valid + storage. However, calls to recv and WSARecv succeed as expected. + Per MSDN, WSAEINVAL in the context of recv means "The socket has not + been bound". It is as if the recvfrom functions test if the socket + is bound locally, but in the parent process, WinSock doesn't know + about that and fails, while the same test is omitted in the recv + functions. + + This also covers another weird case: WinSock returns WSAEFAULT if + namelen is a valid pointer while name is NULL. 
Both parameters are + ignored for TCP sockets, so this only occurs when using UDP socket. */ + else if (!wsamsg->name || get_socket_type () == SOCK_STREAM) + res = WSARecv (get_socket (), wsabuf, wsacnt, &wret, &wsamsg->dwFlags, + NULL, NULL); + else + res = WSARecvFrom (get_socket (), wsabuf, wsacnt, &wret, + &wsamsg->dwFlags, wsamsg->name, &wsamsg->namelen, + NULL, NULL); + if (!res) + { + ret += wret; + if (!waitall) + break; + while (wret && wsacnt) + { + if (wsabuf->len > wret) + { + wsabuf->len -= wret; + wsabuf->buf += wret; + wret = 0; + } + else + { + wret -= wsabuf->len; + ++wsabuf; + --wsacnt; + } + } + if (!wret) + break; + } + else if (WSAGetLastError () != WSAEWOULDBLOCK) + break; + } + + if (res) + { + /* According to SUSv3, errno isn't set in that case and no error + condition is returned. */ + if (WSAGetLastError () == WSAEMSGSIZE) + ret += wret; + else if (!ret) + { + /* ESHUTDOWN isn't defined for recv in SUSv3. Simply EOF is returned + in this case. */ + if (WSAGetLastError () == WSAESHUTDOWN) + ret = 0; + else + { + set_winsock_errno (); + return SOCKET_ERROR; + } + } + } + + if (wsamsg->name != NULL && orig_namelen >= (int) sizeof (sa_family_t)) + { + /* WSARecvFrom copied the sockaddr_in block to wsamsg->name. We have to + overwrite it with a sockaddr_un block. For datagram sockets we + generate a sockaddr_un with a filename analogue to abstract socket + names under Linux. See `man 7 unix' under Linux for a description. 
*/ + sockaddr_un *un = (sockaddr_un *) wsamsg->name; + un->sun_family = AF_LOCAL; + int len = orig_namelen - offsetof (struct sockaddr_un, sun_path); + if (len > 0) + { + if (get_socket_type () == SOCK_DGRAM) + { + if (len >= 7) + { + __small_sprintf (un->sun_path + 1, "d%04x", + ((struct sockaddr_in *) wsamsg->name)->sin_port); + wsamsg->namelen = offsetof (struct sockaddr_un, sun_path) + 7; + } + else + wsamsg->namelen = offsetof (struct sockaddr_un, sun_path) + 1; + un->sun_path[0] = '\0'; + } + else if (!get_peer_sun_path ()) + wsamsg->namelen = sizeof (sa_family_t); + else + { + memset (un->sun_path, 0, len); + strncpy (un->sun_path, get_peer_sun_path (), len); + if (un->sun_path[len - 1] == '\0') + len = strlen (un->sun_path) + 1; + if (len > UNIX_PATH_MAX) + len = UNIX_PATH_MAX; + wsamsg->namelen = offsetof (struct sockaddr_un, sun_path) + len; + } + } + } + + return ret; +} + +ssize_t +fhandler_socket_local::recvfrom (void *in_ptr, size_t len, int flags, + struct sockaddr *from, int *fromlen) +{ + char *ptr = (char *) in_ptr; + +#ifdef __x86_64__ + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { from, from && fromlen ? *fromlen : 0, + wsabuf, bufcnt, + { 0, NULL }, + (DWORD) flags }; + /* Don't use len as loop condition, it could be 0. */ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } +#else + WSABUF wsabuf = { len, ptr }; + WSAMSG wsamsg = { from, from && fromlen ? *fromlen : 0, + &wsabuf, 1, + { 0, NULL}, + (DWORD) flags }; +#endif + ssize_t ret = recv_internal (&wsamsg, false); + if (fromlen) + *fromlen = wsamsg.namelen; + return ret; +} + +ssize_t +fhandler_socket_local::recvmsg (struct msghdr *msg, int flags) +{ + /* TODO: Descriptor passing on AF_LOCAL sockets. 
*/ + + /* Disappointing but true: Even if WSARecvMsg is supported, it's only + supported for datagram and raw sockets. */ + bool use_recvmsg = true; + if (get_socket_type () == SOCK_STREAM || get_addr_family () == AF_LOCAL) + { + use_recvmsg = false; + msg->msg_controllen = 0; + } + + WSABUF wsabuf[msg->msg_iovlen]; + WSABUF *wsaptr = wsabuf + msg->msg_iovlen; + const struct iovec *iovptr = msg->msg_iov + msg->msg_iovlen; + while (--wsaptr >= wsabuf) + { + wsaptr->len = (--iovptr)->iov_len; + wsaptr->buf = (char *) iovptr->iov_base; + } + WSAMSG wsamsg = { (struct sockaddr *) msg->msg_name, msg->msg_namelen, + wsabuf, (DWORD) msg->msg_iovlen, + { (DWORD) msg->msg_controllen, (char *) msg->msg_control }, + (DWORD) flags }; + ssize_t ret = recv_internal (&wsamsg, use_recvmsg); + if (ret >= 0) + { + msg->msg_namelen = wsamsg.namelen; + msg->msg_controllen = wsamsg.Control.len; + if (!CYGWIN_VERSION_CHECK_FOR_USING_ANCIENT_MSGHDR) + msg->msg_flags = wsamsg.dwFlags; + } + return ret; +} + +void __reg3 +fhandler_socket_local::read (void *in_ptr, size_t& len) +{ + char *ptr = (char *) in_ptr; + +#ifdef __x86_64__ + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 }; + /* Don't use len as loop condition, it could be 0. 
*/ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } +#else + WSABUF wsabuf = { len, ptr }; + WSAMSG wsamsg = { NULL, 0, &wsabuf, 1, { 0, NULL }, 0 }; +#endif + + len = recv_internal (&wsamsg, false); +} + +ssize_t +fhandler_socket_local::readv (const struct iovec *const iov, const int iovcnt, + ssize_t tot) +{ + WSABUF wsabuf[iovcnt]; + WSABUF *wsaptr = wsabuf + iovcnt; + const struct iovec *iovptr = iov + iovcnt; + while (--wsaptr >= wsabuf) + { + wsaptr->len = (--iovptr)->iov_len; + wsaptr->buf = (char *) iovptr->iov_base; + } + WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 }; + return recv_internal (&wsamsg, false); +} + +inline ssize_t +fhandler_socket_local::send_internal (struct _WSAMSG *wsamsg, int flags) +{ + ssize_t res = 0; + DWORD ret = 0, sum = 0; + WSABUF out_buf[wsamsg->dwBufferCount]; + bool use_sendmsg = false; + DWORD wait_flags = flags & MSG_DONTWAIT; + bool nosignal = !!(flags & MSG_NOSIGNAL); + + flags &= (MSG_OOB | MSG_DONTROUTE); + if (wsamsg->Control.len > 0) + use_sendmsg = true; + /* Workaround for MSDN KB 823764: Split a message into chunks <= SO_SNDBUF. + in_idx is the index of the current lpBuffers from the input wsamsg buffer. + in_off is used to keep track of the next byte to write from a wsamsg + buffer which only gets partially written. */ + for (DWORD in_idx = 0, in_off = 0; + in_idx < wsamsg->dwBufferCount; + in_off >= wsamsg->lpBuffers[in_idx].len && (++in_idx, in_off = 0)) + { + /* Split a message into the least number of pieces to minimize the + number of WsaSendTo calls. Don't split datagram messages (bad idea). + out_idx is the index of the next buffer in the out_buf WSABUF, + also the number of buffers given to WSASendTo. + out_len is the number of bytes in the buffers given to WSASendTo. + Don't split datagram messages (very bad idea). 
*/ + DWORD out_idx = 0; + DWORD out_len = 0; + if (get_socket_type () == SOCK_STREAM) + { + do + { + out_buf[out_idx].buf = wsamsg->lpBuffers[in_idx].buf + in_off; + out_buf[out_idx].len = wsamsg->lpBuffers[in_idx].len - in_off; + out_len += out_buf[out_idx].len; + out_idx++; + } + while (out_len < (unsigned) wmem () + && (in_off = 0, ++in_idx < wsamsg->dwBufferCount)); + /* Tweak len of the last out_buf buffer so the entire number of bytes + is (less than or) equal to wmem (). Fix out_len as well since it's + used in a subsequent test expression. */ + if (out_len > (unsigned) wmem ()) + { + out_buf[out_idx - 1].len -= out_len - (unsigned) wmem (); + out_len = (unsigned) wmem (); + } + /* Add the bytes written from the current last buffer to in_off, + so in_off points to the next byte to be written from that buffer, + or beyond which lets the outer loop skip to the next buffer. */ + in_off += out_buf[out_idx - 1].len; + } + + do + { + if (use_sendmsg) + res = WSASendMsg (get_socket (), wsamsg, flags, &ret, NULL, NULL); + else if (get_socket_type () == SOCK_STREAM) + res = WSASendTo (get_socket (), out_buf, out_idx, &ret, flags, + wsamsg->name, wsamsg->namelen, NULL, NULL); + else + res = WSASendTo (get_socket (), wsamsg->lpBuffers, + wsamsg->dwBufferCount, &ret, flags, + wsamsg->name, wsamsg->namelen, NULL, NULL); + if (res && (WSAGetLastError () == WSAEWOULDBLOCK)) + { + LOCK_EVENTS; + wsock_events->events &= ~FD_WRITE; + UNLOCK_EVENTS; + } + } + while (res && (WSAGetLastError () == WSAEWOULDBLOCK) + && !(res = wait_for_events (FD_WRITE | FD_CLOSE, wait_flags))); + + if (!res) + { + sum += ret; + /* For streams, return to application if the number of bytes written + is less than the number of bytes we intended to write in a single + call to WSASendTo. Otherwise we would have to add code to + backtrack in the input buffers, which is questionable. There was + probably a good reason we couldn't write more. 
*/ + if (get_socket_type () != SOCK_STREAM || ret < out_len) + break; + } + else if (is_nonblocking () || WSAGetLastError() != WSAEWOULDBLOCK) + break; + } + + if (sum) + res = sum; + else if (res == SOCKET_ERROR) + { + set_winsock_errno (); + + /* Special handling for EPIPE and SIGPIPE. + + EPIPE is generated if the local end has been shut down on a connection + oriented socket. In this case the process will also receive a SIGPIPE + unless MSG_NOSIGNAL is set. */ + if ((get_errno () == ECONNABORTED || get_errno () == ESHUTDOWN) + && get_socket_type () == SOCK_STREAM) + { + set_errno (EPIPE); + if (!nosignal) + raise (SIGPIPE); + } + } + + return res; +} + +ssize_t +fhandler_socket_local::sendto (const void *in_ptr, size_t len, int flags, + const struct sockaddr *to, int tolen) +{ + char *ptr = (char *) in_ptr; + struct sockaddr_storage sst; + + if (to && get_inet_addr_local (to, tolen, &sst, &tolen) == SOCKET_ERROR) + return SOCKET_ERROR; + +#ifdef __x86_64__ + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { to ? (struct sockaddr *) &sst : NULL, tolen, + wsabuf, bufcnt, + { 0, NULL }, + 0 }; + /* Don't use len as loop condition, it could be 0. */ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } +#else + WSABUF wsabuf = { len, ptr }; + WSAMSG wsamsg = { to ? (struct sockaddr *) &sst : NULL, tolen, + &wsabuf, 1, + { 0, NULL}, + 0 }; +#endif + return send_internal (&wsamsg, flags); +} + +ssize_t +fhandler_socket_local::sendmsg (const struct msghdr *msg, int flags) +{ + /* TODO: Descriptor passing on AF_LOCAL sockets. 
*/ + + struct sockaddr_storage sst; + int len = 0; + + if (msg->msg_name + && get_inet_addr_local ((struct sockaddr *) msg->msg_name, + msg->msg_namelen, &sst, &len) == SOCKET_ERROR) + return SOCKET_ERROR; + + WSABUF wsabuf[msg->msg_iovlen]; + WSABUF *wsaptr = wsabuf; + const struct iovec *iovptr = msg->msg_iov; + for (int i = 0; i < msg->msg_iovlen; ++i) + { + wsaptr->len = iovptr->iov_len; + (wsaptr++)->buf = (char *) (iovptr++)->iov_base; + } + /* Disappointing but true: Even if WSASendMsg is supported, it's only + supported for datagram and raw sockets. */ + DWORD controllen = (DWORD) (get_socket_type () == SOCK_STREAM + || get_addr_family () == AF_LOCAL + ? 0 : msg->msg_controllen); + WSAMSG wsamsg = { msg->msg_name ? (struct sockaddr *) &sst : NULL, len, + wsabuf, (DWORD) msg->msg_iovlen, + { controllen, (char *) msg->msg_control }, + 0 }; + return send_internal (&wsamsg, flags); +} + +ssize_t +fhandler_socket_local::write (const void *in_ptr, size_t len) +{ + char *ptr = (char *) in_ptr; + +#ifdef __x86_64__ + /* size_t is 64 bit, but the len member in WSABUF is 32 bit. + Split buffer if necessary. */ + DWORD bufcnt = len / UINT32_MAX + ((!len || (len % UINT32_MAX)) ? 1 : 0); + WSABUF wsabuf[bufcnt]; + WSAMSG wsamsg = { NULL, 0, wsabuf, bufcnt, { 0, NULL }, 0 }; + /* Don't use len as loop condition, it could be 0. 
*/ + for (WSABUF *wsaptr = wsabuf; bufcnt--; ++wsaptr) + { + wsaptr->len = MIN (len, UINT32_MAX); + wsaptr->buf = ptr; + len -= wsaptr->len; + ptr += wsaptr->len; + } +#else + WSABUF wsabuf = { len, ptr }; + WSAMSG wsamsg = { NULL, 0, &wsabuf, 1, { 0, NULL }, 0 }; +#endif + return send_internal (&wsamsg, 0); +} + +ssize_t +fhandler_socket_local::writev (const struct iovec *const iov, const int iovcnt, + ssize_t tot) +{ + WSABUF wsabuf[iovcnt]; + WSABUF *wsaptr = wsabuf; + const struct iovec *iovptr = iov; + for (int i = 0; i < iovcnt; ++i) + { + wsaptr->len = iovptr->iov_len; + (wsaptr++)->buf = (char *) (iovptr++)->iov_base; + } + WSAMSG wsamsg = { NULL, 0, wsabuf, (DWORD) iovcnt, { 0, NULL}, 0 }; + return send_internal (&wsamsg, 0); +} + +void +fhandler_socket_local::set_sun_path (const char *path) +{ + sun_path = path ? cstrdup (path) : NULL; +} + +void +fhandler_socket_local::set_peer_sun_path (const char *path) +{ + peer_sun_path = path ? cstrdup (path) : NULL; +} + +int +fhandler_socket_local::getpeereid (pid_t *pid, uid_t *euid, gid_t *egid) +{ + if (get_socket_type () != SOCK_STREAM) + { + set_errno (EINVAL); + return -1; + } + if (no_getpeereid ()) + { + set_errno (ENOTSUP); + return -1; + } + if (connect_state () != connected) + { + set_errno (ENOTCONN); + return -1; + } + + __try + { + if (pid) + *pid = sec_peer_pid; + if (euid) + *euid = sec_peer_uid; + if (egid) + *egid = sec_peer_gid; + return 0; + } + __except (EFAULT) {} + __endtry + return -1; +} + +int +fhandler_socket_local::setsockopt (int level, int optname, const void *optval, + socklen_t optlen) +{ + int ret = -1; + + /* Preprocessing setsockopt. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_PEERCRED: + /* Switch off the AF_LOCAL handshake and thus SO_PEERCRED handling + for AF_LOCAL/SOCK_STREAM sockets. This allows to handle special + situations in which connect is called before a listening socket + accepts connections. 
+ FIXME: In the long run we should find a more generic solution + which doesn't require a blocking handshake in accept/connect + to exchange SO_PEERCRED credentials. */ + if (optval || optlen) + set_errno (EINVAL); + else + ret = af_local_set_no_getpeereid (); + return ret; + + case SO_REUSEADDR: + saw_reuseaddr (*(int *) optval); + return 0; + + case SO_RCVTIMEO: + case SO_SNDTIMEO: + if (optlen < (socklen_t) sizeof (struct timeval)) + { + set_errno (EINVAL); + return ret; + } + if (timeval_to_ms ((struct timeval *) optval, + (optname == SO_RCVTIMEO) ? rcvtimeo () + : sndtimeo ())) + return 0; + set_errno (EDOM); + return -1; + + case SO_DEBUG: + case SO_RCVBUF: + case SO_RCVLOWAT: + case SO_SNDBUF: + case SO_SNDLOWAT: + break; + + default: + /* AF_LOCAL sockets simply ignore all other SOL_SOCKET options. */ + return 0; + } + break; + + default: + set_errno (ENOPROTOOPT); + return -1; + } + + /* Call Winsock setsockopt */ + ret = ::setsockopt (get_socket (), level, optname, (const char *) optval, + optlen); + if (ret == SOCKET_ERROR) + { + set_winsock_errno (); + return ret; + } + + if (optlen == (socklen_t) sizeof (int)) + debug_printf ("setsockopt optval=%x", *(int *) optval); + + /* Postprocessing setsockopt, setting fhandler_socket members, etc. 
*/ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_RCVBUF: + rmem (*(int *) optval); + break; + + case SO_SNDBUF: + wmem (*(int *) optval); + break; + + default: + break; + } + break; + + default: + break; + } + + return ret; +} + +int +fhandler_socket_local::getsockopt (int level, int optname, const void *optval, + socklen_t *optlen) +{ + int ret = -1; + + /* Preprocessing getsockopt.*/ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_PEERCRED: + { + struct ucred *cred = (struct ucred *) optval; + + if (*optlen < (socklen_t) sizeof *cred) + { + set_errno (EINVAL); + return ret; + } + ret = getpeereid (&cred->pid, &cred->uid, &cred->gid); + if (!ret) + *optlen = (socklen_t) sizeof *cred; + return ret; + } + + case SO_REUSEADDR: + { + unsigned int *reuseaddr = (unsigned int *) optval; + + if (*optlen < (socklen_t) sizeof *reuseaddr) + { + set_errno (EINVAL); + return -1; + } + *reuseaddr = saw_reuseaddr(); + *optlen = (socklen_t) sizeof *reuseaddr; + return 0; + } + + case SO_RCVTIMEO: + case SO_SNDTIMEO: + { + struct timeval *time_out = (struct timeval *) optval; + + if (*optlen < (socklen_t) sizeof *time_out) + { + set_errno (EINVAL); + return ret; + } + DWORD ms = (optname == SO_RCVTIMEO) ? rcvtimeo () : sndtimeo (); + if (ms == 0 || ms == INFINITE) + { + time_out->tv_sec = 0; + time_out->tv_usec = 0; + } + else + { + time_out->tv_sec = ms / MSPERSEC; + time_out->tv_usec = ((ms % MSPERSEC) * USPERSEC) / MSPERSEC; + } + *optlen = (socklen_t) sizeof *time_out; + return 0; + } + + case SO_TYPE: + { + unsigned int *type = (unsigned int *) optval; + *type = get_socket_type (); + *optlen = (socklen_t) sizeof *type; + return 0; + } + + case SO_ACCEPTCONN: + case SO_DEBUG: + case SO_ERROR: + case SO_RCVBUF: + case SO_RCVLOWAT: + case SO_SNDBUF: + case SO_SNDLOWAT: + break; + + /* AF_LOCAL sockets simply ignore all other SOL_SOCKET options. 
*/ + + case SO_LINGER: + { + struct linger *linger = (struct linger *) optval; + memset (linger, 0, sizeof *linger); + *optlen = (socklen_t) sizeof *linger; + return 0; + } + + default: + { + unsigned int *val = (unsigned int *) optval; + *val = 0; + *optlen = (socklen_t) sizeof *val; + return 0; + } + } + break; + + default: + set_errno (ENOPROTOOPT); + return -1; + } + + /* Call Winsock getsockopt */ + ret = ::getsockopt (get_socket (), level, optname, (char *) optval, + (int *) optlen); + if (ret == SOCKET_ERROR) + { + set_winsock_errno (); + return ret; + } + + /* Postprocessing getsockopt, setting fhandler_socket members, etc. */ + switch (level) + { + case SOL_SOCKET: + switch (optname) + { + case SO_ERROR: + { + int *e = (int *) optval; + debug_printf ("WinSock SO_ERROR = %d", *e); + *e = find_winsock_errno (*e); + } + break; + + default: + break; + } + break; + default: + break; + } + + return ret; +} diff --git a/winsup/cygwin/net.cc b/winsup/cygwin/net.cc index 0d853327e..e849b04cc 100644 --- a/winsup/cygwin/net.cc +++ b/winsup/cygwin/net.cc @@ -500,146 +500,6 @@ cygwin_getprotobynumber (int number) return dup_ent (getprotobynumber (number)); } -#ifndef SIO_BASE_HANDLE -#define SIO_BASE_HANDLE _WSAIOR(IOC_WS2,34) -#endif - -bool -fdsock (cygheap_fdmanip& fd, const device *dev, SOCKET soc) -{ - fd = build_fh_dev (*dev); - if (!fd.isopen ()) - return false; - - /* Usually sockets are inheritable IFS objects. Unfortunately some virus - scanners or other network-oriented software replace normal sockets - with their own kind, which is running through a filter driver called - "layered service provider" (LSP). - - LSP sockets are not kernel objects. They are typically not marked as - inheritable, nor are they IFS handles. They are in fact not inheritable - to child processes, and it does not help to mark them inheritable via - SetHandleInformation. Subsequent socket calls in the child process fail - with error 10038, WSAENOTSOCK. 
- - There's a neat way to workaround these annoying LSP sockets. WSAIoctl - allows to fetch the underlying base socket, which is a normal, inheritable - IFS handle. So we fetch the base socket, duplicate it, and close the - original socket. Now we have a standard IFS socket which (hopefully) - works as expected. - - If that doesn't work for some reason, mark the sockets for duplication - via WSADuplicateSocket/WSASocket. This requires to start the child - process in SUSPENDED state so we only do this if really necessary. */ - DWORD flags; - bool fixup = false; - if (!GetHandleInformation ((HANDLE) soc, &flags) - || !(flags & HANDLE_FLAG_INHERIT)) - { - int ret; - SOCKET base_soc; - DWORD bret; - - fixup = true; - debug_printf ("LSP handle: %p", soc); - ret = WSAIoctl (soc, SIO_BASE_HANDLE, NULL, 0, (void *) &base_soc, - sizeof (base_soc), &bret, NULL, NULL); - if (ret) - debug_printf ("WSAIoctl: %u", WSAGetLastError ()); - else if (base_soc != soc) - { - if (GetHandleInformation ((HANDLE) base_soc, &flags) - && (flags & HANDLE_FLAG_INHERIT)) - { - if (!DuplicateHandle (GetCurrentProcess (), (HANDLE) base_soc, - GetCurrentProcess (), (PHANDLE) &base_soc, - 0, TRUE, DUPLICATE_SAME_ACCESS)) - debug_printf ("DuplicateHandle failed, %E"); - else - { - closesocket (soc); - soc = base_soc; - fixup = false; - } - } - } - } - fd->set_io_handle ((HANDLE) soc); - if (!((fhandler_socket *) fd)->init_events ()) - return false; - if (fixup) - ((fhandler_socket *) fd)->init_fixup_before (); - fd->set_flags (O_RDWR | O_BINARY); - debug_printf ("fd %d, name '%s', soc %p", (int) fd, dev->name (), soc); - - /* Raise default buffer sizes (instead of WinSock default 8K). - - 64K appear to have the best size/performance ratio for a default - value. Tested with ssh/scp on Vista over Gigabit LAN. - - NOTE. If the SO_RCVBUF size exceeds 65535(*), and if the socket is - connected to a remote machine, then calling WSADuplicateSocket on - fork/exec fails with WinSock error 10022, WSAEINVAL. 
Fortunately - we don't use WSADuplicateSocket anymore, rather we just utilize - handle inheritance. An explanation for this weird behaviour would - be nice, though. - - NOTE 2. Testing on x86_64 (Vista, 2008 R2, W8) indicates that - this is no problem on 64 bit. So we set the default buffer size to - the default values in current 3.x Linux versions. - - NOTE 3. Setting the window size to 65535 results in extremely bad - performance for apps that send data in multiples of Kb, as they - eventually end up sending 1 byte on the network and naggle + delay - ack kicks in. For example, iperf on a 10Gb network gives only 10 - Mbits/sec with a 65535 send buffer. We want this to be a multiple - of 1k, but since 64k breaks WSADuplicateSocket we use 63Kb. - - NOTE 4. Tests with iperf uncover a problem in setting the SO_RCVBUF - and SO_SNDBUF sizes. Windows is using autotuning since Windows Vista. - Manually setting SO_RCVBUF/SO_SNDBUF disables autotuning and leads to - inferior send/recv performance in scenarios with larger RTTs, as is - basically standard when accessing the internet. For a discussion, - see https://cygwin.com/ml/cygwin-patches/2017-q1/msg00010.html. - - (*) Maximum normal TCP window size. Coincidence? */ -#ifdef __x86_64__ - ((fhandler_socket *) fd)->rmem () = 212992; - ((fhandler_socket *) fd)->wmem () = 212992; -#else - ((fhandler_socket *) fd)->rmem () = 64512; - ((fhandler_socket *) fd)->wmem () = 64512; -#endif -#if 0 /* See NOTE 4 above. 
*/ - int size; - - if (::setsockopt (soc, SOL_SOCKET, SO_RCVBUF, - (char *) &((fhandler_socket *) fd)->rmem (), sizeof (int))) - { - debug_printf ("setsockopt(SO_RCVBUF) failed, %u", WSAGetLastError ()); - if (::getsockopt (soc, SOL_SOCKET, SO_RCVBUF, - (char *) &((fhandler_socket *) fd)->rmem (), - (size = sizeof (int), &size))) - system_printf ("getsockopt(SO_RCVBUF) failed, %u", WSAGetLastError ()); - } - if (::setsockopt (soc, SOL_SOCKET, SO_SNDBUF, - (char *) &((fhandler_socket *) fd)->wmem (), sizeof (int))) - { - debug_printf ("setsockopt(SO_SNDBUF) failed, %u", WSAGetLastError ()); - if (::getsockopt (soc, SOL_SOCKET, SO_SNDBUF, - (char *) &((fhandler_socket *) fd)->wmem (), - (size = sizeof (int), &size))) - system_printf ("getsockopt(SO_SNDBUF) failed, %u", WSAGetLastError ()); - } -#endif - /* A unique ID is necessary to recognize fhandler entries which are - duplicated by dup(2) or fork(2). This is used in BSD flock calls - to identify the descriptor. */ - ((fhandler_socket *) fd)->set_unique_id (); - - return true; -} - /* exported as socket: POSIX.1-2001, POSIX.1-2008, 4.4BSD */ extern "C" int cygwin_socket (int af, int type, int protocol) diff --git a/winsup/cygwin/security.h b/winsup/cygwin/security.h index 3faa99c23..2f9d0ad4d 100644 --- a/winsup/cygwin/security.h +++ b/winsup/cygwin/security.h @@ -17,6 +17,7 @@ details. 
*/ /* UID/GID */ void uinfo_init (); +bool check_token_membership (PSID); #define ILLEGAL_UID ((uid_t)-1) #define ILLEGAL_GID ((gid_t)-1) diff --git a/winsup/cygwin/syslog.cc b/winsup/cygwin/syslog.cc index 81717c645..7ea00d7c3 100644 --- a/winsup/cygwin/syslog.cc +++ b/winsup/cygwin/syslog.cc @@ -185,17 +185,12 @@ static enum { static int syslogd_sock = -1; extern "C" int cygwin_socket (int, int, int); extern "C" int cygwin_connect (int, const struct sockaddr *, int); -extern int get_inet_addr (const struct sockaddr *, int, - struct sockaddr_storage *, int *, - int * = NULL, int * = NULL); static void connect_syslogd () { int fd; struct sockaddr_un sun; - struct sockaddr_storage sst; - int len, type; if (syslogd_inited != not_inited && syslogd_sock >= 0) close (syslogd_sock); @@ -203,20 +198,38 @@ connect_syslogd () syslogd_sock = -1; sun.sun_family = AF_LOCAL; strncpy (sun.sun_path, _PATH_LOG, sizeof sun.sun_path); - if (get_inet_addr ((struct sockaddr *) &sun, sizeof sun, &sst, &len, &type)) - return; - if ((fd = cygwin_socket (AF_LOCAL, type, 0)) < 0) + if ((fd = cygwin_socket (AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0)) < 0) return; if (cygwin_connect (fd, (struct sockaddr *) &sun, sizeof sun) == 0) + syslogd_inited = inited_stream; + else { - /* connect on a dgram socket always succeeds. We still don't know - if syslogd is actually listening. */ - if (type == SOCK_DGRAM) + close (fd); + if ((fd = cygwin_socket (AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0)) < 0) + return; + if (cygwin_connect (fd, (struct sockaddr *) &sun, sizeof sun) == 0) { + /* + * FIXME + * + * As soon as AF_LOCAL sockets are using pipes, this code has to + * go away. + */ + + /* connect on a dgram socket always succeeds. We still don't know + if syslogd is actually listening. 
*/ + cygheap_fdget cfd (fd); + fhandler_socket_local *const fh = (fhandler_socket_local *) + cfd->is_socket (); tmp_pathbuf tp; PMIB_UDPTABLE tab = (PMIB_UDPTABLE) tp.w_get (); DWORD size = 65536; bool found = false; + struct sockaddr_storage sst; + int len; + + len = sizeof sst; + ::getsockname (fh->get_socket (), (struct sockaddr *) &sst, &len); struct sockaddr_in *sa = (struct sockaddr_in *) &sst; if (GetUdpTable (tab, &size, FALSE) == NO_ERROR) @@ -235,11 +248,12 @@ connect_syslogd () return; } } + syslogd_inited = inited_dgram; } - syslogd_inited = type == SOCK_DGRAM ? inited_dgram : inited_stream; + else + close (fd); } syslogd_sock = fd; - fcntl64 (syslogd_sock, F_SETFD, FD_CLOEXEC); debug_printf ("found /dev/log, fd = %d, type = %s", fd, syslogd_inited == inited_stream ? "STREAM" : "DGRAM"); return; diff --git a/winsup/cygwin/uinfo.cc b/winsup/cygwin/uinfo.cc index 286105057..c7aedfe10 100644 --- a/winsup/cygwin/uinfo.cc +++ b/winsup/cygwin/uinfo.cc @@ -117,7 +117,7 @@ cygheap_user::init () This needs careful checking should we use check_token_membership in other circumstances. */ -static bool +bool check_token_membership (PSID sid) { NTSTATUS status; @@ -142,7 +142,7 @@ check_token_membership (PSID sid) return false; } -void +static void internal_getlogin (cygheap_user &user) { struct passwd *pwd;