/*
 * Copyright (c) 1998-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *    The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the University of
 *    California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *    @(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/uio.h>
#include <sys/uio_internal.h>
#include <sys/ev.h>
#include <sys/kdebug.h>
#include <sys/un.h>
#include <sys/user.h>
#include <sys/priv.h>
#include <sys/kern_event.h>
#include <sys/persona.h>
#include <net/route.h>
#include <net/init.h>
#include <net/net_api_stats.h>
#include <net/ntstat.h>
#include <net/content_filter.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/in_var.h>
#include <netinet/tcp_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/flow_divert.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/policy_internal.h>

#include <sys/kpi_mbuf.h>
#include <sys/mcache.h>
#include <sys/unpcb.h>
#include <libkern/section_keywords.h>

#include <os/log.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif /* MAC */

#if MULTIPATH
#include <netinet/mp_pcb.h>
#include <netinet/mptcp_var.h>
#endif /* MULTIPATH */

#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))

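/*
 * Illustrative note: the mask trick above assumes (b) is a power of two,
 * e.g. ROUNDUP(10, 8) == 16, whereas ROUNDUP(10, 6) would give a wrong
 * result.
 */
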
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#endif

/* TODO: this should be in a header file somewhere */
extern char *proc_name_address(void *p);

static u_int32_t so_cache_hw;           /* High water mark for socache */
static u_int32_t so_cache_timeouts;     /* number of timeouts */
static u_int32_t so_cache_max_freed;    /* max freed per timeout */
static u_int32_t cached_sock_count = 0;
STAILQ_HEAD(, socket) so_cache_head;
int max_cached_sock_count = MAX_CACHED_SOCKETS;
static uint64_t so_cache_time;
static int socketinit_done;
static struct zone *so_cache_zone;

static LCK_GRP_DECLARE(so_cache_mtx_grp, "so_cache");
static LCK_MTX_DECLARE(so_cache_mtx, &so_cache_mtx_grp);

#include <machine/limits.h>

static int filt_sorattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static int filt_sortouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_sowattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_sockattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sockdetach(struct knote *kn);
static int filt_sockev(struct knote *kn, long hint);
static int filt_socktouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev);

static int sooptcopyin_timeval(struct sockopt *, struct timeval *);
static int sooptcopyout_timeval(struct sockopt *, const struct timeval *);

SECURITY_READ_ONLY_EARLY(struct filterops) soread_filtops = {
	.f_isfd = 1,
	.f_attach = filt_sorattach,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
	.f_touch = filt_sortouch,
	.f_process = filt_sorprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sowrite_filtops = {
	.f_isfd = 1,
	.f_attach = filt_sowattach,
	.f_detach = filt_sowdetach,
	.f_event = filt_sowrite,
	.f_touch = filt_sowtouch,
	.f_process = filt_sowprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sock_filtops = {
	.f_isfd = 1,
	.f_attach = filt_sockattach,
	.f_detach = filt_sockdetach,
	.f_event = filt_sockev,
	.f_touch = filt_socktouch,
	.f_process = filt_sockprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) soexcept_filtops = {
	.f_isfd = 1,
	.f_attach = filt_sorattach,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
	.f_touch = filt_sortouch,
	.f_process = filt_sorprocess,
};

SYSCTL_DECL(_kern_ipc);

#define EVEN_MORE_LOCKING_DEBUG 0

int socket_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, socket_debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &socket_debug, 0, "");

#if (DEBUG || DEVELOPMENT)
#define DEFAULT_SOSEND_ASSERT_PANIC 1
#else
#define DEFAULT_SOSEND_ASSERT_PANIC 0
#endif /* (DEBUG || DEVELOPMENT) */

int sosend_assert_panic = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosend_assert_panic,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosend_assert_panic, DEFAULT_SOSEND_ASSERT_PANIC, "");

static unsigned long sodefunct_calls = 0;
SYSCTL_LONG(_kern_ipc, OID_AUTO, sodefunct_calls, CTLFLAG_LOCKED,
    &sodefunct_calls, "");

ZONE_DEFINE_TYPE(socket_zone, "socket", struct socket, ZC_ZFREE_CLEARMEM);
so_gen_t so_gencnt;     /* generation count for sockets */

MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define DBG_FNC_SOSEND_LIST NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3)
#define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define DBG_FNC_SORECEIVE_LIST NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3)
#define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)

int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, 0, "");

/*
 * Set to enable jumbo clusters (if available) for large writes when
 * the socket is marked with SOF_MULTIPAGES; see below.
 */
int sosendjcl = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");

/*
 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
 * writes on the socket for all protocols on any network interfaces,
 * depending upon sosendjcl above. Be extra careful when setting this
 * to 1, because sending down packets that cross physical pages down to
 * broken drivers (those that falsely assume that the physical pages
 * are contiguous) might lead to system panics or silent data corruption.
 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
 * capable. Set this to 1 only for testing/debugging purposes.
 */
int sosendjcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");

/*
 * Set this to ignore SOF1_IF_2KCL and use big clusters for large
 * writes on the socket for all protocols on any network interfaces.
 * Be extra careful when setting this to 1, because sending down packets with
 * clusters larger than 2 KB might lead to system panics or data corruption.
 * When set to 0, the system will respect SOF1_IF_2KCL, which is set
 * on the outgoing interface.
 * Set this to 1 for testing/debugging purposes only.
 */
int sosendbigcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, "");

int sodefunctlog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sodefunctlog, 0, "");

int sothrottlelog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sothrottlelog, 0, "");

int sorestrictrecv = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorestrictrecv, 0, "Enable inbound interface restrictions");

int sorestrictsend = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorestrictsend, 0, "Enable outbound interface restrictions");

int soreserveheadroom = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soreserveheadroom, 0, "To allocate contiguous datagram buffers");

#if (DEBUG || DEVELOPMENT)
int so_notsent_lowat_check = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, notsent_lowat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_notsent_lowat_check, 0, "enable/disable notsent lowat check");
#endif /* DEBUG || DEVELOPMENT */

int so_accept_list_waits = 0;
#if (DEBUG || DEVELOPMENT)
SYSCTL_INT(_kern_ipc, OID_AUTO, accept_list_waits, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_accept_list_waits, 0, "number of waits for listener incomp list");
#endif /* DEBUG || DEVELOPMENT */

extern struct inpcbinfo tcbinfo;

/* TODO: these should be in header file */
extern int get_inpcb_str_size(void);
extern int get_tcp_str_size(void);

vm_size_t so_cache_zone_element_size;

static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
    user_ssize_t *);
static void cached_sock_alloc(struct socket **, zalloc_flags_t);
static void cached_sock_free(struct socket *);
/*
 * Maximum number of extended background idle sockets per process.
 * Set to zero to disable further setting of the option.
 */

#define SO_IDLE_BK_IDLE_MAX_PER_PROC 1
#define SO_IDLE_BK_IDLE_TIME 600
#define SO_IDLE_BK_IDLE_RCV_HIWAT 131072

struct soextbkidlestat soextbkidlestat;

SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0,
    "Maximum of extended background idle sockets per process");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soextbkidlestat.so_xbkidle_time, 0,
    "Time in seconds to keep extended background idle sockets");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soextbkidlestat.so_xbkidle_rcvhiwat, 0,
    "High water mark for extended background idle sockets");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED,
    &soextbkidlestat, soextbkidlestat, "");

int so_set_extended_bk_idle(struct socket *, int);

#define SO_MAX_MSG_X 1024

/*
 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
 * setting the DSCP code on the packet based on the service class; see
 * <rdar://problem/11277343> for details.
 */
__private_extern__ u_int32_t sotcdb = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sotcdb, 0, "");

void
socketinit(void)
{
	_CASSERT(sizeof(so_gencnt) == sizeof(uint64_t));
	VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t)));

#ifdef __LP64__
	_CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen));
#else
	_CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen));
#endif

	if (socketinit_done) {
		printf("socketinit: already called...\n");
		return;
	}
	socketinit_done = 1;

	PE_parse_boot_argn("socket_debug", &socket_debug,
	    sizeof(socket_debug));

	PE_parse_boot_argn("sosend_assert_panic", &sosend_assert_panic,
	    sizeof(sosend_assert_panic));

	STAILQ_INIT(&so_cache_head);

	so_cache_zone_element_size = (vm_size_t)(sizeof(struct socket) + 4
	    + get_inpcb_str_size() + 4 + get_tcp_str_size());

	so_cache_zone = zone_create("socache zone", so_cache_zone_element_size,
	    ZC_PGZ_USE_GUARDS | ZC_ZFREE_CLEARMEM);

	bzero(&soextbkidlestat, sizeof(struct soextbkidlestat));
	soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC;
	soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME;
	soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;

	in_pcbinit();
}

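/*
 * The "cached sock" fast path recycles fully-carved allocations for
 * PF_INET/SOCK_STREAM sockets via so_cache_head. A sketch of the single
 * memory block carved out below (boundaries and pads are illustrative;
 * the slack comes from the alignment accounted for in
 * so_cache_zone_element_size):
 *
 *	+---------------+-----+------------+-----+------------+
 *	| struct socket | pad | inpcb area | pad | tcpcb area |
 *	+---------------+-----+------------+-----+------------+
 *	*so              so_saved_pcb       inp_saved_ppcb
 */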
static void
cached_sock_alloc(struct socket **so, zalloc_flags_t how)
{
	caddr_t temp;
	uintptr_t offset;

	lck_mtx_lock(&so_cache_mtx);

	if (!STAILQ_EMPTY(&so_cache_head)) {
		VERIFY(cached_sock_count > 0);

		*so = STAILQ_FIRST(&so_cache_head);
		STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
		STAILQ_NEXT((*so), so_cache_ent) = NULL;

		cached_sock_count--;
		lck_mtx_unlock(&so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));

		(*so)->so_saved_pcb = temp;
	} else {
		lck_mtx_unlock(&so_cache_mtx);

		*so = zalloc_flags(so_cache_zone, how | Z_ZERO);

		/*
		 * Define offsets for extra structures into our
		 * single block of memory. Align extra structures
		 * on longword boundaries.
		 */

		offset = (uintptr_t)*so;
		offset += sizeof(struct socket);

		offset = ALIGN(offset);

		(*so)->so_saved_pcb = (caddr_t)offset;
		offset += get_inpcb_str_size();

		offset = ALIGN(offset);

		((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
		    (caddr_t)offset;
	}

	OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1);
}

static void
cached_sock_free(struct socket *so)
{
	lck_mtx_lock(&so_cache_mtx);

	so_cache_time = net_uptime();
	if (++cached_sock_count > max_cached_sock_count) {
		--cached_sock_count;
		lck_mtx_unlock(&so_cache_mtx);
		zfree(so_cache_zone, so);
	} else {
		if (so_cache_hw < cached_sock_count) {
			so_cache_hw = cached_sock_count;
		}

		STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent);

		so->cache_timestamp = so_cache_time;
		lck_mtx_unlock(&so_cache_mtx);
	}
}

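/*
 * Record the calling process as the socket's most recent owner (pid,
 * unique pid and executable UUID), skipping sockets created via
 * sock_socket() whose last_pid intentionally stays zero.
 */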
void
so_update_last_owner_locked(struct socket *so, proc_t self)
{
	if (so->last_pid != 0) {
		/*
		 * last_pid and last_upid should remain zero for sockets
		 * created using sock_socket. The check above achieves that.
		 */
		if (self == PROC_NULL) {
			self = current_proc();
		}

		if (so->last_upid != proc_uniqueid(self) ||
		    so->last_pid != proc_pid(self)) {
			so->last_upid = proc_uniqueid(self);
			so->last_pid = proc_pid(self);
			proc_getexecutableuuid(self, so->last_uuid,
			    sizeof(so->last_uuid));
			if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
				(*so->so_proto->pr_update_last_owner)(so, self, NULL);
			}
		}
		proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
	}
}

void
so_update_policy(struct socket *so)
{
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		(void) inp_update_policy(sotoinpcb(so));
	}
}

#if NECP
static void
so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr,
    struct sockaddr *override_remote_addr)
{
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		inp_update_necp_policy(sotoinpcb(so), override_local_addr,
		    override_remote_addr, 0);
	}
}
#endif /* NECP */

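/*
 * Timer callback: frees cached sockets that have been sitting on
 * so_cache_head for at least SO_CACHE_TIME_LIMIT, releasing at most
 * SO_CACHE_MAX_FREE_BATCH of them per run; returns TRUE when entries
 * remain so that the caller reschedules the timer.
 */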
boolean_t
so_cache_timer(void)
{
	struct socket *p;
	int n_freed = 0;
	boolean_t rc = FALSE;

	lck_mtx_lock(&so_cache_mtx);
	so_cache_timeouts++;
	so_cache_time = net_uptime();

	while (!STAILQ_EMPTY(&so_cache_head)) {
		VERIFY(cached_sock_count > 0);
		p = STAILQ_FIRST(&so_cache_head);
		if ((so_cache_time - p->cache_timestamp) <
		    SO_CACHE_TIME_LIMIT) {
			break;
		}

		STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
		--cached_sock_count;

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
			so_cache_max_freed++;
			break;
		}
	}

	/* Schedule again if there is more to cleanup */
	if (!STAILQ_EMPTY(&so_cache_head)) {
		rc = TRUE;
	}

	lck_mtx_unlock(&so_cache_mtx);
	return rc;
}

/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(int waitok, int dom, int type)
{
	zalloc_flags_t how = waitok ? Z_WAITOK : Z_NOWAIT;
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM)) {
		cached_sock_alloc(&so, how);
	} else {
		so = zalloc_flags(socket_zone, how | Z_ZERO);
	}
	if (so != NULL) {
		so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);

		/*
		 * Increment the socket allocation statistics
		 */
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_alloc_total);
	}

	return so;
}

int
socreate_internal(int dom, struct socket **aso, int type, int proto,
    struct proc *p, uint32_t flags, struct proc *ep)
{
	struct protosw *prp;
	struct socket *so;
	int error = 0;
	pid_t rpid = -1;

#if TCPDEBUG
	extern int tcpconsdebug;
#endif

	VERIFY(aso != NULL);
	*aso = NULL;

	if (proto != 0) {
		prp = pffindproto(dom, proto, type);
	} else {
		prp = pffindtype(dom, type);
	}

	if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL) {
		if (pffinddomain(dom) == NULL) {
			return EAFNOSUPPORT;
		}
		if (proto != 0) {
			if (pffindprotonotype(dom, proto) != NULL) {
				return EPROTOTYPE;
			}
		}
		return EPROTONOSUPPORT;
	}
	if (prp->pr_type != type) {
		return EPROTOTYPE;
	}
	so = soalloc(1, dom, type);
	if (so == NULL) {
		return ENOBUFS;
	}

	switch (dom) {
	case PF_LOCAL:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_local_total);
		break;
	case PF_INET:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet_total);
		if (type == SOCK_STREAM) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_stream_total);
		} else {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_total);
		}
		break;
	case PF_ROUTE:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_route_total);
		break;
	case PF_NDRV:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_ndrv_total);
		break;
	case PF_KEY:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_key_total);
		break;
	case PF_INET6:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet6_total);
		if (type == SOCK_STREAM) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_stream_total);
		} else {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_dgram_total);
		}
		break;
	case PF_SYSTEM:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_system_total);
		break;
	case PF_MULTIPATH:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_multipath_total);
		break;
	default:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_other_total);
		break;
	}

	if (flags & SOCF_MPTCP) {
		so->so_state |= SS_NBIO;
	}

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = (short)type;
	so->so_family = prp->pr_domain->dom_family;
	so->so_protocol = prp->pr_protocol;
	so->last_upid = proc_uniqueid(p);
	so->last_pid = proc_pid(p);
	proc_getexecutableuuid(p, so->last_uuid, sizeof(so->last_uuid));
	proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));

	so->so_rpid = -1;
	uuid_clear(so->so_ruuid);

	if (ep != PROC_NULL && ep != p) {
		so->e_upid = proc_uniqueid(ep);
		so->e_pid = proc_pid(ep);
		proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
		so->so_flags |= SOF_DELEGATED;
		if (ep->p_responsible_pid != so->e_pid) {
			rpid = ep->p_responsible_pid;
			so->so_rpid = rpid;
			proc_getresponsibleuuid(ep, so->so_ruuid, sizeof(so->so_ruuid));
		}
	}

	if (rpid < 0 && p->p_responsible_pid != so->last_pid) {
		rpid = p->p_responsible_pid;
		so->so_rpid = rpid;
		proc_getresponsibleuuid(p, so->so_ruuid, sizeof(so->so_ruuid));
	}

	so->so_cred = kauth_cred_proc_ref(p);
	if (!suser(kauth_cred_get(), NULL)) {
		so->so_state |= SS_PRIV;
	}

	so->so_persona_id = current_persona_get_id();
	so->so_proto = prp;
	so->so_rcv.sb_flags |= SB_RECV;
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

	/*
	 * Attachment will create the per pcb lock if necessary and
	 * increase refcount for creation, make sure it's done before
	 * socket is inserted in lists.
	 */
	so->so_usecount++;

	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error != 0) {
		/*
		 * Warning:
		 * If so_pcb is not zero, the socket will be leaked,
		 * so the protocol attachment handler must be coded carefully.
		 */
		if (so->so_pcb != NULL) {
			os_log_error(OS_LOG_DEFAULT,
			    "so_pcb not NULL after pru_attach error %d for dom %d, proto %d, type %d",
			    error, dom, proto, type);
		}
		/*
		 * Both SS_NOFDREF and SOF_PCBCLEARING should be set to free the socket
		 */
		so->so_state |= SS_NOFDREF;
		so->so_flags |= SOF_PCBCLEARING;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
		sofreelastref(so, 1);   /* will deallocate the socket */
		return error;
	}

	/*
	 * Note: needs so_pcb to be set after pru_attach
	 */
	if (prp->pr_update_last_owner != NULL) {
		(*prp->pr_update_last_owner)(so, p, ep);
	}

	os_atomic_inc(&prp->pr_domain->dom_refs, relaxed);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#if TCPDEBUG
	if (tcpconsdebug == 2) {
		so->so_options |= SO_DEBUG;
	}
#endif
	so_set_default_traffic_class(so);

	/*
	 * If this thread or task is marked to create backgrounded sockets,
	 * mark the socket as background.
	 */
	if (!(flags & SOCF_MPTCP) &&
	    proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) {
		socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
		so->so_background_thread = current_thread();
	}

	switch (dom) {
	/*
	 * Don't mark Unix domain or system
	 * eligible for defunct by default.
	 */
	case PF_LOCAL:
	case PF_SYSTEM:
		so->so_flags |= SOF_NODEFUNCT;
		break;
	default:
		break;
	}

	/*
	 * Entitlements can't be checked at socket creation time except if the
	 * application requested a feature guarded by a privilege (c.f., socket
	 * delegation).
	 * The priv(9) and the Sandboxing APIs are designed with the idea that
	 * a privilege check should only be triggered by a userland request.
	 * A privilege check at socket creation time is time consuming and
	 * could trigger many authorisation error messages from the security
	 * APIs.
	 */

	*aso = so;

	return 0;
}

/*
 * Returns: 0 Success
 * EAFNOSUPPORT
 * EPROTOTYPE
 * EPROTONOSUPPORT
 * ENOBUFS
 * <pru_attach>:ENOBUFS[AF_UNIX]
 * <pru_attach>:ENOBUFS[TCP]
 * <pru_attach>:ENOMEM[TCP]
 * <pru_attach>:??? [other protocol families, IPSEC]
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	return socreate_internal(dom, aso, type, proto, current_proc(), 0,
	           PROC_NULL);
}

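/*
 * Minimal in-kernel usage sketch for the creation path above (error
 * handling elided for brevity):
 *
 *	struct socket *so = NULL;
 *	int error = socreate(PF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error == 0) {
 *		... use the socket ...
 *		soclose(so);
 *	}
 */
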
int
socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid)
{
	int error = 0;
	struct proc *ep = PROC_NULL;

	if ((proc_selfpid() != epid) && ((ep = proc_find(epid)) == PROC_NULL)) {
		error = ESRCH;
		goto done;
	}

	error = socreate_internal(dom, aso, type, proto, current_proc(), 0, ep);

	/*
	 * It might not be wise to hold the proc reference when calling
	 * socreate_internal since it calls soalloc with M_WAITOK
	 */
done:
	if (ep != PROC_NULL) {
		proc_rele(ep);
	}

	return error;
}

/*
 * Returns: 0 Success
 * <pru_bind>:EINVAL Invalid argument [COMMON_START]
 * <pru_bind>:EAFNOSUPPORT Address family not supported
 * <pru_bind>:EADDRNOTAVAIL Address not available.
 * <pru_bind>:EINVAL Invalid argument
 * <pru_bind>:EAFNOSUPPORT Address family not supported [notdef]
 * <pru_bind>:EACCES Permission denied
 * <pru_bind>:EADDRINUSE Address in use
 * <pru_bind>:EAGAIN Resource unavailable, try again
 * <pru_bind>:EPERM Operation not permitted
 * <pru_bind>:???
 * <sf_bind>:???
 *
 * Notes: It's not possible to fully enumerate the return codes above,
 * since socket filter authors and protocol family authors may
 * not choose to limit their error returns to those listed, even
 * though this may result in some software operating incorrectly.
 *
 * The error codes which are enumerated above are those known to
 * be returned by the tcp_usr_bind function supplied.
 */
int
sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
{
	struct proc *p = current_proc();
	int error = 0;

	if (dolock) {
		socket_lock(so, 1);
	}

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

#if NECP
	so_update_necp_policy(so, nam, NULL);
#endif /* NECP */

	/*
	 * If this is a bind request on a socket that has been marked
	 * as inactive, reject it now before we go any further.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		error = EINVAL;
		SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] (%d)\n",
		    __func__, proc_pid(p), proc_best_name(p),
		    so->so_gencnt,
		    SOCK_DOM(so), SOCK_TYPE(so), error);
		goto out;
	}

	/* Socket filter */
	error = sflt_bind(so, nam);

	if (error == 0) {
		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	}
out:
	if (dolock) {
		socket_unlock(so, 1);
	}

	if (error == EJUSTRETURN) {
		error = 0;
	}

	return error;
}

void
sodealloc(struct socket *so)
{
	kauth_cred_unref(&so->so_cred);

	/* Remove any filters */
	sflt_termsock(so);

	so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);

	if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) {
		cached_sock_free(so);
	} else {
		zfree(socket_zone, so);
	}
}

/*
 * Returns: 0 Success
 * EINVAL
 * EOPNOTSUPP
 * <pru_listen>:EINVAL[AF_UNIX]
 * <pru_listen>:EINVAL[TCP]
 * <pru_listen>:EADDRNOTAVAIL[TCP] Address not available.
 * <pru_listen>:EINVAL[TCP] Invalid argument
 * <pru_listen>:EAFNOSUPPORT[TCP] Address family not supported [notdef]
 * <pru_listen>:EACCES[TCP] Permission denied
 * <pru_listen>:EADDRINUSE[TCP] Address in use
 * <pru_listen>:EAGAIN[TCP] Resource unavailable, try again
 * <pru_listen>:EPERM[TCP] Operation not permitted
 * <sf_listen>:???
 *
 * Notes: Other <pru_listen> returns depend on the protocol family; all
 * <sf_listen> returns depend on what the filter author causes
 * their filter to return.
 */
int
solisten(struct socket *so, int backlog)
{
	struct proc *p = current_proc();
	int error = 0;

	socket_lock(so, 1);

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	if (TAILQ_EMPTY(&so->so_comp)) {
		so->so_options |= SO_ACCEPTCONN;
	}

#if NECP
	so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */

	if (so->so_proto == NULL) {
		error = EINVAL;
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}
	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
		error = EOPNOTSUPP;
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}

	/*
	 * If the listen request is made on a socket that is not fully
	 * disconnected, or on a socket that has been marked as inactive,
	 * reject the request now.
	 */
	if ((so->so_state &
	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) ||
	    (so->so_flags & SOF_DEFUNCT)) {
		error = EINVAL;
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] "
			    "(%d)\n", __func__, proc_pid(p),
			    proc_best_name(p),
			    so->so_gencnt,
			    SOCK_DOM(so), SOCK_TYPE(so), error);
		}
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}

	if ((so->so_restrictions & SO_RESTRICT_DENY_IN) != 0) {
		error = EPERM;
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}

	error = sflt_listen(so);
	if (error == 0) {
		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	}

	if (error) {
		if (error == EJUSTRETURN) {
			error = 0;
		}
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}

	/*
	 * POSIX: The implementation may have an upper limit on the length of
	 * the listen queue, either global or per accepting socket. If backlog
	 * exceeds this limit, the length of the listen queue is set to the
	 * limit.
	 *
	 * If listen() is called with a backlog argument value that is less
	 * than 0, the function behaves as if it had been called with a backlog
	 * argument value of 0.
	 *
	 * A backlog argument of 0 may allow the socket to accept connections,
	 * in which case the length of the listen queue may be set to an
	 * implementation-defined minimum value.
	 */
	if (backlog <= 0 || backlog > somaxconn) {
		backlog = somaxconn;
	}

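	/*
	 * Example of the clamping above (with somaxconn at its default
	 * SOMAXCONN value of 128): listen(s, 0) and listen(s, 1024) both
	 * yield a queue limit of 128, while listen(s, 5) keeps 5.
	 */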
	so->so_qlimit = (short)backlog;
out:
	socket_unlock(so, 1);
	return error;
}

/*
 * The "accept list lock" protects the fields related to the listener queues
 * because we can unlock a socket to respect the lock ordering between
 * the listener socket and its client sockets. The lock ordering is first to
 * acquire the client socket before the listener socket.
 *
 * The accept list lock serializes access to the following fields:
 * - of the listener socket:
 *   - so_comp
 *   - so_incomp
 *   - so_qlen
 *   - so_inqlen
 * - of client sockets that are in so_comp or so_incomp:
 *   - so_head
 *   - so_list
 *
 * As one can see, the accept list lock protects the consistency of the
 * linkage of the client sockets.
 *
 * Note that those fields may be read without holding the accept list lock
 * for a preflight provided the accept list lock is taken when committing
 * to take an action based on the result of the preflight. The preflight
 * saves the cost of doing the unlock/lock dance.
 */
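/*
 * Typical usage pattern (sketch):
 *
 *	socket_lock(head, 1);
 *	so_acquire_accept_list(head, so);
 *	... examine or edit head->so_comp / head->so_incomp ...
 *	so_release_accept_list(head);
 *	socket_unlock(head, 1);
 */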
void
so_acquire_accept_list(struct socket *head, struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock == NULL) {
		return;
	}
	mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (!(head->so_flags1 & SOF1_ACCEPT_LIST_HELD)) {
		head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
		return;
	}
	if (so != NULL) {
		socket_unlock(so, 0);
	}
	while (head->so_flags1 & SOF1_ACCEPT_LIST_HELD) {
		so_accept_list_waits += 1;
		msleep((caddr_t)&head->so_incomp, mutex_held,
		    PSOCK | PCATCH, __func__, NULL);
	}
	head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
	if (so != NULL) {
		socket_unlock(head, 0);
		socket_lock(so, 0);
		socket_lock(head, 0);
	}
}

void
so_release_accept_list(struct socket *head)
{
	if (head->so_proto->pr_getlock != NULL) {
		lck_mtx_t *mutex_held;

		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

		head->so_flags1 &= ~SOF1_ACCEPT_LIST_HELD;
		wakeup((caddr_t)&head->so_incomp);
	}
}

void
sofreelastref(struct socket *so, int dealloc)
{
	struct socket *head = so->so_head;

	/* Assume socket is locked */

#if FLOW_DIVERT
	if (so->so_flags & SOF_FLOW_DIVERT) {
		flow_divert_detach(so);
	}
#endif /* FLOW_DIVERT */

#if CONTENT_FILTER
	if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
		cfil_sock_detach(so);
	}
#endif /* CONTENT_FILTER */

	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		soflow_detach(so);
	}

	if (!(so->so_flags & SOF_PCBCLEARING) || !(so->so_state & SS_NOFDREF)) {
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
		so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
		so->so_event = sonullevent;
		return;
	}
	if (head != NULL) {
		/*
		 * Need to lock the listener when the protocol has
		 * per socket locks
		 */
		if (head->so_proto->pr_getlock != NULL) {
			socket_lock(head, 1);
			so_acquire_accept_list(head, so);
		}
		if (so->so_state & SS_INCOMP) {
			so->so_state &= ~SS_INCOMP;
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
			head->so_qlen--;
			so->so_head = NULL;

			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
			}
		} else if (so->so_state & SS_COMP) {
			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
			}
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue. If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
			so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
			so->so_event = sonullevent;
			return;
		} else {
			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
			}
			printf("sofree: not queued\n");
		}
	}
	sowflush(so);
	sorflush(so);

	/* 3932268: disable upcall */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~(SB_UPCALL | SB_SNDBYTE_CNT);
	so->so_event = sonullevent;

	if (dealloc) {
		sodealloc(so);
	}
}

void
soclose_wait_locked(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	/*
	 * Double check here and return if there's no outstanding upcall;
	 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
	 */
	if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT)) {
		return;
	}
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;
	so->so_flags |= SOF_CLOSEWAIT;

	(void) msleep((caddr_t)&so->so_upcallusecount, mutex_held, (PZERO - 1),
	    "soclose_wait_locked", NULL);
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->so_flags &= ~SOF_CLOSEWAIT;
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose_locked(struct socket *so)
{
	int error = 0;
	struct timespec ts;

	if (so->so_usecount == 0) {
		panic("soclose: so=%p refcount=0", so);
		/* NOTREACHED */
	}

	sflt_notify(so, sock_evt_closing, NULL);

	if (so->so_upcallusecount) {
		soclose_wait_locked(so);
	}

#if CONTENT_FILTER
	/*
	 * We have to wait until the content filters are done
	 */
	if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
		cfil_sock_close_wait(so);
		cfil_sock_is_closed(so);
		cfil_sock_detach(so);
	}
#endif /* CONTENT_FILTER */

	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		soflow_detach(so);
	}

	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
		soresume(current_proc(), so, 1);
		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
	}

	if ((so->so_options & SO_ACCEPTCONN)) {
		struct socket *sp, *sonext;
		int persocklock = 0;
		int incomp_overflow_only;

		/*
		 * We do not want new connections to be added
		 * to the connection queues
		 */
		so->so_options &= ~SO_ACCEPTCONN;

		/*
		 * We can drop the lock on the listener once
		 * we've acquired the incoming list
		 */
		if (so->so_proto->pr_getlock != NULL) {
			persocklock = 1;
			so_acquire_accept_list(so, NULL);
			socket_unlock(so, 0);
		}
again:
		incomp_overflow_only = 1;

		TAILQ_FOREACH_SAFE(sp, &so->so_incomp, so_list, sonext) {
			/*
			 * Radar 5350314
			 * Skip sockets thrown away by tcpdropdropblreq;
			 * they will get cleaned up by the garbage collection.
			 * Otherwise, remove the incomp socket from the queue
			 * and let soabort trigger the appropriate cleanup.
			 */
			if (sp->so_flags & SOF_OVERFLOW) {
				continue;
			}

			if (persocklock != 0) {
				socket_lock(sp, 1);
			}

			/*
			 * Radar 27945981
			 * The extra reference for the list ensures the
			 * validity of the socket pointer when we perform the
			 * unlock of the head above
			 */
			if (sp->so_state & SS_INCOMP) {
				sp->so_state &= ~SS_INCOMP;
				sp->so_head = NULL;
				TAILQ_REMOVE(&so->so_incomp, sp, so_list);
				so->so_incqlen--;
				so->so_qlen--;

				(void) soabort(sp);
			} else {
				panic("%s sp %p in so_incomp but !SS_INCOMP",
				    __func__, sp);
			}

			if (persocklock != 0) {
				socket_unlock(sp, 1);
			}
		}

		TAILQ_FOREACH_SAFE(sp, &so->so_comp, so_list, sonext) {
			/* Dequeue from so_comp since sofree() won't do it */
			if (persocklock != 0) {
				socket_lock(sp, 1);
			}

			if (sp->so_state & SS_COMP) {
				sp->so_state &= ~SS_COMP;
				sp->so_head = NULL;
				TAILQ_REMOVE(&so->so_comp, sp, so_list);
				so->so_qlen--;

				(void) soabort(sp);
			} else {
				panic("%s sp %p in so_comp but !SS_COMP",
				    __func__, sp);
			}

			if (persocklock) {
				socket_unlock(sp, 1);
			}
		}

		if (incomp_overflow_only == 0 && !TAILQ_EMPTY(&so->so_incomp)) {
#if (DEBUG | DEVELOPMENT)
			panic("%s head %p so_incomp not empty", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */

			goto again;
		}

		if (!TAILQ_EMPTY(&so->so_comp)) {
#if (DEBUG | DEVELOPMENT)
			panic("%s head %p so_comp not empty", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */

			goto again;
		}

		if (persocklock) {
			socket_lock(so, 0);
			so_release_accept_list(so);
		}
	}
	if (so->so_pcb == NULL) {
		/* 3915887: mark the socket as ready for dealloc */
		so->so_flags |= SOF_PCBCLEARING;
		goto discard;
	}

	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnectlocked(so);
			if (error) {
				goto drop;
			}
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO)) {
				goto drop;
			}
			while ((so->so_state & SS_ISCONNECTED) && so->so_linger > 0) {
				lck_mtx_t *mutex_held;

				if (so->so_proto->pr_getlock != NULL) {
					mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
				} else {
					mutex_held = so->so_proto->pr_domain->dom_mtx;
				}
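				/*
				 * The conversion below treats so_linger as
				 * hundredths of a second: 1 unit = 10 ms,
				 * i.e. 10^7 ns per unit in tv_nsec.
				 */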
				ts.tv_sec = (so->so_linger / 100);
				ts.tv_nsec = (so->so_linger % 100) *
				    NSEC_PER_USEC * 1000 * 10;
				error = msleep((caddr_t)&so->so_timeo,
				    mutex_held, PSOCK | PCATCH, "soclose", &ts);
				if (error) {
					/*
					 * It's OK when the timer fires;
					 * don't report an error
					 */
					if (error == EWOULDBLOCK) {
						error = 0;
					}
					break;
				}
			}
		}
	}
drop:
	if (so->so_usecount == 0) {
		panic("soclose: usecount is zero so=%p", so);
		/* NOTREACHED */
	}
	if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0) {
			error = error2;
		}
	}
	if (so->so_usecount <= 0) {
		panic("soclose: usecount is zero so=%p", so);
		/* NOTREACHED */
	}
discard:
	if (so->so_pcb != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
	    (so->so_state & SS_NOFDREF)) {
		panic("soclose: NOFDREF");
		/* NOTREACHED */
	}
	so->so_state |= SS_NOFDREF;

	if ((so->so_flags & SOF_KNOTE) != 0) {
		KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);
	}

	os_atomic_dec(&so->so_proto->pr_domain->dom_refs, relaxed);

	VERIFY(so->so_usecount > 0);
	so->so_usecount--;
	sofree(so);
	return error;
}

int
soclose(struct socket *so)
{
	int error = 0;
	socket_lock(so, 1);

	if (so->so_retaincnt == 0) {
		error = soclose_locked(so);
	} else {
		/*
		 * If the FD is going away but the socket is
		 * retained in the kernel, remove its reference.
		 */
		so->so_usecount--;
		if (so->so_usecount < 2) {
			panic("soclose: retaincnt non null and so=%p "
			    "usecount=%d\n", so, so->so_usecount);
		}
	}
	socket_unlock(so, 1);
	return error;
}

/*
 * Must be called at splnet...
 */
/* Should already be locked */
int
soabort(struct socket *so)
{
	int error;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	if ((so->so_flags & SOF_ABORTED) == 0) {
		so->so_flags |= SOF_ABORTED;
		error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
		if (error) {
			sofree(so);
			return error;
		}
	}
	return 0;
}

int
soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
{
	int error;

	if (dolock) {
		socket_lock(so, 1);
	}

	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);
#if NECP
	so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */

	if ((so->so_state & SS_NOFDREF) == 0) {
		panic("soaccept: !NOFDREF");
	}
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

	if (dolock) {
		socket_unlock(so, 1);
	}
	return error;
}

int
soaccept(struct socket *so, struct sockaddr **nam)
{
	return soacceptlock(so, nam, 1);
}

int
soacceptfilter(struct socket *so, struct socket *head)
{
	struct sockaddr *local = NULL, *remote = NULL;
	int error = 0;

	/*
	 * Hold the lock even if this socket has not been made visible
	 * to the filter(s). For sockets with global locks, this protects
	 * against the head or peer going away
	 */
	socket_lock(so, 1);
	if (sogetaddr_locked(so, &remote, 1) != 0 ||
	    sogetaddr_locked(so, &local, 0) != 0) {
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		/* Out of resources; try it again next time */
		error = ECONNABORTED;
		goto done;
	}

	error = sflt_accept(head, so, local, remote);

	/*
	 * If we get EJUSTRETURN from one of the filters, mark this socket
	 * as inactive and return it anyway. This newly accepted socket
	 * will be disconnected later before we hand it off to the caller.
	 */
	if (error == EJUSTRETURN) {
		error = 0;
		(void) sosetdefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
	}

	if (error != 0) {
		/*
		 * This may seem like a duplication to the above error
		 * handling part when we return ECONNABORTED, except
		 * the following is done while holding the lock since
		 * the socket has been exposed to the filter(s) earlier.
		 */
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		/* Propagate socket filter's error code to the caller */
	} else {
		socket_unlock(so, 1);
	}
done:
	/* Callee checks for NULL pointer */
	sock_freeaddr(remote);
	sock_freeaddr(local);
	return error;
}

/*
 * Returns: 0 Success
 * EOPNOTSUPP Operation not supported on socket
 * EISCONN Socket is connected
 * <pru_connect>:EADDRNOTAVAIL Address not available.
 * <pru_connect>:EINVAL Invalid argument
 * <pru_connect>:EAFNOSUPPORT Address family not supported [notdef]
 * <pru_connect>:EACCES Permission denied
 * <pru_connect>:EADDRINUSE Address in use
 * <pru_connect>:EAGAIN Resource unavailable, try again
 * <pru_connect>:EPERM Operation not permitted
 * <sf_connect_out>:??? [anything a filter writer might set]
 */
int
soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
{
	int error;
	struct proc *p = current_proc();
	tracker_metadata_t metadata = { };

	if (dolock) {
		socket_lock(so, 1);
	}

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	/*
	 * If this is a listening socket or if this is a previously-accepted
	 * socket that has been marked as inactive, reject the connect request.
	 */
	if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
		error = EOPNOTSUPP;
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] "
			    "(%d)\n", __func__, proc_pid(p),
			    proc_best_name(p),
			    so->so_gencnt,
			    SOCK_DOM(so), SOCK_TYPE(so), error);
		}
		if (dolock) {
			socket_unlock(so, 1);
		}
		return error;
	}

	if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
		if (dolock) {
			socket_unlock(so, 1);
		}
		return EPERM;
	}

	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so)))) {
		error = EISCONN;
	} else {
		/*
		 * For connected v4/v6 sockets, check if the destination
		 * address is associated with a domain name and whether it
		 * is a tracker domain. Mark the socket accordingly. Skip
		 * the lookup if the socket has already been marked a tracker.
		 */
		if (!(so->so_flags1 & SOF1_KNOWN_TRACKER) && IS_INET(so)) {
			if (tracker_lookup(so->so_flags & SOF_DELEGATED ? so->e_uuid : so->last_uuid, nam, &metadata) == 0) {
				if (metadata.flags & SO_TRACKER_ATTRIBUTE_FLAGS_TRACKER) {
					so->so_flags1 |= SOF1_KNOWN_TRACKER;
				}
				if (metadata.flags & SO_TRACKER_ATTRIBUTE_FLAGS_APP_APPROVED) {
					so->so_flags1 |= SOF1_APPROVED_APP_DOMAIN;
				}
				if (necp_set_socket_domain_attributes(so, metadata.domain, metadata.domain_owner)) {
					printf("connect() - failed necp_set_socket_domain_attributes");
				}
			}
		}

#if NECP
		/* Update NECP evaluation after setting any domain via the tracker checks */
		so_update_necp_policy(so, NULL, nam);
#endif /* NECP */

		/*
		 * Run connect filter before calling protocol:
		 * - non-blocking connect returns before completion;
		 */
		error = sflt_connectout(so, nam);
		if (error != 0) {
			if (error == EJUSTRETURN) {
				error = 0;
			}
		} else {
			error = (*so->so_proto->pr_usrreqs->pru_connect)
			    (so, nam, p);
			if (error != 0) {
				so->so_state &= ~SS_ISCONNECTING;
			}
		}
	}
	if (dolock) {
		socket_unlock(so, 1);
	}
	return error;
}

int
|
|
soconnect(struct socket *so, struct sockaddr *nam)
|
|
{
|
|
return soconnectlock(so, nam, 1);
|
|
}
|
|
|
|
/*
|
|
* Returns: 0 Success
|
|
* <pru_connect2>:EINVAL[AF_UNIX]
|
|
* <pru_connect2>:EPROTOTYPE[AF_UNIX]
|
|
* <pru_connect2>:??? [other protocol families]
|
|
*
|
|
* Notes: <pru_connect2> is not supported by [TCP].
|
|
*/
|
|
int
|
|
soconnect2(struct socket *so1, struct socket *so2)
|
|
{
|
|
int error;
|
|
|
|
socket_lock(so1, 1);
|
|
if (so2->so_proto->pr_lock) {
|
|
socket_lock(so2, 1);
|
|
}
|
|
|
|
error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
|
|
|
|
socket_unlock(so1, 1);
|
|
if (so2->so_proto->pr_lock) {
|
|
socket_unlock(so2, 1);
|
|
}
|
|
return error;
|
|
}

int
soconnectxlocked(struct socket *so, struct sockaddr *src,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
    uint32_t arglen, uio_t auio, user_ssize_t *bytes_written)
{
	int error;
	tracker_metadata_t metadata = { };

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	/*
	 * If this is a listening socket or if this is a previously-accepted
	 * socket that has been marked as inactive, reject the connect request.
	 */
	if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
		error = EOPNOTSUPP;
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] "
			    "(%d)\n", __func__, proc_pid(p),
			    proc_best_name(p),
			    so->so_gencnt,
			    SOCK_DOM(so), SOCK_TYPE(so), error);
		}
		return error;
	}

	if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
		return EPERM;
	}

	/*
	 * If protocol is connection-based, can only connect once
	 * unless PR_MULTICONN is set. Otherwise, if connected,
	 * try to disconnect first. This allows user to disconnect
	 * by connecting to, e.g., a null address.
	 */
	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) &&
	    !(so->so_proto->pr_flags & PR_MULTICONN) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so)) != 0)) {
		error = EISCONN;
	} else {
		/*
		 * For TCP, check if destination address is a tracker and mark the socket accordingly
		 * (only if it hasn't been marked yet).
		 */
		if (SOCK_CHECK_TYPE(so, SOCK_STREAM) && SOCK_CHECK_PROTO(so, IPPROTO_TCP) &&
		    !(so->so_flags1 & SOF1_KNOWN_TRACKER)) {
			if (tracker_lookup(so->so_flags & SOF_DELEGATED ? so->e_uuid : so->last_uuid, dst, &metadata) == 0) {
				if (metadata.flags & SO_TRACKER_ATTRIBUTE_FLAGS_TRACKER) {
					so->so_flags1 |= SOF1_KNOWN_TRACKER;
				}
				if (metadata.flags & SO_TRACKER_ATTRIBUTE_FLAGS_APP_APPROVED) {
					so->so_flags1 |= SOF1_APPROVED_APP_DOMAIN;
				}
				if (necp_set_socket_domain_attributes(so, metadata.domain, metadata.domain_owner)) {
					printf("connectx() - failed necp_set_socket_domain_attributes\n");
				}
			}
		}

		if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
		    (flags & CONNECT_DATA_IDEMPOTENT)) {
			so->so_flags1 |= SOF1_DATA_IDEMPOTENT;

			if (flags & CONNECT_DATA_AUTHENTICATED) {
				so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
			}
		}

		/*
		 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
		 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
		 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
		 * Case 3 allows user to combine write with connect even if they have
		 * no use for TFO (such as regular TCP, and UDP).
		 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
		 */
		if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
		    ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
			so->so_flags1 |= SOF1_PRECONNECT_DATA;
		}

		/*
		 * If a user sets data idempotent and does not pass an uio, or
		 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error; reset
		 * SOF1_DATA_IDEMPOTENT.
		 */
		if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
		    (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
			/* We should return EINVAL instead perhaps. */
			so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
		}

		/*
		 * Run connect filter before calling protocol:
		 * - non-blocking connect returns before completion;
		 */
		error = sflt_connectout(so, dst);
		if (error != 0) {
			/* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
			so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
			if (error == EJUSTRETURN) {
				error = 0;
			}
		} else {
			error = (*so->so_proto->pr_usrreqs->pru_connectx)
			    (so, src, dst, p, ifscope, aid, pcid,
			    flags, arg, arglen, auio, bytes_written);
			if (error != 0) {
				so->so_state &= ~SS_ISCONNECTING;
				if (error != EINPROGRESS) {
					so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
				}
			}
		}
	}

	return error;
}
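
/*
 * Illustrative userland sketch (not part of the kernel): connectx(2) is the
 * Darwin entry point for this path; CONNECT_RESUME_ON_READ_WRITE and
 * CONNECT_DATA_IDEMPOTENT drive the SOF1_PRECONNECT_DATA /
 * SOF1_DATA_IDEMPOTENT handling above (TCP Fast Open).  A sketch only;
 * consult the connectx(2) man page for the authoritative signature:
 *
 *	sa_endpoints_t ep = {
 *		.sae_dstaddr = (struct sockaddr *)&sin,
 *		.sae_dstaddrlen = sizeof(sin),
 *	};
 *	struct iovec iov = { .iov_base = "GET /", .iov_len = 5 };
 *	size_t sent = 0;
 *	sae_connid_t cid = SAE_CONNID_ANY;
 *	int rc = connectx(s, &ep, SAE_ASSOCID_ANY,
 *	    CONNECT_RESUME_ON_READ_WRITE | CONNECT_DATA_IDEMPOTENT,
 *	    &iov, 1, &sent, &cid);
 */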

int
sodisconnectlocked(struct socket *so)
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}

	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
	if (error == 0) {
		sflt_notify(so, sock_evt_disconnected, NULL);
	}

bad:
	return error;
}

/* Locking version */
int
sodisconnect(struct socket *so)
{
	int error;

	socket_lock(so, 1);
	error = sodisconnectlocked(so);
	socket_unlock(so, 1);
	return error;
}

int
sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid)
{
	int error;

	/*
	 * Call the protocol disconnectx handler; let it handle all
	 * matters related to the connection state of this session.
	 */
	error = (*so->so_proto->pr_usrreqs->pru_disconnectx)(so, aid, cid);
	if (error == 0) {
		/*
		 * The event applies only for the session, not for
		 * the disconnection of individual subflows.
		 */
		if (so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) {
			sflt_notify(so, sock_evt_disconnected, NULL);
		}
	}
	return error;
}

int
sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
{
	int error;

	socket_lock(so, 1);
	error = sodisconnectxlocked(so, aid, cid);
	socket_unlock(so, 1);
	return error;
}

#define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)

/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 *
 * Returns:	0		Success
 *		EPIPE
 *	sblock:EWOULDBLOCK
 *	sblock:EINTR
 *	sbwait:EBADF
 *	sbwait:EINTR
 *	[so_error]:???
 */
int
sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid,
    int32_t clen, int32_t atomic, int flags, int *sblocked)
{
	int error = 0;
	int32_t space;
	int assumelock = 0;

restart:
	if (*sblocked == 0) {
		if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
		    so->so_send_filt_thread != 0 &&
		    so->so_send_filt_thread == current_thread()) {
			/*
			 * We're being called recursively from a filter,
			 * allow this to continue. Radar 4150520.
			 * Don't set sblocked because we don't want
			 * to perform an unlock later.
			 */
			assumelock = 1;
		} else {
			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
			if (error) {
				if (so->so_flags & SOF_DEFUNCT) {
					goto defunct;
				}
				return error;
			}
			*sblocked = 1;
		}
	}

	/*
	 * If a send attempt is made on a socket that has been marked
	 * as inactive (disconnected), reject the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
defunct:
		error = EPIPE;
		SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] (%d)\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    so->so_gencnt,
		    SOCK_DOM(so), SOCK_TYPE(so), error);
		return error;
	}

	if (so->so_state & SS_CANTSENDMORE) {
#if CONTENT_FILTER
		/*
		 * Can re-inject data of half-closed connections
		 */
		if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
		    so->so_snd.sb_cfil_thread == current_thread() &&
		    cfil_sock_data_pending(&so->so_snd) != 0) {
			CFIL_LOG(LOG_INFO,
			    "so %llx ignore SS_CANTSENDMORE",
			    (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
		} else
#endif /* CONTENT_FILTER */
		return EPIPE;
	}
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		return error;
	}

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
			if (((so->so_state & SS_ISCONFIRMING) == 0) &&
			    (resid != 0 || clen == 0) &&
			    !(so->so_flags1 & SOF1_PRECONNECT_DATA)) {
				return ENOTCONN;
			}
		} else if (addr == 0) {
			return (so->so_proto->pr_flags & PR_CONNREQUIRED) ?
			       ENOTCONN : EDESTADDRREQ;
		}
	}

	space = sbspace(&so->so_snd);

	if (flags & MSG_OOB) {
		space += 1024;
	}
	if ((atomic && resid > so->so_snd.sb_hiwat) ||
	    clen > so->so_snd.sb_hiwat) {
		return EMSGSIZE;
	}

	if ((space < resid + clen &&
	    (atomic || (space < (int32_t)so->so_snd.sb_lowat) ||
	    space < clen)) ||
	    (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
		/*
		 * don't block the connectx call when there's more data
		 * than can be copied.
		 */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			if (space == 0) {
				return EWOULDBLOCK;
			}
			if (space < (int32_t)so->so_snd.sb_lowat) {
				return 0;
			}
		}
		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
		    assumelock) {
			return EWOULDBLOCK;
		}
		sbunlock(&so->so_snd, TRUE);    /* keep socket locked */
		*sblocked = 0;
		error = sbwait(&so->so_snd);
		if (error) {
			if (so->so_flags & SOF_DEFUNCT) {
				goto defunct;
			}
			return error;
		}
		goto restart;
	}
	return 0;
}
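
/*
 * Illustrative userland sketch (not part of the kernel): the checks above are
 * what turn a full send buffer into EWOULDBLOCK for non-blocking callers.
 * A typical retry loop over plain POSIX calls:
 *
 *	ssize_t n = send(s, buf, len, MSG_DONTWAIT);
 *	if (n == -1 && (errno == EWOULDBLOCK || errno == EAGAIN)) {
 *		struct pollfd pfd = { .fd = s, .events = POLLOUT };
 *		(void) poll(&pfd, 1, -1);	// userland analogue of sbwait()
 *		n = send(s, buf, len, 0);	// retry once writable
 *	}
 */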

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not). Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 *
 * Returns:	0		Success
 *		EOPNOTSUPP
 *		EINVAL
 *		ENOBUFS
 *	uiomove:EFAULT
 *	sosendcheck:EPIPE
 *	sosendcheck:EWOULDBLOCK
 *	sosendcheck:EINTR
 *	sosendcheck:EBADF
 *	sosendcheck:EINTR
 *	sosendcheck:???	[value from so_error]
 *	<pru_send>:ECONNRESET[TCP]
 *	<pru_send>:EINVAL[TCP]
 *	<pru_send>:ENOBUFS[TCP]
 *	<pru_send>:EADDRINUSE[TCP]
 *	<pru_send>:EADDRNOTAVAIL[TCP]
 *	<pru_send>:EAFNOSUPPORT[TCP]
 *	<pru_send>:EACCES[TCP]
 *	<pru_send>:EAGAIN[TCP]
 *	<pru_send>:EPERM[TCP]
 *	<pru_send>:EMSGSIZE[TCP]
 *	<pru_send>:EHOSTUNREACH[TCP]
 *	<pru_send>:ENETUNREACH[TCP]
 *	<pru_send>:ENETDOWN[TCP]
 *	<pru_send>:ENOMEM[TCP]
 *	<pru_send>:???[TCP]	[ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_send>:EINVAL[AF_UNIX]
 *	<pru_send>:EOPNOTSUPP[AF_UNIX]
 *	<pru_send>:EPIPE[AF_UNIX]
 *	<pru_send>:ENOTCONN[AF_UNIX]
 *	<pru_send>:EISCONN[AF_UNIX]
 *	<pru_send>:???[AF_UNIX]	[whatever a filter author chooses]
 *	<sf_data_out>:???	[whatever a filter author chooses]
 *
 * Notes:	Other <pru_send> returns depend on the protocol family; all
 *		<sf_data_out> returns depend on what the filter author causes
 *		their filter to return.
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
	struct mbuf **mp;
	struct mbuf *m, *freelist = NULL;
	struct soflow_hash_entry *dgram_flow_entry = NULL;
	user_ssize_t space, len, resid, orig_resid;
	int clen = 0, error, dontroute, sendflags;
	int atomic = sosendallatonce(so) || top;
	int sblocked = 0;
	struct proc *p = current_proc();
	uint16_t headroom = 0;
	ssize_t mlen;
	boolean_t en_tracing = FALSE;

	if (uio != NULL) {
		resid = uio_resid(uio);
	} else {
		resid = top->m_pkthdr.len;
	}
	orig_resid = resid;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
	    so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);

	socket_lock(so, 1);

	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		dgram_flow_entry = soflow_get_flow(so, NULL, addr, control, resid, true, 0);
	}

	/*
	 * Trace if tracing, network (vs. unix) sockets, and
	 * non-loopback
	 */
	if (ENTR_SHOULDTRACE &&
	    (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp->inp_last_outifp != NULL &&
		    !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
			en_tracing = TRUE;
			KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
			    VM_KERNEL_ADDRPERM(so),
			    ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
			    (int64_t)resid);
		}
	}

	/*
	 * Re-injection should not affect process accounting
	 */
	if ((flags & MSG_SKIPCFIL) == 0) {
		so_update_last_owner_locked(so, p);
		so_update_policy(so);

#if NECP
		so_update_necp_policy(so, NULL, addr);
#endif /* NECP */
	}

	if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
		error = EOPNOTSUPP;
		goto out_locked;
	}

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid. On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
	 *
	 * Note: We limit resid to be a positive int value as we use
	 * imin() to set bytes_to_copy -- radr://14558484
	 */
	if (resid < 0 || resid > INT_MAX ||
	    (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out_locked;
	}

	dontroute = (flags & MSG_DONTROUTE) &&
	    (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

	if (control != NULL) {
		clen = control->m_len;
	}

	if (soreserveheadroom != 0) {
		headroom = so->so_pktheadroom;
	}

	do {
		error = sosendcheck(so, addr, resid, clen, atomic, flags,
		    &sblocked);
		if (error) {
			goto out_locked;
		}

		mp = &top;
		space = sbspace(&so->so_snd) - clen;
		space += ((flags & MSG_OOB) ? 1024 : 0);

		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR) {
					top->m_flags |= M_EOR;
				}
			} else {
				int chainlength;
				int bytes_to_copy;
				boolean_t jumbocl;
				boolean_t bigcl;
				int bytes_to_alloc;

				bytes_to_copy = imin((int)resid, (int)space);

				bytes_to_alloc = bytes_to_copy;
				if (top == NULL) {
					bytes_to_alloc += headroom;
				}

				if (sosendminchain > 0) {
					chainlength = 0;
				} else {
					chainlength = sosendmaxchain;
				}

				/*
				 * Use big 4 KB cluster when the outgoing interface
				 * does not prefer 2 KB clusters
				 */
				bigcl = !(so->so_flags1 & SOF1_IF_2KCL) ||
				    sosendbigcl_ignore_capab;

				/*
				 * Attempt to use larger than system page-size
				 * clusters for large writes only if there is
				 * a jumbo cluster pool and if the socket is
				 * marked accordingly.
				 */
				jumbocl = sosendjcl && njcl > 0 &&
				    ((so->so_flags & SOF_MULTIPAGES) ||
				    sosendjcl_ignore_capab) &&
				    bigcl;

				socket_unlock(so, 0);

				do {
					int num_needed;
					int hdrs_needed = (top == NULL) ? 1 : 0;

					/*
					 * Try to maintain a local cache of mbuf
					 * clusters needed to complete this
					 * write; the list is further limited to
					 * the number that are currently needed
					 * to fill the socket. This mechanism
					 * allows a large number of mbufs/
					 * clusters to be grabbed under a single
					 * mbuf lock... if we can't get any
					 * clusters, then fall back to trying
					 * for mbufs. If we fail early (or
					 * miscalculate the number needed), make
					 * sure to release any clusters we
					 * haven't yet consumed.
					 */
					if (freelist == NULL &&
					    bytes_to_alloc > MBIGCLBYTES &&
					    jumbocl) {
						num_needed =
						    bytes_to_alloc / M16KCLBYTES;

						if ((bytes_to_alloc -
						    (num_needed * M16KCLBYTES))
						    >= MINCLSIZE) {
							num_needed++;
						}

						freelist =
						    m_getpackets_internal(
							(unsigned int *)&num_needed,
							hdrs_needed, M_WAIT, 0,
							M16KCLBYTES);
						/*
						 * Fall back to 4K cluster size
						 * if allocation failed
						 */
					}

					if (freelist == NULL &&
					    bytes_to_alloc > MCLBYTES &&
					    bigcl) {
						num_needed =
						    bytes_to_alloc / MBIGCLBYTES;

						if ((bytes_to_alloc -
						    (num_needed * MBIGCLBYTES)) >=
						    MINCLSIZE) {
							num_needed++;
						}

						freelist =
						    m_getpackets_internal(
							(unsigned int *)&num_needed,
							hdrs_needed, M_WAIT, 0,
							MBIGCLBYTES);
						/*
						 * Fall back to cluster size
						 * if allocation failed
						 */
					}

					/*
					 * Allocate a cluster as we want to
					 * avoid splitting the data into more
					 * than one segment; using MINCLSIZE
					 * would lead us to allocate two mbufs
					 */
					if (soreserveheadroom != 0 &&
					    freelist == NULL &&
					    ((top == NULL &&
					    bytes_to_alloc > _MHLEN) ||
					    bytes_to_alloc > _MLEN)) {
						num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) /
						    MCLBYTES;
						freelist =
						    m_getpackets_internal(
							(unsigned int *)&num_needed,
							hdrs_needed, M_WAIT, 0,
							MCLBYTES);
						/*
						 * Fall back to a single mbuf
						 * if allocation failed
						 */
					} else if (freelist == NULL &&
					    bytes_to_alloc > MINCLSIZE) {
						num_needed =
						    bytes_to_alloc / MCLBYTES;

						if ((bytes_to_alloc -
						    (num_needed * MCLBYTES)) >=
						    MINCLSIZE) {
							num_needed++;
						}

						freelist =
						    m_getpackets_internal(
							(unsigned int *)&num_needed,
							hdrs_needed, M_WAIT, 0,
							MCLBYTES);
						/*
						 * Fall back to a single mbuf
						 * if allocation failed
						 */
					}
					/*
					 * For datagram protocols, leave
					 * headroom for protocol headers
					 * in the first cluster of the chain
					 */
					if (freelist != NULL && atomic &&
					    top == NULL && headroom > 0) {
						freelist->m_data += headroom;
					}

					/*
					 * Fall back to regular mbufs without
					 * reserving the socket headroom
					 */
					if (freelist == NULL) {
						if (SOCK_TYPE(so) != SOCK_STREAM || bytes_to_alloc <= MINCLSIZE) {
							if (top == NULL) {
								MGETHDR(freelist,
								    M_WAIT, MT_DATA);
							} else {
								MGET(freelist,
								    M_WAIT, MT_DATA);
							}
						}

						if (freelist == NULL) {
							error = ENOBUFS;
							socket_lock(so, 0);
							goto out_locked;
						}
						/*
						 * For datagram protocols,
						 * leave room for protocol
						 * headers in first mbuf.
						 */
						if (atomic && top == NULL &&
						    bytes_to_copy > 0 &&
						    bytes_to_copy < MHLEN) {
							MH_ALIGN(freelist,
							    bytes_to_copy);
						}
					}
					m = freelist;
					freelist = m->m_next;
					m->m_next = NULL;

					if ((m->m_flags & M_EXT)) {
						mlen = m->m_ext.ext_size -
						    M_LEADINGSPACE(m);
					} else if ((m->m_flags & M_PKTHDR)) {
						mlen = MHLEN - M_LEADINGSPACE(m);
						m_add_crumb(m, PKT_CRUMB_SOSEND);
					} else {
						mlen = MLEN - M_LEADINGSPACE(m);
					}
					len = imin((int)mlen, bytes_to_copy);

					chainlength += len;

					space -= len;

					error = uiomove(mtod(m, caddr_t),
					    (int)len, uio);

					resid = uio_resid(uio);

					m->m_len = (int32_t)len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error) {
						break;
					}
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR) {
							top->m_flags |= M_EOR;
						}
						break;
					}
					bytes_to_copy = imin((int)resid, (int)space);
				} while (space > 0 &&
				    (chainlength < sosendmaxchain || atomic ||
				    resid < MINCLSIZE));

				socket_lock(so, 0);

				if (error) {
					goto out_locked;
				}
			}

			if (dontroute) {
				so->so_options |= SO_DONTROUTE;
			}

			/*
			 * Compute flags here, for pru_send and NKEs
			 *
			 * If the user set MSG_EOF, the protocol
			 * understands this flag, and there is nothing left to
			 * send, then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    ((flags & MSG_EOF) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			    (resid <= 0)) ? PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

			if ((flags & MSG_SKIPCFIL) == 0) {
				/*
				 * Socket filter processing
				 */
				error = sflt_data_out(so, addr, &top,
				    &control, (sendflags & MSG_OOB) ?
				    sock_data_filt_flag_oob : 0);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						goto packet_consumed;
					}
					goto out_locked;
				}
#if CONTENT_FILTER
				/*
				 * Content filter processing
				 */
				error = cfil_sock_data_out(so, addr, top,
				    control, sendflags, dgram_flow_entry);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						goto packet_consumed;
					}
					goto out_locked;
				}
#endif /* CONTENT_FILTER */
			}
			error = (*so->so_proto->pr_usrreqs->pru_send)
			    (so, sendflags, top, addr, control, p);

packet_consumed:
			if (dontroute) {
				so->so_options &= ~SO_DONTROUTE;
			}

			clen = 0;
			control = NULL;
			top = NULL;
			mp = &top;
			if (error) {
				goto out_locked;
			}
		} while (resid && space > 0);
	} while (resid);


out_locked:
	if (resid > orig_resid) {
		char pname[MAXCOMLEN] = {};
		pid_t current_pid = proc_pid(current_proc());
		proc_name(current_pid, pname, sizeof(pname));

		if (sosend_assert_panic != 0) {
			panic("sosend so %p resid %lld > orig_resid %lld proc %s:%d",
			    so, resid, orig_resid, pname, current_pid);
		} else {
			os_log_error(OS_LOG_DEFAULT, "sosend: so_gencnt %llu resid %lld > orig_resid %lld proc %s:%d",
			    so->so_gencnt, resid, orig_resid, pname, current_pid);
		}
	}

	if (sblocked) {
		sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
	} else {
		socket_unlock(so, 1);
	}
	if (top != NULL) {
		m_freem(top);
	}
	if (control != NULL) {
		m_freem(control);
	}
	if (freelist != NULL) {
		m_freem_list(freelist);
	}

	if (dgram_flow_entry != NULL) {
		soflow_free_flow(dgram_flow_entry);
	}

	soclearfastopen(so);

	if (en_tracing) {
		/* resid passed here is the bytes left in uio */
		KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
		    VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)(orig_resid - resid));
	}
	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid,
	    so->so_snd.sb_cc, space, error);

	return error;
}
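
/*
 * Illustrative userland sketch (not part of the kernel): sosend() services
 * the send(2)/sendto(2)/sendmsg(2) family.  Gathering two buffers into one
 * datagram with plain POSIX calls:
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr, .iov_len = hdrlen },
 *		{ .iov_base = payload, .iov_len = paylen },
 *	};
 *	struct msghdr msg = {
 *		.msg_name = &dst, .msg_namelen = sizeof(dst),
 *		.msg_iov = iov, .msg_iovlen = 2,
 *	};
 *	ssize_t n = sendmsg(s, &msg, 0);
 */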

int
sosend_reinject(struct socket *so, struct sockaddr *addr, struct mbuf *top, struct mbuf *control, uint32_t sendflags)
{
	struct mbuf *m0 = NULL, *control_end = NULL;

	socket_lock_assert_owned(so);

	/*
	 * top must point to the mbuf chain to be sent.
	 * If control is not NULL, top must be a packet header
	 */
	VERIFY(top != NULL &&
	    (control == NULL || top->m_flags & M_PKTHDR));

	/*
	 * If control is not passed in, see if we can get it
	 * from top.
	 */
	if (control == NULL && (top->m_flags & M_PKTHDR) == 0) {
		// Locate start of control if present and start of data
		for (m0 = top; m0 != NULL; m0 = m0->m_next) {
			if (m0->m_flags & M_PKTHDR) {
				top = m0;
				break;
			} else if (m0->m_type == MT_CONTROL) {
				if (control == NULL) {
					// Found start of control
					control = m0;
				}
				if (control != NULL && m0->m_next != NULL && m0->m_next->m_type != MT_CONTROL) {
					// Found end of control
					control_end = m0;
				}
			}
		}
		if (control_end != NULL) {
			control_end->m_next = NULL;
		}
	}

	int error = (*so->so_proto->pr_usrreqs->pru_send)
	    (so, sendflags, top, addr, control, current_proc());

	return error;
}

static struct mbuf *
mbuf_detach_control_from_list(struct mbuf **mp)
{
	struct mbuf *control = NULL;
	struct mbuf *m = *mp;

	if (m->m_type == MT_CONTROL) {
		struct mbuf *control_end;
		struct mbuf *n;

		n = control_end = control = m;

		/*
		 * Break the chain per mbuf type
		 */
		while (n != NULL && n->m_type == MT_CONTROL) {
			control_end = n;
			n = n->m_next;
		}
		control_end->m_next = NULL;
		*mp = n;
	}
	VERIFY(*mp != NULL);

	return control;
}
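
/*
 * Sketch of what mbuf_detach_control_from_list() does to a record whose
 * leading mbufs are ancillary data (MT_CONTROL) followed by data:
 *
 *	before:  *mp -> [MT_CONTROL] -> [MT_CONTROL] -> [MT_DATA] -> ...
 *	after:   *mp -> [MT_DATA] -> ...
 *	return:         [MT_CONTROL] -> [MT_CONTROL]   (detached chain)
 *
 * The caller is expected to reattach or free the returned control chain.
 */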

/*
 * Supported only for connected sockets (no address) without ancillary data
 * (control mbuf) for atomic protocols
 */
int
sosend_list(struct socket *so, struct mbuf *pktlist, size_t total_len, u_int *pktcnt, int flags)
{
	struct mbuf *m;
	struct soflow_hash_entry *dgram_flow_entry = NULL;
	int error, dontroute;
	int atomic = sosendallatonce(so);
	int sblocked = 0;
	struct proc *p = current_proc();
	struct mbuf *top = pktlist;
	bool skip_filt = (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) || (flags & MSG_SKIPCFIL);

	KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, *pktcnt,
	    so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);

	if (so->so_type != SOCK_DGRAM) {
		error = EINVAL;
		os_log(OS_LOG_DEFAULT, "sosend_list: so->so_type != SOCK_DGRAM error %d",
		    error);
		goto out;
	}
	if (atomic == 0) {
		error = EINVAL;
		os_log(OS_LOG_DEFAULT, "sosend_list: atomic == 0 error %d",
		    error);
		goto out;
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		os_log(OS_LOG_DEFAULT, "sosend_list: SS_ISCONNECTED not set error: %d",
		    error);
		goto out;
	}
	if (flags & ~(MSG_DONTWAIT | MSG_NBIO | MSG_SKIPCFIL)) {
		error = EINVAL;
		os_log(OS_LOG_DEFAULT, "sosend_list: flags 0x%x error %d",
		    flags, error);
		goto out;
	}

	socket_lock(so, 1);
	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		dgram_flow_entry = soflow_get_flow(so, NULL, NULL, NULL, total_len, true, 0);
	}

#if NECP
	so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */

	dontroute = (flags & MSG_DONTROUTE) &&
	    (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (dontroute) {
		so->so_options |= SO_DONTROUTE;
	}

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

	error = sosendcheck(so, NULL, 0, 0, atomic, flags, &sblocked);
	if (error) {
		os_log(OS_LOG_DEFAULT, "sosend_list: sosendcheck error %d",
		    error);
		goto release;
	}

	if (!skip_filt) {
		struct mbuf **prevnextp = NULL;

		for (m = top; m != NULL; m = m->m_nextpkt) {
			struct mbuf *control = NULL;
			struct mbuf *last_control = NULL;
			struct mbuf *nextpkt;

			/*
			 * Remove packet from the list of packets
			 */
			nextpkt = m->m_nextpkt;
			if (prevnextp != NULL) {
				*prevnextp = nextpkt;
			} else {
				top = nextpkt;
			}
			m->m_nextpkt = NULL;

			/*
			 * Break the chain per mbuf type
			 */
			if (m->m_type == MT_CONTROL) {
				control = mbuf_detach_control_from_list(&m);
			}
			/*
			 * Socket filter processing
			 */
			error = sflt_data_out(so, NULL, &m,
			    &control, 0);
			if (error != 0 && error != EJUSTRETURN) {
				os_log(OS_LOG_DEFAULT, "sosend_list: sflt_data_out error %d",
				    error);
				goto release;
			}

#if CONTENT_FILTER
			if (error == 0) {
				/*
				 * Content filter processing
				 */
				error = cfil_sock_data_out(so, NULL, m,
				    control, 0, dgram_flow_entry);
				if (error != 0 && error != EJUSTRETURN) {
					os_log(OS_LOG_DEFAULT, "sosend_list: cfil_sock_data_out error %d",
					    error);
					goto release;
				}
			}
#endif /* CONTENT_FILTER */
			if (error == EJUSTRETURN) {
				/*
				 * When swallowed by a filter, the packet is not
				 * in the list anymore
				 */
				error = 0;
			} else {
				/*
				 * Rebuild the mbuf chain of the packet
				 */
				if (control != NULL) {
					/*
					 * Find the tail of the detached
					 * control chain so the data mbufs
					 * can be linked back after it.
					 */
					for (last_control = control;
					    last_control->m_next != NULL;
					    last_control = last_control->m_next) {
						;
					}
					last_control->m_next = m;
					m = control;
				}
				/*
				 * Reinsert the packet in the list of packets
				 */
				m->m_nextpkt = nextpkt;
				if (prevnextp != NULL) {
					*prevnextp = m;
				} else {
					top = m;
				}
				prevnextp = &m->m_nextpkt;
			}
		}
	}

	if (top != NULL) {
		if (so->so_proto->pr_usrreqs->pru_send_list != pru_send_list_notsupp) {
			error = (*so->so_proto->pr_usrreqs->pru_send_list)
			    (so, top, pktcnt, flags);
			if (error != 0) {
				os_log(OS_LOG_DEFAULT, "sosend_list: pru_send_list error %d",
				    error);
			}
			top = NULL;
		} else {
			*pktcnt = 0;
			for (m = top; m != NULL; m = top) {
				struct mbuf *control = NULL;

				top = m->m_nextpkt;
				m->m_nextpkt = NULL;

				/*
				 * Break the chain per mbuf type
				 */
				if (m->m_type == MT_CONTROL) {
					control = mbuf_detach_control_from_list(&m);
				}

				error = (*so->so_proto->pr_usrreqs->pru_send)
				    (so, 0, m, NULL, control, current_proc());
				if (error != 0) {
					os_log(OS_LOG_DEFAULT, "sosend_list: pru_send error %d",
					    error);
					goto release;
				}
				*pktcnt += 1;
			}
		}
	}

release:
	if (dontroute) {
		so->so_options &= ~SO_DONTROUTE;
	}
	if (sblocked) {
		sbunlock(&so->so_snd, FALSE);   /* will unlock socket */
	} else {
		socket_unlock(so, 1);
	}
out:
	if (top != NULL) {
		os_log(OS_LOG_DEFAULT, "sosend_list: m_freem_list(top) with error %d",
		    error);
		m_freem_list(top);
	}

	if (dgram_flow_entry != NULL) {
		soflow_free_flow(dgram_flow_entry);
	}

	KERNEL_DEBUG(DBG_FNC_SOSEND_LIST | DBG_FUNC_END, so, total_len,
	    so->so_snd.sb_cc, 0, error);

	return error;
}
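
/*
 * Illustrative userland sketch (not part of the kernel), assuming the Darwin
 * batch syscall sendmsg_x(2) (declared in <sys/socket.h> on macOS) is the
 * caller of this path -- treat the exact entry point as an assumption:
 *
 *	struct msghdr_x msgs[8] = { ... };	// one msghdr_x per datagram
 *	ssize_t npkts = sendmsg_x(s, msgs, 8, 0);
 *	// npkts is the number of datagrams queued, or -1 on error
 */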

/*
 * May return ERESTART when packet is dropped by MAC policy check
 */
static int
soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
    struct mbuf **maddrp,
    int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
{
	int error = 0;
	struct mbuf *m = *mp;
	struct mbuf *nextrecord = *nextrecordp;

	KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Call the MAC framework for policy checking if we're in
	 * the user process context and the socket isn't connected.
	 */
	if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
		struct mbuf *m0 = m;
		/*
		 * Dequeue this record (temporarily) from the receive
		 * list since we're about to drop the socket's lock
		 * where a new record may arrive and be appended to
		 * the list. Upon MAC policy failure, the record
		 * will be freed. Otherwise, we'll add it back to
		 * the head of the list. We cannot rely on SB_LOCK
		 * because append operation uses the socket's lock.
		 */
		do {
			m->m_nextpkt = NULL;
			sbfree(&so->so_rcv, m);
			m = m->m_next;
		} while (m != NULL);
		m = m0;
		so->so_rcv.sb_mb = nextrecord;
		SB_EMPTY_FIXUP(&so->so_rcv);
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
		socket_unlock(so, 0);

		error = mac_socket_check_received(kauth_cred_get(), so,
		    mtod(m, struct sockaddr *));

		if (error != 0) {
			/*
			 * MAC policy failure; free this record and
			 * process the next record (or block until
			 * one is available). We have adjusted sb_cc
			 * and sb_mbcnt above so there is no need to
			 * call sbfree() again.
			 */
			m_freem(m);
			/*
			 * Clear SB_LOCK but don't unlock the socket.
			 * Process the next record or wait for one.
			 */
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);    /* stay locked */
			error = ERESTART;
			goto done;
		}
		socket_lock(so, 0);
		/*
		 * If the socket has been defunct'd, drop it.
		 */
		if (so->so_flags & SOF_DEFUNCT) {
			m_freem(m);
			error = ENOTCONN;
			goto done;
		}
		/*
		 * Re-adjust the socket receive list and re-enqueue
		 * the record in front of any packets which may have
		 * been appended while we dropped the lock.
		 */
		for (m = m0; m->m_next != NULL; m = m->m_next) {
			sballoc(&so->so_rcv, m);
		}
		sballoc(&so->so_rcv, m);
		if (so->so_rcv.sb_mb == NULL) {
			so->so_rcv.sb_lastrecord = m0;
			so->so_rcv.sb_mbtail = m;
		}
		m = m0;
		nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
		so->so_rcv.sb_mb = m;
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */
	if (psa != NULL) {
		*psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
		if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
			error = EWOULDBLOCK;
			goto done;
		}
	} else if (maddrp != NULL) {
		*maddrp = m;
	}
	if (flags & MSG_PEEK) {
		m = m->m_next;
	} else {
		sbfree(&so->so_rcv, m);
		if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
			panic("%s: about to create invalid socketbuf",
			    __func__);
			/* NOTREACHED */
		}
		if (maddrp == NULL) {
			MFREE(m, so->so_rcv.sb_mb);
		} else {
			so->so_rcv.sb_mb = m->m_next;
			m->m_next = NULL;
		}
		m = so->so_rcv.sb_mb;
		if (m != NULL) {
			m->m_nextpkt = nextrecord;
		} else {
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
done:
	*mp = m;
	*nextrecordp = nextrecord;

	return error;
}

/*
 * When peeking SCM_RIGHTS, the actual file descriptors are not yet created,
 * so clear the data portion in order not to leak the file pointers
 */
static void
sopeek_scm_rights(struct mbuf *rights)
{
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);

	if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) {
		VERIFY(cm->cmsg_len <= rights->m_len);
		memset(cm + 1, 0, cm->cmsg_len - sizeof(*cm));
	}
}
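
/*
 * Illustrative userland sketch (not part of the kernel): a MSG_PEEK of a
 * record carrying SCM_RIGHTS therefore shows a zeroed payload; the real
 * descriptors only materialize on the non-peek read.  Plain POSIX calls:
 *
 *	char b, cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = &b, .iov_len = 1 };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	recvmsg(s, &msg, MSG_PEEK);	// SCM_RIGHTS payload reads as zeros
 *	msg.msg_controllen = sizeof(cbuf);
 *	recvmsg(s, &msg, 0);		// now CMSG_DATA() holds a live fd
 */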

/*
 * Process one or more MT_CONTROL mbufs present before any data mbufs
 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
 * just copy the data; if !MSG_PEEK, we call into the protocol to
 * perform externalization.
 */
static int
soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
    struct mbuf **mp, struct mbuf **nextrecordp)
{
	int error = 0;
	struct mbuf *cm = NULL, *cmn;
	struct mbuf **cme = &cm;
	struct sockbuf *sb_rcv = &so->so_rcv;
	struct mbuf **msgpcm = NULL;
	struct mbuf *m = *mp;
	struct mbuf *nextrecord = *nextrecordp;
	struct protosw *pr = so->so_proto;

	/*
	 * Externalizing the control messages would require us to
	 * drop the socket's lock below. Once we re-acquire the
	 * lock, the mbuf chain might change. In order to preserve
	 * consistency, we unlink all control messages from the
	 * first mbuf chain in one shot and link them separately
	 * onto a different chain.
	 */
	do {
		if (flags & MSG_PEEK) {
			if (controlp != NULL) {
				if (*controlp == NULL) {
					msgpcm = controlp;
				}
				*controlp = m_copy(m, 0, m->m_len);

				/*
				 * If we failed to allocate an mbuf,
				 * release any previously allocated
				 * mbufs for control data. Return
				 * an error. Keep the mbufs in the
				 * socket as this is using
				 * MSG_PEEK flag.
				 */
				if (*controlp == NULL) {
					m_freem(*msgpcm);
					error = ENOBUFS;
					goto done;
				}

				if (pr->pr_domain->dom_externalize != NULL) {
					sopeek_scm_rights(*controlp);
				}

				controlp = &(*controlp)->m_next;
			}
			m = m->m_next;
		} else {
			m->m_nextpkt = NULL;
			sbfree(sb_rcv, m);
			sb_rcv->sb_mb = m->m_next;
			m->m_next = NULL;
			*cme = m;
			cme = &(*cme)->m_next;
			m = sb_rcv->sb_mb;
		}
	} while (m != NULL && m->m_type == MT_CONTROL);

	if (!(flags & MSG_PEEK)) {
		if (sb_rcv->sb_mb != NULL) {
			sb_rcv->sb_mb->m_nextpkt = nextrecord;
		} else {
			sb_rcv->sb_mb = nextrecord;
			SB_EMPTY_FIXUP(sb_rcv);
		}
		if (nextrecord == NULL) {
			sb_rcv->sb_lastrecord = m;
		}
	}

	SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");

	while (cm != NULL) {
		int cmsg_level;
		int cmsg_type;

		cmn = cm->m_next;
		cm->m_next = NULL;
		cmsg_level = mtod(cm, struct cmsghdr *)->cmsg_level;
		cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;

		/*
		 * Call the protocol to externalize SCM_RIGHTS message
		 * and return the modified message to the caller upon
		 * success. Otherwise, all other control messages are
		 * returned unmodified to the caller. Note that we
		 * only get into this loop if MSG_PEEK is not set.
		 */
		if (pr->pr_domain->dom_externalize != NULL &&
		    cmsg_level == SOL_SOCKET &&
		    cmsg_type == SCM_RIGHTS) {
			/*
			 * Release socket lock: see 3903171. This
			 * would also allow more records to be appended
			 * to the socket buffer. We still have SB_LOCK
			 * set on it, so we can be sure that the head
			 * of the mbuf chain won't change.
			 */
			socket_unlock(so, 0);
			error = (*pr->pr_domain->dom_externalize)(cm);
			socket_lock(so, 0);
		} else {
			error = 0;
		}

		if (controlp != NULL && error == 0) {
			*controlp = cm;
			controlp = &(*controlp)->m_next;
		} else {
			(void) m_free(cm);
		}
		cm = cmn;
	}
	/*
	 * Update the value of nextrecord in case we received new
	 * records when the socket was unlocked above for
	 * externalizing SCM_RIGHTS.
	 */
	if (m != NULL) {
		nextrecord = sb_rcv->sb_mb->m_nextpkt;
	} else {
		nextrecord = sb_rcv->sb_mb;
	}

done:
	*mp = m;
	*nextrecordp = nextrecord;

	return error;
}
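
/*
 * Illustrative userland sketch (not part of the kernel): the externalize
 * step above is what turns an AF_UNIX SCM_RIGHTS payload into real file
 * descriptors in the receiver.  Plain POSIX calls:
 *
 *	char b, cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = &b, .iov_len = 1 };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	if (recvmsg(s, &msg, 0) >= 0) {
 *		struct cmsghdr *c = CMSG_FIRSTHDR(&msg);
 *		if (c != NULL && c->cmsg_level == SOL_SOCKET &&
 *		    c->cmsg_type == SCM_RIGHTS) {
 *			int fd;
 *			memcpy(&fd, CMSG_DATA(c), sizeof(fd));
 *			// fd is now a usable descriptor in this process
 *		}
 *	}
 */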

/*
 * If we have less data than requested, block awaiting more
 * (subject to any timeout) if:
 *   1. the current count is less than the low water mark, or
 *   2. MSG_WAITALL is set, and it is possible to do the entire
 *	receive operation at once if we block (resid <= hiwat),
 * provided that:
 *   3. MSG_DONTWAIT is not set.
 * If MSG_WAITALL is set but resid is larger than the receive buffer,
 * we have to do the receive in sections, and thus risk returning
 * a short count if a timeout or signal occurs after we start.
 */
static boolean_t
so_should_wait(struct socket *so, struct uio *uio, struct mbuf *m, int flags)
{
	struct protosw *pr = so->so_proto;

	/* No mbufs in the receive queue? Wait! */
	if (m == NULL) {
		return true;
	}

	/* Not enough data in the receive socket buffer -- we may have to wait */
	if ((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio_resid(uio) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0) {
		/*
		 * The application set the low-water mark, so we should wait
		 * for this data to be present.
		 */
		if (so->so_rcv.sb_cc < so->so_rcv.sb_lowat) {
			return true;
		}

		/*
		 * The application wants all the data -- so let's try to do the
		 * receive operation at once by waiting for everything to
		 * be there.
		 */
		if ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat) {
			return true;
		}
	}

	return false;
}
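
/*
 * Illustrative userland sketch (not part of the kernel): the low-water check
 * above is what SO_RCVLOWAT controls.  With the option set, a blocking recv
 * does not wake up until at least that many bytes are queued (subject to
 * protocol support):
 *
 *	int lowat = 512;
 *	setsockopt(s, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat));
 *	char buf[1024];
 *	ssize_t n = recv(s, buf, sizeof(buf), 0);	// blocks until >= 512 bytes
 */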
|
|
|
|
/*
|
|
* Implement receive operations on a socket.
|
|
* We depend on the way that records are added to the sockbuf
|
|
* by sbappend*. In particular, each record (mbufs linked through m_next)
|
|
* must begin with an address if the protocol so specifies,
|
|
* followed by an optional mbuf or mbufs containing ancillary data,
|
|
* and then zero or more mbufs of data.
|
|
* In order to avoid blocking network interrupts for the entire time here,
|
|
* we splx() while doing the actual copy to user space.
|
|
* Although the sockbuf is locked, new data may still be appended,
|
|
* and thus we must maintain consistency of the sockbuf during that time.
|
|
*
|
|
* The caller may receive the data as a single mbuf chain by supplying
|
|
* an mbuf **mp0 for use in returning the chain. The uio is then used
|
|
* only for the count in uio_resid.
|
|
*
|
|
* Returns: 0 Success
|
|
* ENOBUFS
|
|
* ENOTCONN
|
|
* EWOULDBLOCK
|
|
* uiomove:EFAULT
|
|
* sblock:EWOULDBLOCK
|
|
* sblock:EINTR
|
|
* sbwait:EBADF
|
|
* sbwait:EINTR
|
|
* sodelayed_copy:EFAULT
|
|
* <pru_rcvoob>:EINVAL[TCP]
|
|
* <pru_rcvoob>:EWOULDBLOCK[TCP]
|
|
* <pru_rcvoob>:???
|
|
* <pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
|
|
* <pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
|
|
* <pr_domain->dom_externalize>:???
|
|
*
|
|
* Notes: Additional return values from calls through <pru_rcvoob> and
|
|
* <pr_domain->dom_externalize> depend on protocols other than
|
|
* TCP or AF_UNIX, which are documented above.
|
|
*/
|
|
int
|
|
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
|
|
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
|
|
{
|
|
struct mbuf *m, **mp, *ml = NULL;
|
|
struct mbuf *nextrecord, *free_list;
|
|
int flags, error, offset;
|
|
user_ssize_t len;
|
|
struct protosw *pr = so->so_proto;
|
|
int moff, type = 0;
|
|
user_ssize_t orig_resid = uio_resid(uio);
|
|
user_ssize_t delayed_copy_len;
|
|
int can_delay;
|
|
struct proc *p = current_proc();
|
|
boolean_t en_tracing = FALSE;
|
|
|
|
/*
|
|
* Sanity check on the length passed by caller as we are making 'int'
|
|
* comparisons
|
|
*/
|
|
if (orig_resid < 0 || orig_resid > INT_MAX) {
|
|
return EINVAL;
|
|
}
|
|
|
|
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so,
|
|
uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
|
|
so->so_rcv.sb_hiwat);
|
|
|
|
socket_lock(so, 1);
|
|
so_update_last_owner_locked(so, p);
|
|
so_update_policy(so);
|
|
|
|
#ifdef MORE_LOCKING_DEBUG
|
|
if (so->so_usecount == 1) {
|
|
panic("%s: so=%x no other reference on socket", __func__, so);
|
|
/* NOTREACHED */
|
|
}
|
|
#endif
|
|
mp = mp0;
|
|
if (psa != NULL) {
|
|
*psa = NULL;
|
|
}
|
|
if (controlp != NULL) {
|
|
*controlp = NULL;
|
|
}
|
|
if (flagsp != NULL) {
|
|
flags = *flagsp & ~MSG_EOR;
|
|
} else {
|
|
flags = 0;
|
|
}
|
|
|
|
/*
|
|
* If a recv attempt is made on a previously-accepted socket
|
|
* that has been marked as inactive (disconnected), reject
|
|
* the request.
|
|
*/
|
|
if (so->so_flags & SOF_DEFUNCT) {
|
|
struct sockbuf *sb = &so->so_rcv;
|
|
|
|
error = ENOTCONN;
|
|
SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] (%d)\n",
|
|
__func__, proc_pid(p), proc_best_name(p),
|
|
so->so_gencnt,
|
|
SOCK_DOM(so), SOCK_TYPE(so), error);
|
|
/*
|
|
* This socket should have been disconnected and flushed
|
|
* prior to being returned from sodefunct(); there should
|
|
* be no data on its receive list, so panic otherwise.
|
|
*/
|
|
if (so->so_state & SS_DEFUNCT) {
|
|
sb_empty_assert(sb, __func__);
|
|
}
|
|
socket_unlock(so, 1);
|
|
return error;
|
|
}
|
|
|
|
if ((so->so_flags1 & SOF1_PRECONNECT_DATA) &&
|
|
pr->pr_usrreqs->pru_preconnect) {
|
|
/*
|
|
* A user may set the CONNECT_RESUME_ON_READ_WRITE-flag but not
|
|
* calling write() right after this. *If* the app calls a read
|
|
* we do not want to block this read indefinetely. Thus,
|
|
* we trigger a connect so that the session gets initiated.
|
|
*/
|
|
error = (*pr->pr_usrreqs->pru_preconnect)(so);
|
|
|
|
if (error) {
|
|
socket_unlock(so, 1);
|
|
return error;
|
|
}
|
|
}
|
|
|
|
if (ENTR_SHOULDTRACE &&
|
|
(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
|
|
/*
|
|
* enable energy tracing for inet sockets that go over
|
|
* non-loopback interfaces only.
|
|
*/
|
|
struct inpcb *inp = sotoinpcb(so);
|
|
if (inp->inp_last_outifp != NULL &&
|
|
!(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
|
|
en_tracing = TRUE;
|
|
KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START,
|
|
VM_KERNEL_ADDRPERM(so),
|
|
((so->so_state & SS_NBIO) ?
|
|
kEnTrFlagNonBlocking : 0),
|
|
(int64_t)orig_resid);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* When SO_WANTOOBFLAG is set we try to get out-of-band data
|
|
* regardless of the flags argument. Here is the case were
|
|
* out-of-band data is not inline.
|
|
*/
|
|
if ((flags & MSG_OOB) ||
|
|
((so->so_options & SO_WANTOOBFLAG) != 0 &&
|
|
(so->so_options & SO_OOBINLINE) == 0 &&
|
|
(so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
|
|
m = m_get(M_WAIT, MT_DATA);
|
|
if (m == NULL) {
|
|
socket_unlock(so, 1);
|
|
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
|
|
ENOBUFS, 0, 0, 0, 0);
|
|
return ENOBUFS;
|
|
}
|
|
error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
|
|
if (error) {
|
|
goto bad;
|
|
}
|
|
socket_unlock(so, 0);
|
|
do {
|
|
error = uiomove(mtod(m, caddr_t),
|
|
imin((int)uio_resid(uio), m->m_len), uio);
|
|
m = m_free(m);
|
|
} while (uio_resid(uio) && error == 0 && m != NULL);
|
|
socket_lock(so, 0);
|
|
bad:
|
|
if (m != NULL) {
|
|
m_freem(m);
|
|
}
|
|
|
|
if ((so->so_options & SO_WANTOOBFLAG) != 0) {
|
|
if (error == EWOULDBLOCK || error == EINVAL) {
|
|
/*
|
|
* Let's try to get normal data:
|
|
* EWOULDBLOCK: out-of-band data not
|
|
* receive yet. EINVAL: out-of-band data
|
|
* already read.
|
|
*/
|
|
error = 0;
|
|
goto nooob;
|
|
} else if (error == 0 && flagsp != NULL) {
|
|
*flagsp |= MSG_OOB;
|
|
}
|
|
}
|
|
socket_unlock(so, 1);
|
|
if (en_tracing) {
|
|
KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
|
|
VM_KERNEL_ADDRPERM(so), 0,
|
|
(int64_t)(orig_resid - uio_resid(uio)));
|
|
}
|
|
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
|
|
0, 0, 0, 0);
|
|
|
|
return error;
|
|
}
|
|
nooob:
|
|
if (mp != NULL) {
|
|
*mp = NULL;
|
|
}
|
|
|
|
if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) {
|
|
(*pr->pr_usrreqs->pru_rcvd)(so, 0);
|
|
}
|
|
|
|
free_list = NULL;
|
|
delayed_copy_len = 0;
|
|
restart:
|
|
#ifdef MORE_LOCKING_DEBUG
|
|
if (so->so_usecount <= 1) {
|
|
printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
|
|
(uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
|
|
}
|
|
#endif
|
|
/*
|
|
* See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
|
|
* and if so just return to the caller. This could happen when
|
|
* soreceive() is called by a socket upcall function during the
|
|
* time the socket is freed. The socket buffer would have been
|
|
* locked across the upcall, therefore we cannot put this thread
|
|
* to sleep (else we will deadlock) or return EWOULDBLOCK (else
|
|
* we may livelock), because the lock on the socket buffer will
|
|
* only be released when the upcall routine returns to its caller.
|
|
* Because the socket has been officially closed, there can be
|
|
* no further read on it.
|
|
*
|
|
* A multipath subflow socket would have its SS_NOFDREF set by
|
|
* default, so check for SOF_MP_SUBFLOW socket flag; when the
|
|
* socket is closed for real, SOF_MP_SUBFLOW would be cleared.
|
|
*/
|
|
if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
|
|
(SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
|
|
socket_unlock(so, 1);
|
|
return 0;
|
|
}
|
|
|
|
error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
|
|
if (error) {
|
|
socket_unlock(so, 1);
|
|
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
|
|
0, 0, 0, 0);
|
|
if (en_tracing) {
|
|
KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
|
|
VM_KERNEL_ADDRPERM(so), 0,
|
|
(int64_t)(orig_resid - uio_resid(uio)));
|
|
}
|
|
return error;
|
|
}
|
|
|
|
m = so->so_rcv.sb_mb;
|
|
if (so_should_wait(so, uio, m, flags)) {
|
|
/*
|
|
* Panic if we notice inconsistencies in the socket's
|
|
* receive list; both sb_mb and sb_cc should correctly
|
|
* reflect the contents of the list, otherwise we may
|
|
* end up with false positives during select() or poll()
|
|
* which could put the application in a bad state.
|
|
*/
|
|
SB_MB_CHECK(&so->so_rcv);
|
|
|
|
if (so->so_error) {
|
|
if (m != NULL) {
|
|
goto dontblock;
|
|
}
|
|
error = so->so_error;
|
|
if ((flags & MSG_PEEK) == 0) {
|
|
so->so_error = 0;
|
|
}
|
|
goto release;
|
|
}
|
|
if (so->so_state & SS_CANTRCVMORE) {
|
|
#if CONTENT_FILTER
|
|
/*
|
|
* Deal with half closed connections
|
|
*/
|
|
if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
|
|
cfil_sock_data_pending(&so->so_rcv) != 0) {
|
|
CFIL_LOG(LOG_INFO,
|
|
"so %llx ignore SS_CANTRCVMORE",
|
|
(uint64_t)DEBUG_KERNEL_ADDRPERM(so));
|
|
} else
|
|
#endif /* CONTENT_FILTER */
|
|
if (m != NULL) {
|
|
goto dontblock;
|
|
} else {
|
|
goto release;
|
|
}
|
|
}
|
|
for (; m != NULL; m = m->m_next) {
|
|
if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
|
|
m = so->so_rcv.sb_mb;
|
|
goto dontblock;
|
|
}
|
|
}
|
|
if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
|
|
(so->so_proto->pr_flags & PR_CONNREQUIRED)) {
|
|
error = ENOTCONN;
|
|
goto release;
|
|
}
|
|
if (uio_resid(uio) == 0) {
|
|
goto release;
|
|
}
|
|
|
|
if ((so->so_state & SS_NBIO) ||
|
|
(flags & (MSG_DONTWAIT | MSG_NBIO))) {
|
|
error = EWOULDBLOCK;
|
|
goto release;
|
|
}
|
|
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
|
|
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
|
|
sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
|
|
#if EVEN_MORE_LOCKING_DEBUG
|
|
if (socket_debug) {
|
|
printf("Waiting for socket data\n");
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Depending on the protocol (e.g. TCP), the following
|
|
* might cause the socket lock to be dropped and later
|
|
* be reacquired, and more data could have arrived and
|
|
* have been appended to the receive socket buffer by
|
|
* the time it returns. Therefore, we only sleep in
|
|
* sbwait() below if and only if the wait-condition is still
|
|
* true.
|
|
*/
|
|
if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
|
|
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
|
|
}
|
|
|
|
error = 0;
|
|
if (so_should_wait(so, uio, so->so_rcv.sb_mb, flags)) {
|
|
error = sbwait(&so->so_rcv);
|
|
}
|
|
|
|
#if EVEN_MORE_LOCKING_DEBUG
|
|
if (socket_debug) {
|
|
printf("SORECEIVE - sbwait returned %d\n", error);
|
|
}
|
|
#endif
|
|
if (so->so_usecount < 1) {
|
|
panic("%s: after 2nd sblock so=%p ref=%d on socket",
|
|
__func__, so, so->so_usecount);
|
|
/* NOTREACHED */
|
|
}
|
|
if (error) {
|
|
socket_unlock(so, 1);
|
|
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
|
|
0, 0, 0, 0);
|
|
if (en_tracing) {
|
|
KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
|
|
VM_KERNEL_ADDRPERM(so), 0,
|
|
(int64_t)(orig_resid - uio_resid(uio)));
|
|
}
|
|
return error;
|
|
}
|
|
goto restart;
|
|
}
|
|
dontblock:
|
|
OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
|
|
SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
|
|
SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
|
|
nextrecord = m->m_nextpkt;
|
|
|
|
if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
|
|
error = soreceive_addr(p, so, psa, NULL, flags, &m, &nextrecord,
|
|
mp0 == NULL);
|
|
if (error == ERESTART) {
|
|
goto restart;
|
|
} else if (error != 0) {
|
|
goto release;
|
|
}
|
|
orig_resid = 0;
|
|
}
|
|
|
|
/*
|
|
* Process one or more MT_CONTROL mbufs present before any data mbufs
|
|
* in the first mbuf chain on the socket buffer. If MSG_PEEK, we
|
|
* just copy the data; if !MSG_PEEK, we call into the protocol to
|
|
* perform externalization.
|
|
*/
|
|
if (m != NULL && m->m_type == MT_CONTROL) {
|
|
error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
|
|
if (error != 0) {
|
|
goto release;
|
|
}
|
|
orig_resid = 0;
|
|
}
|
|
|
|
if (m != NULL) {
|
|
if (!(flags & MSG_PEEK)) {
|
|
/*
|
|
* We get here because m points to an mbuf following
|
|
* any MT_SONAME or MT_CONTROL mbufs which have been
|
|
* processed above. In any case, m should be pointing
|
|
* to the head of the mbuf chain, and the nextrecord
|
|
* should be either NULL or equal to m->m_nextpkt.
|
|
* See comments above about SB_LOCK.
|
|
*/
|
|
if (m != so->so_rcv.sb_mb ||
|
|
m->m_nextpkt != nextrecord) {
|
|
panic("%s: post-control !sync so=%p m=%p "
|
|
"nextrecord=%p\n", __func__, so, m,
|
|
nextrecord);
|
|
/* NOTREACHED */
|
|
}
|
|
if (nextrecord == NULL) {
|
|
so->so_rcv.sb_lastrecord = m;
|
|
}
|
|
}
|
|
type = m->m_type;
|
|
if (type == MT_OOBDATA) {
|
|
flags |= MSG_OOB;
|
|
}
|
|
} else {
|
|
if (!(flags & MSG_PEEK)) {
|
|
SB_EMPTY_FIXUP(&so->so_rcv);
|
|
}
|
|
}
|
|
SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
|
|
SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
|
|
|
|
moff = 0;
|
|
offset = 0;
|
|
|
|
if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) {
|
|
can_delay = 1;
|
|
} else {
|
|
can_delay = 0;
|
|
}
|
|
|
|
while (m != NULL &&
|
|
(uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
|
|
if (m->m_type == MT_OOBDATA) {
|
|
if (type != MT_OOBDATA) {
|
|
break;
|
|
}
|
|
} else if (type == MT_OOBDATA) {
|
|
break;
|
|
}
|
|
|
|
if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
|
|
break;
|
|
}
|
|
/*
|
|
* Make sure to allways set MSG_OOB event when getting
|
|
* out of band data inline.
|
|
*/
|
|
if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
|
|
(so->so_options & SO_OOBINLINE) != 0 &&
|
|
(so->so_state & SS_RCVATMARK) != 0) {
|
|
flags |= MSG_OOB;
|
|
}
|
|
so->so_state &= ~SS_RCVATMARK;
|
|
len = uio_resid(uio) - delayed_copy_len;
|
|
if (so->so_oobmark && len > so->so_oobmark - offset) {
|
|
len = so->so_oobmark - offset;
|
|
}
|
|
if (len > m->m_len - moff) {
|
|
len = m->m_len - moff;
|
|
}
|
|
/*
|
|
* If mp is set, just pass back the mbufs.
|
|
* Otherwise copy them out via the uio, then free.
|
|
* Sockbuf must be consistent here (points to current mbuf,
|
|
* it points to next record) when we drop priority;
|
|
* we must note any additions to the sockbuf when we
|
|
* block interrupts again.
|
|
*/
|
|
if (mp == NULL) {
|
|
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
|
|
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
|
|
if (can_delay && len == m->m_len) {
|
|
/*
|
|
* only delay the copy if we're consuming the
|
|
* mbuf and we're NOT in MSG_PEEK mode
|
|
* and we have enough data to make it worthwile
|
|
* to drop and retake the lock... can_delay
|
|
* reflects the state of the 2 latter
|
|
* constraints moff should always be zero
|
|
* in these cases
|
|
*/
|
|
delayed_copy_len += len;
|
|
} else {
|
|
if (delayed_copy_len) {
|
|
error = sodelayed_copy(so, uio,
|
|
&free_list, &delayed_copy_len);
|
|
|
|
if (error) {
|
|
goto release;
|
|
}
|
|
/*
|
|
* can only get here if MSG_PEEK is not
|
|
* set therefore, m should point at the
|
|
* head of the rcv queue; if it doesn't,
|
|
* it means something drastically
|
|
* changed while we were out from behind
|
|
* the lock in sodelayed_copy. perhaps
|
|
* a RST on the stream. in any event,
|
|
* the stream has been interrupted. it's
|
|
* probably best just to return whatever
|
|
* data we've moved and let the caller
|
|
* sort it out...
|
|
*/
|
|
if (m != so->so_rcv.sb_mb) {
|
|
break;
|
|
}
|
|
}
|
|
socket_unlock(so, 0);
|
|
error = uiomove(mtod(m, caddr_t) + moff,
|
|
(int)len, uio);
|
|
socket_lock(so, 0);
|
|
|
|
if (error) {
|
|
goto release;
|
|
}
|
|
}
|
|
} else {
|
|
uio_setresid(uio, (uio_resid(uio) - len));
|
|
}
|
|
if (len == m->m_len - moff) {
|
|
if (m->m_flags & M_EOR) {
|
|
flags |= MSG_EOR;
|
|
}
|
|
if (flags & MSG_PEEK) {
|
|
m = m->m_next;
|
|
moff = 0;
|
|
} else {
|
|
nextrecord = m->m_nextpkt;
|
|
sbfree(&so->so_rcv, m);
|
|
m->m_nextpkt = NULL;
|
|
|
|
if (mp != NULL) {
|
|
*mp = m;
|
|
mp = &m->m_next;
|
|
so->so_rcv.sb_mb = m = m->m_next;
|
|
*mp = NULL;
|
|
} else {
|
|
if (free_list == NULL) {
|
|
free_list = m;
|
|
} else {
|
|
ml->m_next = m;
|
|
}
|
|
ml = m;
|
|
so->so_rcv.sb_mb = m = m->m_next;
|
|
ml->m_next = NULL;
|
|
}
|
|
if (m != NULL) {
|
|
m->m_nextpkt = nextrecord;
|
|
if (nextrecord == NULL) {
|
|
so->so_rcv.sb_lastrecord = m;
|
|
}
|
|
} else {
|
|
so->so_rcv.sb_mb = nextrecord;
|
|
SB_EMPTY_FIXUP(&so->so_rcv);
|
|
}
|
|
SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
|
|
SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
|
|
}
|
|
} else {
|
|
if (flags & MSG_PEEK) {
|
|
moff += len;
|
|
} else {
|
|
if (mp != NULL) {
|
|
int copy_flag;
|
|
|
|
if (flags & MSG_DONTWAIT) {
|
|
copy_flag = M_DONTWAIT;
|
|
} else {
|
|
copy_flag = M_WAIT;
|
|
}
|
|
*mp = m_copym(m, 0, (int)len, copy_flag);
|
|
/*
|
|
* Failed to allocate an mbuf?
|
|
* Adjust uio_resid back, it was
|
|
* adjusted down by len bytes which
|
|
* we didn't copy over.
|
|
*/
|
|
if (*mp == NULL) {
|
|
						uio_setresid(uio,
						    (uio_resid(uio) + len));
						break;
					}
				}
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark) {
					break;
				}
			}
		}
		if (flags & MSG_EOR) {
			break;
		}
		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
		 * (for non-atomic socket), we must not quit until
		 * "uio->uio_resid == 0" or an error termination.
		 * If a signal/timeout occurs, return with a short
		 * count but without error.  Keep sockbuf locked
		 * against other readers.
		 */
		while (flags & (MSG_WAITALL | MSG_WAITSTREAM) && m == NULL &&
		    (uio_resid(uio) - delayed_copy_len) > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || ((so->so_state & SS_CANTRCVMORE)
#if CONTENT_FILTER
			    && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
			    )) {
				goto release;
			}

			/*
			 * Depending on the protocol (e.g. TCP), the following
			 * might cause the socket lock to be dropped and later
			 * be reacquired, and more data could have arrived and
			 * have been appended to the receive socket buffer by
			 * the time it returns.  Therefore, we only sleep in
			 * sbwait() below if and only if the socket buffer is
			 * empty, in order to avoid a false sleep.
			 */
			if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			}

			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");

			if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
				error = 0;
				goto release;
			}
			/*
			 * have to wait until after we get back from the sbwait
			 * to do the copy because we will drop the lock if we
			 * have enough data that has been delayed... by dropping
			 * the lock we open up a window allowing the netisr
			 * thread to process the incoming packets and to change
			 * the state of this socket... we're issuing the sbwait
			 * because the socket is empty and we're expecting the
			 * netisr thread to wake us up when more packets arrive;
			 * if we allow that processing to happen and then sbwait
			 * we could stall forever with packets sitting in the
			 * socket if no further packets arrive from the remote
			 * side.
			 *
			 * we want to copy before we've collected all the data
			 * to satisfy this request to allow the copy to overlap
			 * the incoming packet processing on an MP system
			 */
			if (delayed_copy_len > sorecvmincopy &&
			    (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
				error = sodelayed_copy(so, uio,
				    &free_list, &delayed_copy_len);

				if (error) {
					goto release;
				}
			}
			m = so->so_rcv.sb_mb;
			if (m != NULL) {
				nextrecord = m->m_nextpkt;
			}
			SB_MB_CHECK(&so->so_rcv);
		}
	}
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1) {
		panic("%s: after big while so=%p ref=%d on socket",
		    __func__, so, so->so_usecount);
		/* NOTREACHED */
	}
#endif

	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
		if (so->so_options & SO_DONTTRUNC) {
			flags |= MSG_RCVMORE;
		} else {
			flags |= MSG_TRUNC;
			if ((flags & MSG_PEEK) == 0) {
				(void) sbdroprecord(&so->so_rcv);
			}
		}
	}

	/*
	 * pru_rcvd below (for TCP) may cause more data to be received
	 * if the socket lock is dropped prior to sending the ACK; some
	 * legacy OpenTransport applications don't handle this well
	 * (if it receives less data than requested while MSG_HAVEMORE
	 * is set), and so we set the flag now based on what we know
	 * prior to calling pru_rcvd.
	 */
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
		flags |= MSG_HAVEMORE;
	}

	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			so->so_rcv.sb_mb = nextrecord;
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL) {
				so->so_rcv.sb_lastrecord = nextrecord;
			}
			SB_MB_CHECK(&so->so_rcv);
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
		}
	}

	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
		if (error) {
			goto release;
		}
	}
	if (free_list != NULL) {
		m_freem_list(free_list);
		free_list = NULL;
	}

	if (orig_resid == uio_resid(uio) && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
		goto restart;
	}

	if (flagsp != NULL) {
		*flagsp |= flags;
	}
release:
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1) {
		panic("%s: release so=%p ref=%d on socket", __func__,
		    so, so->so_usecount);
		/* NOTREACHED */
	}
#endif
	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
	}

	if (free_list != NULL) {
		m_freem_list(free_list);
	}

	sbunlock(&so->so_rcv, FALSE); /* will unlock socket */

	if (en_tracing) {
		KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
		    VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)(orig_resid - uio_resid(uio)));
	}
	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
	    so->so_rcv.sb_cc, 0, error);

	return error;
}
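
/*
 * Usage sketch (userspace, illustrative): the MSG_WAITALL/MSG_WAITSTREAM
 * loop above is what gives recv(2) its "fill the buffer, or stop early
 * only on EOF, signal, timeout or error" semantics on stream sockets:
 *
 *	char buf[4096];
 *	ssize_t n = recv(fd, buf, sizeof(buf), MSG_WAITALL);
 *
 * A short count with no error corresponds to the signal/timeout path
 * described in the comment inside that loop.
 */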

/*
 * Returns:	0			Success
 *	uiomove:EFAULT
 */
static int
sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
    user_ssize_t *resid)
{
	int error = 0;
	struct mbuf *m;

	m = *free_list;

	socket_unlock(so, 0);

	while (m != NULL && error == 0) {
		error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
		m = m->m_next;
	}
	m_freem_list(*free_list);

	*free_list = NULL;
	*resid = 0;

	socket_lock(so, 0);

	return error;
}
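
/*
 * Note on the pattern above: sodelayed_copy() deliberately drops the
 * socket lock around uiomove(), since copying out to user space may
 * fault and sleep.  This is why soreceive() re-reads so_rcv.sb_mb and
 * nextrecord after each call: the receive path may have run and changed
 * the socket buffer while the lock was dropped.
 */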

int
soreceive_m_list(struct socket *so, u_int *pktcntp, struct mbuf **maddrp,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, **mp;
	struct mbuf *nextrecord;
	int flags, error;
	struct protosw *pr = so->so_proto;
	struct proc *p = current_proc();
	u_int npkts = 0;
	struct mbuf *free_list = NULL;
	int sblocked = 0;

	/*
	 * Sanity check on the parameters passed by caller
	 */
	if (mp0 == NULL || pktcntp == NULL) {
		return EINVAL;
	}
	if (*pktcntp > SO_MAX_MSG_X || *pktcntp == 0) {
		return EINVAL;
	}

	mp = mp0;
	*mp0 = NULL;
	if (controlp != NULL) {
		*controlp = NULL;
	}
	if (maddrp != NULL) {
		*maddrp = NULL;
	}
	if (flagsp != NULL) {
		flags = *flagsp;
	} else {
		flags = 0;
	}

	KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START, so,
	    *pktcntp, so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
	    so->so_rcv.sb_hiwat);

	socket_lock(so, 1);
	so_update_last_owner_locked(so, p);
	so_update_policy(so);

#if NECP
	so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 * the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		error = ENOTCONN;
		SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] (%d)\n",
		    __func__, proc_pid(p), proc_best_name(p),
		    so->so_gencnt,
		    SOCK_DOM(so), SOCK_TYPE(so), error);
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT) {
			sb_empty_assert(sb, __func__);
		}
		goto release;
	}

	*mp = NULL;

restart:
	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller.  This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed.  The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE)) {
		error = 0;
		goto out;
	}

	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error) {
		goto out;
	}
	sblocked = 1;

	m = so->so_rcv.sb_mb;
	/*
	 * Block awaiting more datagrams if needed
	 */
	if (m == NULL || ((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < so->so_rcv.sb_lowat)) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

		if (so->so_error) {
			if (m != NULL) {
				goto dontblock;
			}
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0) {
				so->so_error = 0;
			}
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m != NULL) {
				goto dontblock;
			} else {
				goto release;
			}
		}
		for (; m != NULL; m = m->m_next) {
			if (m->m_flags & M_EOR) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		}
		if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT | MSG_NBIO))) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");

		sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
		sblocked = 0;

		error = sbwait(&so->so_rcv);
		if (error != 0) {
			goto release;
		}
		goto restart;
	}
dontblock:
	m = so->so_rcv.sb_mb;
	if (m == NULL) {
		goto release;
	}

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;

	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		struct mbuf *maddr = NULL;

		error = soreceive_addr(p, so, NULL, &maddr, flags, &m,
		    &nextrecord, 1);
		if (error == ERESTART) {
			goto restart;
		} else if (error != 0) {
			goto release;
		}

		if (maddr != NULL) {
			maddr->m_nextpkt = NULL;
			maddr->m_next = NULL;
			if (maddrp != NULL) {
				*maddrp = maddr;
				maddrp = &maddr->m_nextpkt;
			} else {
				maddr->m_next = free_list;
				free_list = maddr;
			}
		}
	}

	/*
	 * Process one or more MT_CONTROL mbufs present before any data mbufs
	 * in the first mbuf chain on the socket buffer.
	 * We call into the protocol to perform externalization.
	 */
	if (m != NULL && m->m_type == MT_CONTROL) {
		struct mbuf *control = NULL;

		error = soreceive_ctl(so, &control, flags, &m, &nextrecord);
		if (error != 0) {
			goto release;
		}
		if (control != NULL) {
			control->m_nextpkt = NULL;
			control->m_next = NULL;
			if (controlp != NULL) {
				*controlp = control;
				controlp = &control->m_nextpkt;
			} else {
				control->m_next = free_list;
				free_list = control;
			}
		}
	}

	/*
	 * Link the packet to the list
	 */
	if (m != NULL) {
		if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
			panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
		}
		m->m_nextpkt = NULL;
		*mp = m;
		mp = &m->m_nextpkt;
	}
	while (m != NULL) {
		sbfree(&so->so_rcv, m);

		m = m->m_next;
	}

	so->so_rcv.sb_mb = nextrecord;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second
	 * part makes sure sb_lastrecord is up-to-date if
	 * there is still data in the socket buffer.
	 */
	if (so->so_rcv.sb_mb == NULL) {
		so->so_rcv.sb_mbtail = NULL;
		so->so_rcv.sb_lastrecord = NULL;
	} else if (nextrecord->m_nextpkt == NULL) {
		so->so_rcv.sb_lastrecord = nextrecord;
	}
	SB_MB_CHECK(&so->so_rcv);

	SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");

	npkts += 1;

	/*
	 * We continue as long as we have received fewer packets than
	 * requested and the socket buffer is not empty.
	 */
	if (npkts < *pktcntp) {
		if (so->so_rcv.sb_mb != NULL) {
			goto dontblock;
		}
		if ((flags & MSG_WAITALL) != 0) {
			goto restart;
		}
	}

	if (flagsp != NULL) {
		*flagsp |= flags;
	}

release:
	/*
	 * pru_rcvd may cause more data to be received if the socket lock
	 * is dropped so we set MSG_HAVEMORE now based on what we know.
	 * That way the caller won't be surprised if it receives less data
	 * than requested.
	 */
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
		flags |= MSG_HAVEMORE;
	}

	if (pr->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) {
		(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}

	if (sblocked) {
		sbunlock(&so->so_rcv, FALSE); /* will unlock socket */
	} else {
		socket_unlock(so, 1);
	}

out:
	*pktcntp = npkts;
	/*
	 * Amortize the cost of freeing the mbufs
	 */
	if (free_list != NULL) {
		m_freem_list(free_list);
	}

	KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
	    0, 0, 0, 0);
	return error;
}
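
/*
 * soreceive_m_list() backs batched datagram reception (the recvmsg_x()
 * interface on Darwin).  A sketch of the intended calling pattern, assuming
 * that userspace interface and its msghdr_x layout:
 *
 *	struct msghdr_x msgs[8] = {};
 *	// point each msgs[i].msg_iov at a receive buffer first
 *	ssize_t n = recvmsg_x(fd, msgs, 8, 0);
 *
 * Each mbuf chain linked via *mp above becomes one returned message;
 * *pktcntp must be between 1 and SO_MAX_MSG_X or the call fails with
 * EINVAL.
 */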

static int
so_statistics_event_to_nstat_event(int64_t *input_options,
    uint64_t *nstat_event)
{
	int error = 0;
	switch (*input_options) {
	case SO_STATISTICS_EVENT_ENTER_CELLFALLBACK:
		*nstat_event = NSTAT_EVENT_SRC_ENTER_CELLFALLBACK;
		break;
	case SO_STATISTICS_EVENT_EXIT_CELLFALLBACK:
		*nstat_event = NSTAT_EVENT_SRC_EXIT_CELLFALLBACK;
		break;
	case SO_STATISTICS_EVENT_ATTRIBUTION_CHANGE:
		*nstat_event = NSTAT_EVENT_SRC_ATTRIBUTION_CHANGE;
		break;
#if (DEBUG || DEVELOPMENT)
	case SO_STATISTICS_EVENT_RESERVED_2:
		*nstat_event = NSTAT_EVENT_SRC_RESERVED_2;
		break;
#endif /* (DEBUG || DEVELOPMENT) */
	default:
		error = EINVAL;
		break;
	}
	return error;
}

/*
 * Returns:	0			Success
 *		EINVAL
 *		ENOTCONN
 *	<pru_shutdown>:EINVAL
 *	<pru_shutdown>:EADDRNOTAVAIL[TCP]
 *	<pru_shutdown>:ENOBUFS[TCP]
 *	<pru_shutdown>:EMSGSIZE[TCP]
 *	<pru_shutdown>:EHOSTUNREACH[TCP]
 *	<pru_shutdown>:ENETUNREACH[TCP]
 *	<pru_shutdown>:ENETDOWN[TCP]
 *	<pru_shutdown>:ENOMEM[TCP]
 *	<pru_shutdown>:EACCES[TCP]
 *	<pru_shutdown>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_shutdown>:??? [other protocol families]
 */
int
soshutdown(struct socket *so, int how)
{
	int error;

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, how, 0, 0, 0, 0);

	switch (how) {
	case SHUT_RD:
	case SHUT_WR:
	case SHUT_RDWR:
		socket_lock(so, 1);
		if ((so->so_state &
		    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
			error = ENOTCONN;
		} else {
			error = soshutdownlock(so, how);
		}
		socket_unlock(so, 1);
		break;
	default:
		error = EINVAL;
		break;
	}

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, how, error, 0, 0, 0);

	return error;
}
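
/*
 * Example (userspace, illustrative): a half-close of the send side maps
 * onto the SHUT_WR case above and ends in the protocol's pru_shutdown:
 *
 *	if (shutdown(fd, SHUT_WR) == -1 && errno == ENOTCONN) {
 *		;	// socket was not connected; see the state check above
 *	}
 */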

int
soshutdownlock_final(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;
	int error = 0;

	sflt_notify(so, sock_evt_shutdown, &how);

	if (how != SHUT_WR) {
		if ((so->so_state & SS_CANTRCVMORE) != 0) {
			/* read already shut down */
			error = ENOTCONN;
			goto done;
		}
		sorflush(so);
	}
	if (how != SHUT_RD) {
		if ((so->so_state & SS_CANTSENDMORE) != 0) {
			/* write already shut down */
			error = ENOTCONN;
			goto done;
		}
		error = (*pr->pr_usrreqs->pru_shutdown)(so);
	}
done:
	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0);
	return error;
}

int
soshutdownlock(struct socket *so, int how)
{
	int error = 0;

#if CONTENT_FILTER
	/*
	 * A content filter may delay the actual shutdown until it
	 * has processed the pending data
	 */
	if (so->so_flags & SOF_CONTENT_FILTER) {
		error = cfil_sock_shutdown(so, &how);
		if (error == EJUSTRETURN) {
			error = 0;
			goto done;
		} else if (error != 0) {
			goto done;
		}
	}
#endif /* CONTENT_FILTER */

	error = soshutdownlock_final(so, how);

done:
	return error;
}
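
/*
 * Note: EJUSTRETURN from cfil_sock_shutdown() is mapped to success above
 * because the content filter has taken ownership of the shutdown and will
 * complete it once the pending data has been processed; it does not mean
 * the shutdown was a no-op.
 */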

void
sowflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_snd;

	/*
	 * Obtain lock on the socket buffer (SB_LOCK).  This is required
	 * to prevent the socket buffer from being unexpectedly altered
	 * while it is used by another thread in socket send/receive.
	 *
	 * sblock() must not fail here, hence the assertion.
	 */
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
	VERIFY(sb->sb_flags & SB_LOCK);

	sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
	sb->sb_flags |= SB_DROP;
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;

	sbunlock(sb, TRUE); /* keep socket locked */

	selthreadclear(&sb->sb_sel);
	sbrelease(sb);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct sockbuf asb;
#ifdef notyet
	lck_mtx_t *mutex_held;
	/*
	 * XXX: This code is currently commented out, because we may get here
	 * as part of sofreelastref(), and at that time, pr_getlock() may no
	 * longer be able to return us the lock; this will be fixed in future.
	 */
	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}

	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif /* notyet */

	sflt_notify(so, sock_evt_flush_read, NULL);

	socantrcvmore(so);

	/*
	 * Obtain lock on the socket buffer (SB_LOCK).  This is required
	 * to prevent the socket buffer from being unexpectedly altered
	 * while it is used by another thread in socket send/receive.
	 *
	 * sblock() must not fail here, hence the assertion.
	 */
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
	VERIFY(sb->sb_flags & SB_LOCK);

	/*
	 * Copy only the relevant fields from "sb" to "asb" which we
	 * need for sbrelease() to function.  In particular, skip
	 * sb_sel as it contains the wait queue linkage, which would
	 * wreak havoc if we were to issue selthreadclear() on "asb".
	 * Make sure to not carry over SB_LOCK in "asb", as we need
	 * to acquire it later as part of sbrelease().
	 */
	bzero(&asb, sizeof(asb));
	asb.sb_cc = sb->sb_cc;
	asb.sb_hiwat = sb->sb_hiwat;
	asb.sb_mbcnt = sb->sb_mbcnt;
	asb.sb_mbmax = sb->sb_mbmax;
	asb.sb_ctl = sb->sb_ctl;
	asb.sb_lowat = sb->sb_lowat;
	asb.sb_mb = sb->sb_mb;
	asb.sb_mbtail = sb->sb_mbtail;
	asb.sb_lastrecord = sb->sb_lastrecord;
	asb.sb_so = sb->sb_so;
	asb.sb_flags = sb->sb_flags;
	asb.sb_flags &= ~(SB_LOCK | SB_SEL | SB_KNOTE | SB_UPCALL);
	asb.sb_flags |= SB_DROP;

	/*
	 * Ideally we'd bzero() these and preserve the ones we need;
	 * but to do that we'd need to shuffle things around in the
	 * sockbuf, and we can't do it now because there are KEXTS
	 * that are directly referring to the socket structure.
	 *
	 * Setting SB_DROP acts as a barrier to prevent further appends.
	 * Clearing SB_SEL is done for selthreadclear() below.
	 */
	sb->sb_cc = 0;
	sb->sb_hiwat = 0;
	sb->sb_mbcnt = 0;
	sb->sb_mbmax = 0;
	sb->sb_ctl = 0;
	sb->sb_lowat = 0;
	sb->sb_mb = NULL;
	sb->sb_mbtail = NULL;
	sb->sb_lastrecord = NULL;
	sb->sb_timeo.tv_sec = 0;
	sb->sb_timeo.tv_usec = 0;
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;
	sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
	sb->sb_flags |= SB_DROP;

	sbunlock(sb, TRUE); /* keep socket locked */

	/*
	 * Note that selthreadclear() is called on the original "sb" and
	 * not the local "asb" because of the way wait queue linkage is
	 * implemented.  Given that selwakeup() may be triggered, SB_SEL
	 * should no longer be set (cleared above.)
	 */
	selthreadclear(&sb->sb_sel);

	if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	}

	sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 *
 * Returns:	0			Success
 *		EINVAL
 *	copyin:EFAULT
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	size_t valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen) {
		return EINVAL;
	}
	if (valsize > len) {
		sopt->sopt_valsize = valsize = len;
	}

	if (sopt->sopt_p != kernproc) {
		return copyin(sopt->sopt_val, buf, valsize);
	}

	bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
	return 0;
}
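
/*
 * Typical use in an option handler (illustrative; this is the pattern used
 * throughout sosetoptlock() below for fixed-size integer options):
 *
 *	int optval;
 *	error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
 *	if (error != 0) {
 *		goto out;
 *	}
 */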

/*
 * sooptcopyin_timeval
 *	Copy in a timeval value into tv_p, and take into account whether
 *	the calling process is 64-bit or 32-bit.  Moved the sanity checking
 *	code here so that we can verify the 64-bit tv_sec value before we lose
 *	the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
 */
static int
sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
{
	int error;

	if (proc_is64bit(sopt->sopt_p)) {
		struct user64_timeval tv64;

		if (sopt->sopt_valsize < sizeof(tv64)) {
			return EINVAL;
		}

		sopt->sopt_valsize = sizeof(tv64);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
			if (error != 0) {
				return error;
			}
		} else {
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
			    sizeof(tv64));
		}
		if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
		    tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
			return EDOM;
		}

		tv_p->tv_sec = (__darwin_time_t)tv64.tv_sec;
		tv_p->tv_usec = tv64.tv_usec;
	} else {
		struct user32_timeval tv32;

		if (sopt->sopt_valsize < sizeof(tv32)) {
			return EINVAL;
		}

		sopt->sopt_valsize = sizeof(tv32);
		if (sopt->sopt_p != kernproc) {
			error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
			if (error != 0) {
				return error;
			}
		} else {
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
			    sizeof(tv32));
		}
#ifndef __LP64__
		/*
		 * K64todo "comparison is always false due to
		 * limited range of data type"
		 */
		if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
		    tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
			return EDOM;
		}
#endif
		tv_p->tv_sec = tv32.tv_sec;
		tv_p->tv_usec = tv32.tv_usec;
	}
	return 0;
}

int
soopt_cred_check(struct socket *so, int priv, boolean_t allow_root,
    boolean_t ignore_delegate)
{
	kauth_cred_t cred = NULL;
	proc_t ep = PROC_NULL;
	uid_t uid;
	int error = 0;

	if (ignore_delegate == false && so->so_flags & SOF_DELEGATED) {
		ep = proc_find(so->e_pid);
		if (ep) {
			cred = kauth_cred_proc_ref(ep);
		}
	}

	uid = kauth_cred_getuid(cred ? cred : so->so_cred);

	/* uid is 0 for root */
	if (uid != 0 || !allow_root) {
		error = priv_check_cred(cred ? cred : so->so_cred, priv, 0);
	}
	if (cred) {
		kauth_cred_unref(&cred);
	}
	if (ep != PROC_NULL) {
		proc_rele(ep);
	}

	return error;
}

/*
 * Returns:	0			Success
 *		EINVAL
 *		ENOPROTOOPT
 *		ENOBUFS
 *		EDOM
 *	sooptcopyin:EINVAL
 *	sooptcopyin:EFAULT
 *	sooptcopyin_timeval:EINVAL
 *	sooptcopyin_timeval:EFAULT
 *	sooptcopyin_timeval:EDOM
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	sflt_attach_private:??? [whatever a filter author chooses]
 *	<sf_setoption>:??? [whatever a filter author chooses]
 *
 * Notes:	Other <pr_ctloutput> returns depend on the protocol family;
 *		all <sf_setoption> returns depend on what the filter author
 *		causes their filter to return.
 */
int
sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
{
	int error, optval;
	int64_t long_optval;
	struct linger l;
	struct timeval tv;

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

	if (dolock) {
		socket_lock(so, 1);
	}

	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
	    (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
		/* the socket has been shutdown, no more sockopt's */
		error = EINVAL;
		goto out;
	}

	error = sflt_setsockopt(so, sopt);
	if (error != 0) {
		if (error == EJUSTRETURN) {
			error = 0;
		}
		goto out;
	}

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
			goto out;
		}
		error = ENOPROTOOPT;
	} else {
		/*
		 * Allow socket-level (SOL_SOCKET) options to be filtered by
		 * the protocol layer, if needed.  A zero value returned from
		 * the handler means use default socket-level processing as
		 * done by the rest of this routine.  Otherwise, any other
		 * return value indicates that the option is unsupported.
		 */
		if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
		    pru_socheckopt(so, sopt)) != 0) {
			goto out;
		}

		error = 0;
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC: {
			error = sooptcopyin(sopt, &l, sizeof(l), sizeof(l));
			if (error != 0) {
				goto out;
			}
			/* Make sure to use sane values */
			if (sopt->sopt_name == SO_LINGER) {
				so->so_linger = (short)l.l_linger;
			} else {
				so->so_linger = (short)((long)l.l_linger * hz);
			}
			if (l.l_onoff != 0) {
				so->so_options |= SO_LINGER;
			} else {
				so->so_options &= ~SO_LINGER;
			}
			break;
		}
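		/*
		 * Example (userspace, illustrative): both options take a
		 * struct linger; SO_LINGER_SEC is the variant whose l_linger
		 * is expressed in seconds (converted to clock ticks above):
		 *
		 *	struct linger l = { .l_onoff = 1, .l_linger = 5 };
		 *	setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof(l));
		 */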
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_TIMESTAMP_MONOTONIC:
		case SO_TIMESTAMP_CONTINUOUS:
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
		case SO_NOWAKEFROMSLEEP:
		case SO_NOAPNFALLBK:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval) {
				so->so_options |= sopt->sopt_name;
			} else {
				so->so_options &= ~sopt->sopt_name;
			}
#if SKYWALK
			inp_update_netns_flags(so);
#endif /* SKYWALK */
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto out;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF: {
				struct sockbuf *sb =
				    (sopt->sopt_name == SO_SNDBUF) ?
				    &so->so_snd : &so->so_rcv;
				if (sbreserve(sb, (u_int32_t)optval) == 0) {
					error = ENOBUFS;
					goto out;
				}
				sb->sb_flags |= SB_USRSIZE;
				sb->sb_flags &= ~SB_AUTOSIZE;
				sb->sb_idealsize = (u_int32_t)optval;
				break;
			}
			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT: {
				int space = sbspace(&so->so_snd);
				uint32_t hiwat = so->so_snd.sb_hiwat;

				if (so->so_snd.sb_flags & SB_UNIX) {
					struct unpcb *unp =
					    (struct unpcb *)(so->so_pcb);
					if (unp != NULL &&
					    unp->unp_conn != NULL) {
						struct socket *so2 = unp->unp_conn->unp_socket;
						hiwat += unp->unp_conn->unp_cc;
						space = sbspace(&so2->so_rcv);
					}
				}

				so->so_snd.sb_lowat =
				    (optval > hiwat) ?
				    hiwat : optval;

				if (space >= so->so_snd.sb_lowat) {
					sowwakeup(so);
				}
				break;
			}
			case SO_RCVLOWAT: {
				int64_t data_len;
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				if (so->so_rcv.sb_flags & SB_UNIX) {
					struct unpcb *unp =
					    (struct unpcb *)(so->so_pcb);
					if (unp != NULL &&
					    unp->unp_conn != NULL) {
						struct socket *so2 = unp->unp_conn->unp_socket;
						data_len = so2->so_snd.sb_cc
						    - so2->so_snd.sb_ctl;
					} else {
						data_len = so->so_rcv.sb_cc
						    - so->so_rcv.sb_ctl;
					}
				} else {
					data_len = so->so_rcv.sb_cc
					    - so->so_rcv.sb_ctl;
				}

				if (data_len >= so->so_rcv.sb_lowat) {
					sorwakeup(so);
				}
				break;
			}
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin_timeval(sopt, &tv);
			if (error != 0) {
				goto out;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = tv;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = tv;
				break;
			}
			break;
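
		/*
		 * Example (userspace, illustrative): the timeouts stored in
		 * sb_timeo above later bound the sbwait() sleeps in
		 * soreceive()/sosend():
		 *
		 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 0 };
		 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
		 */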

		case SO_NKE: {
			struct so_nke nke;

			error = sooptcopyin(sopt, &nke, sizeof(nke),
			    sizeof(nke));
			if (error != 0) {
				goto out;
			}

			error = sflt_attach_internal(so, nke.nke_handle);
			break;
		}

		case SO_NOSIGPIPE:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_NOSIGPIPE;
			} else {
				so->so_flags &= ~SOF_NOSIGPIPE;
			}
			break;

		case SO_NOADDRERR:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_NOADDRAVAIL;
			} else {
				so->so_flags &= ~SOF_NOADDRAVAIL;
			}
			break;

		case SO_REUSESHAREUID:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_REUSESHAREUID;
			} else {
				so->so_flags &= ~SOF_REUSESHAREUID;
			}
			break;

		case SO_NOTIFYCONFLICT:
			if (kauth_cred_issuser(kauth_cred_get()) == 0) {
				error = EPERM;
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_NOTIFYCONFLICT;
			} else {
				so->so_flags &= ~SOF_NOTIFYCONFLICT;
			}
			break;

		case SO_RESTRICTIONS:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}

			error = so_set_restrictions(so, optval);
			break;

		case SO_AWDL_UNRESTRICTED:
			if (SOCK_DOM(so) != PF_INET &&
			    SOCK_DOM(so) != PF_INET6) {
				error = EOPNOTSUPP;
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				error = soopt_cred_check(so,
				    PRIV_NET_RESTRICTED_AWDL, false, false);
				if (error == 0) {
					inp_set_awdl_unrestricted(
						sotoinpcb(so));
				}
			} else {
				inp_clear_awdl_unrestricted(sotoinpcb(so));
			}
			break;
		case SO_INTCOPROC_ALLOW:
			if (SOCK_DOM(so) != PF_INET6) {
				error = EOPNOTSUPP;
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0 &&
			    inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) {
				error = soopt_cred_check(so,
				    PRIV_NET_RESTRICTED_INTCOPROC, false, false);
				if (error == 0) {
					inp_set_intcoproc_allowed(
						sotoinpcb(so));
				}
			} else if (optval == 0) {
				inp_clear_intcoproc_allowed(sotoinpcb(so));
			}
			break;

		case SO_LABEL:
			error = EOPNOTSUPP;
			break;

		case SO_UPCALLCLOSEWAIT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_UPCALLCLOSEWAIT;
			} else {
				so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
			}
			break;

		case SO_RANDOMPORT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval != 0) {
				so->so_flags |= SOF_BINDRANDOMPORT;
			} else {
				so->so_flags &= ~SOF_BINDRANDOMPORT;
			}
			break;

		case SO_NP_EXTENSIONS: {
			struct so_np_extensions sonpx;

			error = sooptcopyin(sopt, &sonpx, sizeof(sonpx),
			    sizeof(sonpx));
			if (error != 0) {
				goto out;
			}
			if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Only one bit defined for now
			 */
			if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
				if ((sonpx.npx_flags & SONPX_SETOPTSHUT)) {
					so->so_flags |= SOF_NPX_SETOPTSHUT;
				} else {
					so->so_flags &= ~SOF_NPX_SETOPTSHUT;
				}
			}
			break;
		}

		case SO_TRAFFIC_CLASS: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval >= SO_TC_NET_SERVICE_OFFSET) {
				int netsvc = optval - SO_TC_NET_SERVICE_OFFSET;
				error = so_set_net_service_type(so, netsvc);
				goto out;
			}
			error = so_set_traffic_class(so, optval);
			if (error != 0) {
				goto out;
			}
			so->so_flags1 &= ~SOF1_TC_NET_SERV_TYPE;
			so->so_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
			break;
		}

		case SO_RECV_TRAFFIC_CLASS: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
			} else {
				so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
			}
			break;
		}

#if (DEVELOPMENT || DEBUG)
		case SO_TRAFFIC_CLASS_DBG: {
			struct so_tcdbg so_tcdbg;

			error = sooptcopyin(sopt, &so_tcdbg,
			    sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
			if (error != 0) {
				goto out;
			}
			error = so_set_tcdbg(so, &so_tcdbg);
			if (error != 0) {
				goto out;
			}
			break;
		}
#endif /* (DEVELOPMENT || DEBUG) */

		case SO_PRIVILEGED_TRAFFIC_CLASS:
			error = priv_check_cred(kauth_cred_get(),
			    PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
			if (error != 0) {
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
			} else {
				so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;
			}
			break;

#if (DEVELOPMENT || DEBUG)
		case SO_DEFUNCTIT:
			error = sosetdefunct(current_proc(), so, 0, FALSE);
			if (error == 0) {
				error = sodefunct(current_proc(), so, 0);
			}

			break;
#endif /* (DEVELOPMENT || DEBUG) */

		case SO_DEFUNCTOK:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
				if (error == 0) {
					error = EBADF;
				}
				goto out;
			}
			/*
			 * Any process can set SO_DEFUNCTOK (clear
			 * SOF_NODEFUNCT), but only root can clear
			 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
			 */
			if (optval == 0 &&
			    kauth_cred_issuser(kauth_cred_get()) == 0) {
				error = EPERM;
				goto out;
			}
			if (optval) {
				so->so_flags &= ~SOF_NODEFUNCT;
			} else {
				so->so_flags |= SOF_NODEFUNCT;
			}

			if (SOCK_DOM(so) == PF_INET ||
			    SOCK_DOM(so) == PF_INET6) {
				char s[MAX_IPv6_STR_LEN];
				char d[MAX_IPv6_STR_LEN];
				struct inpcb *inp = sotoinpcb(so);

				SODEFUNCTLOG("%s[%d, %s]: so 0x%llu "
				    "[%s %s:%d -> %s:%d] is now marked "
				    "as %seligible for "
				    "defunct\n", __func__, proc_selfpid(),
				    proc_best_name(current_proc()),
				    so->so_gencnt,
				    (SOCK_TYPE(so) == SOCK_STREAM) ?
				    "TCP" : "UDP", inet_ntop(SOCK_DOM(so),
				    ((SOCK_DOM(so) == PF_INET) ?
				    (void *)&inp->inp_laddr.s_addr :
				    (void *)&inp->in6p_laddr), s, sizeof(s)),
				    ntohs(inp->in6p_lport),
				    inet_ntop(SOCK_DOM(so),
				    (SOCK_DOM(so) == PF_INET) ?
				    (void *)&inp->inp_faddr.s_addr :
				    (void *)&inp->in6p_faddr, d, sizeof(d)),
				    ntohs(inp->in6p_fport),
				    (so->so_flags & SOF_NODEFUNCT) ?
				    "not " : "");
			} else {
				SODEFUNCTLOG("%s[%d, %s]: so 0x%llu [%d,%d] "
				    "is now marked as %seligible for "
				    "defunct\n",
				    __func__, proc_selfpid(),
				    proc_best_name(current_proc()),
				    so->so_gencnt,
				    SOCK_DOM(so), SOCK_TYPE(so),
				    (so->so_flags & SOF_NODEFUNCT) ?
				    "not " : "");
			}
			break;

		case SO_ISDEFUNCT:
			/* This option is not settable */
			error = EINVAL;
			break;

		case SO_OPPORTUNISTIC:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error == 0) {
				error = so_set_opportunistic(so, optval);
			}
			break;

		case SO_FLUSH:
			/* This option is handled by lower layer(s) */
			error = 0;
			break;

		case SO_RECV_ANYIF:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error == 0) {
				error = so_set_recv_anyif(so, optval);
			}
			break;

		case SO_TRAFFIC_MGT_BACKGROUND: {
			/* This option is handled by lower layer(s) */
			error = 0;
			break;
		}

#if FLOW_DIVERT
		case SO_FLOW_DIVERT_TOKEN:
			error = flow_divert_token_set(so, sopt);
			break;
#endif /* FLOW_DIVERT */

		case SO_DELEGATED:
			if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval))) != 0) {
				break;
			}

			error = so_set_effective_pid(so, optval, sopt->sopt_p, true);
			break;

		case SO_DELEGATED_UUID: {
			uuid_t euuid;

			if ((error = sooptcopyin(sopt, &euuid, sizeof(euuid),
			    sizeof(euuid))) != 0) {
				break;
			}

			error = so_set_effective_uuid(so, euuid, sopt->sopt_p, true);
			break;
		}

#if NECP
		case SO_NECP_ATTRIBUTES:
			if (SOCK_DOM(so) == PF_MULTIPATH) {
				/* Handled by MPTCP itself */
				break;
			}

			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}

			error = necp_set_socket_attributes(&sotoinpcb(so)->inp_necp_attributes, sopt);
			break;

		case SO_NECP_CLIENTUUID: {
			if (SOCK_DOM(so) == PF_MULTIPATH) {
				/* Handled by MPTCP itself */
				break;
			}

			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}

			struct inpcb *inp = sotoinpcb(so);
			if (!uuid_is_null(inp->necp_client_uuid)) {
				// Clear out the old client UUID if present
				necp_inpcb_remove_cb(inp);
			}

			error = sooptcopyin(sopt, &inp->necp_client_uuid,
			    sizeof(uuid_t), sizeof(uuid_t));
			if (error != 0) {
				goto out;
			}

			if (uuid_is_null(inp->necp_client_uuid)) {
				error = EINVAL;
				goto out;
			}

			pid_t current_pid = proc_pid(current_proc());
			error = necp_client_register_socket_flow(current_pid,
			    inp->necp_client_uuid, inp);
			if (error != 0) {
				uuid_clear(inp->necp_client_uuid);
				goto out;
			}

			if (inp->inp_lport != 0) {
				// There is a bound local port, so this is not
				// a fresh socket. Assign to the client.
				necp_client_assign_from_socket(current_pid, inp->necp_client_uuid, inp);
			}

			break;
		}
		case SO_NECP_LISTENUUID: {
			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}

			struct inpcb *inp = sotoinpcb(so);
			if (!uuid_is_null(inp->necp_client_uuid)) {
				error = EINVAL;
				goto out;
			}

			error = sooptcopyin(sopt, &inp->necp_client_uuid,
			    sizeof(uuid_t), sizeof(uuid_t));
			if (error != 0) {
				goto out;
			}

			if (uuid_is_null(inp->necp_client_uuid)) {
				error = EINVAL;
				goto out;
			}

			error = necp_client_register_socket_listener(proc_pid(current_proc()),
			    inp->necp_client_uuid, inp);
			if (error != 0) {
				uuid_clear(inp->necp_client_uuid);
				goto out;
			}

			// Mark that the port registration is held by NECP
			inp->inp_flags2 |= INP2_EXTERNAL_PORT;

			break;
		}

		case SO_RESOLVER_SIGNATURE: {
			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}
			error = necp_set_socket_resolver_signature(sotoinpcb(so), sopt);
			break;
		}
#endif /* NECP */

		case SO_EXTENDED_BK_IDLE:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error == 0) {
				error = so_set_extended_bk_idle(so, optval);
			}
			break;

		case SO_MARK_CELLFALLBACK:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval < 0) {
				error = EINVAL;
				goto out;
			}
			if (optval == 0) {
				so->so_flags1 &= ~SOF1_CELLFALLBACK;
			} else {
				so->so_flags1 |= SOF1_CELLFALLBACK;
			}
			break;

		case SO_MARK_CELLFALLBACK_UUID:
		{
			struct so_mark_cellfallback_uuid_args args;

			error = sooptcopyin(sopt, &args, sizeof(args),
			    sizeof(args));
			if (error != 0) {
				goto out;
			}
			error = nstat_userland_mark_rnf_override(args.flow_uuid,
			    args.flow_cellfallback);
			break;
		}

		case SO_FALLBACK_MODE:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval < SO_FALLBACK_MODE_NONE ||
			    optval > SO_FALLBACK_MODE_PREFER) {
				error = EINVAL;
				goto out;
			}
			so->so_fallback_mode = (u_int8_t)optval;
			break;

		case SO_MARK_KNOWN_TRACKER: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval < 0) {
				error = EINVAL;
				goto out;
			}
			if (optval == 0) {
				so->so_flags1 &= ~SOF1_KNOWN_TRACKER;
			} else {
				so->so_flags1 |= SOF1_KNOWN_TRACKER;
			}
			break;
		}

		case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval < 0) {
				error = EINVAL;
				goto out;
			}
			if (optval == 0) {
				so->so_flags1 &= ~SOF1_TRACKER_NON_APP_INITIATED;
			} else {
				so->so_flags1 |= SOF1_TRACKER_NON_APP_INITIATED;
			}
			break;
		}

		case SO_MARK_APPROVED_APP_DOMAIN: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval < 0) {
				error = EINVAL;
				goto out;
			}
			if (optval == 0) {
				so->so_flags1 &= ~SOF1_APPROVED_APP_DOMAIN;
			} else {
				so->so_flags1 |= SOF1_APPROVED_APP_DOMAIN;
			}
			break;
		}

		case SO_STATISTICS_EVENT:
			error = sooptcopyin(sopt, &long_optval,
			    sizeof(long_optval), sizeof(long_optval));
			if (error != 0) {
				goto out;
			}
			u_int64_t nstat_event = 0;
			error = so_statistics_event_to_nstat_event(
				&long_optval, &nstat_event);
			if (error != 0) {
				goto out;
			}
			nstat_pcb_event(sotoinpcb(so), nstat_event);
			break;

		case SO_NET_SERVICE_TYPE: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			error = so_set_net_service_type(so, optval);
			break;
		}

		case SO_QOSMARKING_POLICY_OVERRIDE:
			error = priv_check_cred(kauth_cred_get(),
			    PRIV_NET_QOSMARKING_POLICY_OVERRIDE, 0);
			if (error != 0) {
				goto out;
			}
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags1 &= ~SOF1_QOSMARKING_POLICY_OVERRIDE;
			} else {
				so->so_flags1 |= SOF1_QOSMARKING_POLICY_OVERRIDE;
			}
			break;

		case SO_MPKL_SEND_INFO: {
			struct so_mpkl_send_info so_mpkl_send_info;

			error = sooptcopyin(sopt, &so_mpkl_send_info,
			    sizeof(struct so_mpkl_send_info), sizeof(struct so_mpkl_send_info));
			if (error != 0) {
				goto out;
			}
			uuid_copy(so->so_mpkl_send_uuid, so_mpkl_send_info.mpkl_uuid);
			so->so_mpkl_send_proto = so_mpkl_send_info.mpkl_proto;

			if (uuid_is_null(so->so_mpkl_send_uuid) && so->so_mpkl_send_proto == 0) {
				so->so_flags1 &= ~SOF1_MPKL_SEND_INFO;
			} else {
				so->so_flags1 |= SOF1_MPKL_SEND_INFO;
			}
			break;
		}
		case SO_WANT_KEV_SOCKET_CLOSED: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags1 &= ~SOF1_WANT_KEV_SOCK_CLOSED;
			} else {
				so->so_flags1 |= SOF1_WANT_KEV_SOCK_CLOSED;
			}
			break;
		}
		case SO_MARK_WAKE_PKT: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags &= ~SOF_MARK_WAKE_PKT;
			} else {
				so->so_flags |= SOF_MARK_WAKE_PKT;
			}
			break;
		}
		case SO_RECV_WAKE_PKT: {
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error != 0) {
				goto out;
			}
			if (optval == 0) {
				so->so_flags &= ~SOF_RECV_WAKE_PKT;
			} else {
				so->so_flags |= SOF_RECV_WAKE_PKT;
			}
			break;
		}
		case SO_APPLICATION_ID: {
			so_application_id_t application_id = { 0 };

			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}
			error = sooptcopyin(sopt, &application_id, sizeof(application_id),
			    sizeof(application_id));
			if (error != 0) {
				goto out;
			}

			// The caller's uid must match the uid being set
			if (kauth_cred_getuid(so->so_cred) != application_id.uid) {
				error = EINVAL;
				printf("setsockopt: SO_APPLICATION_ID - wrong uid");
				goto out;
			}
			error = so_set_effective_uuid(so, application_id.effective_uuid, sopt->sopt_p, true);
			if (error != 0) {
				printf("setsockopt: SO_APPLICATION_ID - failed to set e_uuid");
				goto out;
			}
			if (application_id.persona_id != PERSONA_ID_NONE) {
				so->so_persona_id = application_id.persona_id;
			}
			break;
		}
		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			(void) so->so_proto->pr_ctloutput(so, sopt);
		}
	}
out:
	if (dolock) {
		socket_unlock(so, 1);
	}
	return error;
}

/* Helper routines for getsockopt */
int
sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
{
	int error;
	size_t valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer
	 * must be generated ahead of time.
	 */
	valsize = MIN(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(buf, sopt->sopt_val, valsize);
		} else {
			bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
		}
	}
	return error;
}

static int
sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
{
	int error;
	size_t len;
	struct user64_timeval tv64 = {};
	struct user32_timeval tv32 = {};
	const void *val;
	size_t valsize;

	error = 0;
	if (proc_is64bit(sopt->sopt_p)) {
		len = sizeof(tv64);
		tv64.tv_sec = tv_p->tv_sec;
		tv64.tv_usec = tv_p->tv_usec;
		val = &tv64;
	} else {
		len = sizeof(tv32);
		tv32.tv_sec = (user32_time_t)tv_p->tv_sec;
		tv32.tv_usec = tv_p->tv_usec;
		val = &tv32;
	}
	valsize = MIN(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != kernproc) {
			error = copyout(val, sopt->sopt_val, valsize);
		} else {
			bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
		}
	}
	return error;
}

/*
 * Return:	0			Success
 *		ENOPROTOOPT
 *	<pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
 *	<pr_ctloutput>:???
 *	<sf_getoption>:???
 */
int
sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	if (dolock) {
		socket_lock(so, 1);
	}

	error = sflt_getsockopt(so, sopt);
	if (error != 0) {
		if (error == EJUSTRETURN) {
			error = 0;
		}
		goto out;
	}

	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto != NULL &&
		    so->so_proto->pr_ctloutput != NULL) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
			goto out;
		}
		error = ENOPROTOOPT;
	} else {
		/*
		 * Allow socket-level (SOL_SOCKET) options to be filtered by
		 * the protocol layer, if needed.  A zero value returned from
		 * the handler means use default socket-level processing as
		 * done by the rest of this routine.  Otherwise, any other
		 * return value indicates that the option is unsupported.
		 */
		if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
		    pru_socheckopt(so, sopt)) != 0) {
			goto out;
		}

		error = 0;
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC:
			l.l_onoff = ((so->so_options & SO_LINGER) ? 1 : 0);
			l.l_linger = (sopt->sopt_name == SO_LINGER) ?
			    so->so_linger : so->so_linger / hz;
			error = sooptcopyout(sopt, &l, sizeof(l));
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_TIMESTAMP_MONOTONIC:
		case SO_TIMESTAMP_CONTINUOUS:
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
		case SO_NOWAKEFROMSLEEP:
		case SO_NOAPNFALLBK:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_NREAD:
			if (so->so_proto->pr_flags & PR_ATOMIC) {
				int pkt_total;
				struct mbuf *m1;

				pkt_total = 0;
				m1 = so->so_rcv.sb_mb;
				while (m1 != NULL) {
					if (m_has_mtype(m1, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
						pkt_total += m1->m_len;
					}
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else {
				optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
			}
			goto integer;

		case SO_NUMRCVPKT:
			if (so->so_proto->pr_flags & PR_ATOMIC) {
				int cnt = 0;
				struct mbuf *m1;

				m1 = so->so_rcv.sb_mb;
				while (m1 != NULL) {
					cnt += 1;
					m1 = m1->m_nextpkt;
				}
				optval = cnt;
				goto integer;
			} else {
				error = ENOPROTOOPT;
				break;
			}

		case SO_NWRITE:
			optval = so->so_snd.sb_cc;
			goto integer;

		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF: {
			u_int32_t hiwat = so->so_snd.sb_hiwat;

			if (so->so_snd.sb_flags & SB_UNIX) {
				struct unpcb *unp =
				    (struct unpcb *)(so->so_pcb);
				if (unp != NULL && unp->unp_conn != NULL) {
					hiwat += unp->unp_conn->unp_cc;
				}
			}

			optval = hiwat;
			goto integer;
		}
		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			tv = (sopt->sopt_name == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			error = sooptcopyout_timeval(sopt, &tv);
			break;

		case SO_NOSIGPIPE:
			optval = (so->so_flags & SOF_NOSIGPIPE);
			goto integer;

		case SO_NOADDRERR:
			optval = (so->so_flags & SOF_NOADDRAVAIL);
			goto integer;

		case SO_REUSESHAREUID:
			optval = (so->so_flags & SOF_REUSESHAREUID);
			goto integer;

		case SO_NOTIFYCONFLICT:
			optval = (so->so_flags & SOF_NOTIFYCONFLICT);
			goto integer;

		case SO_RESTRICTIONS:
			optval = so_get_restrictions(so);
			goto integer;

		case SO_AWDL_UNRESTRICTED:
			if (SOCK_DOM(so) == PF_INET ||
			    SOCK_DOM(so) == PF_INET6) {
				optval = inp_get_awdl_unrestricted(
					sotoinpcb(so));
				goto integer;
			} else {
				error = EOPNOTSUPP;
			}
			break;

		case SO_INTCOPROC_ALLOW:
			if (SOCK_DOM(so) == PF_INET6) {
				optval = inp_get_intcoproc_allowed(
					sotoinpcb(so));
				goto integer;
			} else {
				error = EOPNOTSUPP;
			}
			break;

		case SO_LABEL:
			error = EOPNOTSUPP;
			break;

		case SO_PEERLABEL:
			error = EOPNOTSUPP;
			break;

#ifdef __APPLE_API_PRIVATE
		case SO_UPCALLCLOSEWAIT:
			optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
			goto integer;
#endif
		case SO_RANDOMPORT:
			optval = (so->so_flags & SOF_BINDRANDOMPORT);
			goto integer;

		case SO_NP_EXTENSIONS: {
			struct so_np_extensions sonpx = {};

			sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
			    SONPX_SETOPTSHUT : 0;
			sonpx.npx_mask = SONPX_MASK_VALID;

			error = sooptcopyout(sopt, &sonpx,
			    sizeof(struct so_np_extensions));
			break;
		}

		case SO_TRAFFIC_CLASS:
			optval = so->so_traffic_class;
			goto integer;

		case SO_RECV_TRAFFIC_CLASS:
			optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
			goto integer;

#if (DEVELOPMENT || DEBUG)
		case SO_TRAFFIC_CLASS_DBG:
			error = sogetopt_tcdbg(so, sopt);
			break;
#endif /* (DEVELOPMENT || DEBUG) */

		case SO_PRIVILEGED_TRAFFIC_CLASS:
			optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
			goto integer;

		case SO_DEFUNCTOK:
			optval = !(so->so_flags & SOF_NODEFUNCT);
			goto integer;

		case SO_ISDEFUNCT:
			optval = (so->so_flags & SOF_DEFUNCT);
			goto integer;

		case SO_OPPORTUNISTIC:
			optval = so_get_opportunistic(so);
			goto integer;

		case SO_FLUSH:
			/* This option is not gettable */
			error = EINVAL;
			break;

		case SO_RECV_ANYIF:
			optval = so_get_recv_anyif(so);
			goto integer;

		case SO_TRAFFIC_MGT_BACKGROUND:
			/* This option is handled by lower layer(s) */
			if (so->so_proto != NULL &&
			    so->so_proto->pr_ctloutput != NULL) {
				(void) so->so_proto->pr_ctloutput(so, sopt);
			}
			break;

#if FLOW_DIVERT
		case SO_FLOW_DIVERT_TOKEN:
			error = flow_divert_token_get(so, sopt);
			break;
#endif /* FLOW_DIVERT */

#if NECP
		case SO_NECP_ATTRIBUTES:
			if (SOCK_DOM(so) == PF_MULTIPATH) {
				/* Handled by MPTCP itself */
				break;
			}

			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}

			error = necp_get_socket_attributes(&sotoinpcb(so)->inp_necp_attributes, sopt);
			break;

		case SO_NECP_CLIENTUUID: {
			uuid_t *ncu;

			if (SOCK_DOM(so) == PF_MULTIPATH) {
				ncu = &mpsotomppcb(so)->necp_client_uuid;
			} else if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
				ncu = &sotoinpcb(so)->necp_client_uuid;
			} else {
				error = EINVAL;
				goto out;
			}

			error = sooptcopyout(sopt, ncu, sizeof(uuid_t));
			break;
		}

		case SO_NECP_LISTENUUID: {
			uuid_t *nlu;

			if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
				if (sotoinpcb(so)->inp_flags2 & INP2_EXTERNAL_PORT) {
					nlu = &sotoinpcb(so)->necp_client_uuid;
				} else {
					error = ENOENT;
					goto out;
				}
			} else {
				error = EINVAL;
				goto out;
			}

			error = sooptcopyout(sopt, nlu, sizeof(uuid_t));
			break;
		}

		case SO_RESOLVER_SIGNATURE: {
			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}
			error = necp_get_socket_resolver_signature(sotoinpcb(so), sopt);
			break;
		}

#endif /* NECP */

#if CONTENT_FILTER
		case SO_CFIL_SOCK_ID: {
			cfil_sock_id_t sock_id;

			sock_id = cfil_sock_id_from_socket(so);

			error = sooptcopyout(sopt, &sock_id,
			    sizeof(cfil_sock_id_t));
			break;
		}
#endif /* CONTENT_FILTER */

		case SO_EXTENDED_BK_IDLE:
			optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED);
			goto integer;
		case SO_MARK_CELLFALLBACK:
			optval = ((so->so_flags1 & SOF1_CELLFALLBACK) > 0)
			    ? 1 : 0;
			goto integer;
		case SO_FALLBACK_MODE:
			optval = so->so_fallback_mode;
			goto integer;
		case SO_MARK_KNOWN_TRACKER: {
			optval = ((so->so_flags1 & SOF1_KNOWN_TRACKER) > 0)
			    ? 1 : 0;
			goto integer;
		}
		case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED: {
			optval = ((so->so_flags1 & SOF1_TRACKER_NON_APP_INITIATED) > 0)
			    ? 1 : 0;
			goto integer;
		}
		case SO_MARK_APPROVED_APP_DOMAIN: {
			optval = ((so->so_flags1 & SOF1_APPROVED_APP_DOMAIN) > 0)
			    ? 1 : 0;
			goto integer;
		}
		case SO_NET_SERVICE_TYPE: {
			if ((so->so_flags1 & SOF1_TC_NET_SERV_TYPE)) {
				optval = so->so_netsvctype;
			} else {
				optval = NET_SERVICE_TYPE_BE;
			}
			goto integer;
		}
		case SO_NETSVC_MARKING_LEVEL:
			optval = so_get_netsvc_marking_level(so);
			goto integer;

		case SO_MPKL_SEND_INFO: {
			struct so_mpkl_send_info so_mpkl_send_info;

			uuid_copy(so_mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
			so_mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
			error = sooptcopyout(sopt, &so_mpkl_send_info,
			    sizeof(struct so_mpkl_send_info));
			break;
		}
		case SO_MARK_WAKE_PKT:
			optval = (so->so_flags & SOF_MARK_WAKE_PKT);
			goto integer;
		case SO_RECV_WAKE_PKT:
			optval = (so->so_flags & SOF_RECV_WAKE_PKT);
			goto integer;
		case SO_APPLICATION_ID: {
			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
				error = EINVAL;
				goto out;
			}
			so_application_id_t application_id = { 0 };
			application_id.uid = kauth_cred_getuid(so->so_cred);
			uuid_copy(application_id.effective_uuid, !uuid_is_null(so->e_uuid) ? so->e_uuid : so->last_uuid);
			application_id.persona_id = so->so_persona_id;
			error = sooptcopyout(sopt, &application_id, sizeof(so_application_id_t));
			break;
		}
		default:
			error = ENOPROTOOPT;
			break;
		}
	}
out:
	if (dolock) {
		socket_unlock(so, 1);
	}
	return error;
}
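
/*
 * Example (userspace, illustrative): SO_NREAD above reports the bytes
 * immediately readable (for atomic/datagram protocols, the size of the
 * first record only):
 *
 *	int avail = 0;
 *	socklen_t optlen = sizeof(avail);
 *	getsockopt(fd, SOL_SOCKET, SO_NREAD, &avail, &optlen);
 */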

/*
 * The size limit on our soopt_getm() is different from that on FreeBSD.
 * We limit the size of options to MCLBYTES. This will have to change
 * if we need to define options that need more space than MCLBYTES.
 */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = (int)sopt->sopt_valsize;
	int how;

	if (sopt_size <= 0 || sopt_size > MCLBYTES) {
		return EMSGSIZE;
	}

	how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
	MGET(m, how, MT_DATA);
	if (m == NULL) {
		return ENOBUFS;
	}
	if (sopt_size > MLEN) {
		MCLGET(m, how);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size > 0) {
		MGET(m, how, MT_DATA);
		if (m == NULL) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, how);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				m_freem(m);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}

/* copyin sopt data into mbuf chain */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return 0;
	}
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			int error;

			error = copyin(sopt->sopt_val, mtod(m, char *),
			    m->m_len);
			if (error != 0) {
				m_freem(m0);
				return error;
			}
		} else {
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
			    mtod(m, char *), m->m_len);
		}
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		m = m->m_next;
	}
	/* enough mbufs should have been allocated by ip6_sooptmcopyin() */
	if (m != NULL) {
		panic("soopt_mcopyin");
		/* NOTREACHED */
	}
	return 0;
}

/* copyout mbuf chain data into soopt */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return 0;
	}
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != kernproc) {
			int error;

			error = copyout(mtod(m, char *), sopt->sopt_val,
			    m->m_len);
			if (error != 0) {
				m_freem(m0);
				return error;
			}
		} else {
			bcopy(mtod(m, char *),
			    CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
		}
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* a large enough soopt buffer should be given by user-land */
		m_freem(m0);
		return EINVAL;
	}
	sopt->sopt_valsize = valsize;
	return 0;
}
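
/*
 * Illustrative sketch only (not part of this file's code): a protocol
 * ctloutput handler would typically pair the three helpers above like
 * this -- stage the option in an mbuf chain, let the protocol fill or
 * consume it, then copy the result back out.  example_handle_option()
 * is a hypothetical placeholder and ownership/error handling is elided.
 *
 *	struct mbuf *m = NULL;
 *	int error = soopt_getm(sopt, &m);        // bounds-check, allocate chain
 *	if (error == 0)
 *		error = soopt_mcopyin(sopt, m);  // option bytes -> mbuf chain
 *	if (error == 0)
 *		error = example_handle_option(so, m);
 *	if (error == 0)
 *		error = soopt_mcopyout(sopt, m); // result -> caller, sets sopt_valsize
 */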

void
sohasoutofband(struct socket *so)
{
	if (so->so_pgid < 0) {
		gsignal(-so->so_pgid, SIGURG);
	} else if (so->so_pgid > 0) {
		proc_signal(so->so_pgid, SIGURG);
	}
	selwakeup(&so->so_rcv.sb_sel);
	if (so->so_rcv.sb_flags & SB_KNOTE) {
		KNOTE(&so->so_rcv.sb_sel.si_note,
		    (NOTE_OOB | SO_FILT_HINT_LOCKED));
	}
}
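
/*
 * Note on the so_pgid convention above (a reading aid, not new
 * behavior): a negative so_pgid names a process group, a positive one
 * a single process.  For example, after fcntl(fd, F_SETOWN, -pgrp) the
 * arrival of TCP urgent data delivers SIGURG to every member of pgrp
 * via gsignal().
 */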

int
sopoll(struct socket *so, int events, kauth_cred_t cred, void *wql)
{
#pragma unused(cred)
	struct proc *p = current_proc();
	int revents = 0;

	socket_lock(so, 1);
	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);

	if (events & (POLLIN | POLLRDNORM)) {
		if (soreadable(so)) {
			revents |= events & (POLLIN | POLLRDNORM);
		}
	}

	if (events & (POLLOUT | POLLWRNORM)) {
		if (sowriteable(so)) {
			revents |= events & (POLLOUT | POLLWRNORM);
		}
	}

	if (events & (POLLPRI | POLLRDBAND)) {
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
			revents |= events & (POLLPRI | POLLRDBAND);
		}
	}

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			/*
			 * Darwin sets the flag first,
			 * BSD calls selrecord first
			 */
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			/*
			 * Darwin sets the flag first,
			 * BSD calls selrecord first
			 */
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
		}
	}

	socket_unlock(so, 1);
	return revents;
}
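
/*
 * Illustrative sketch only (user-space view, not kernel code): the
 * logic above is what answers a poll(2) call such as the following,
 * where POLLPRI reports pending out-of-band data (so_oobmark set or
 * SS_RCVATMARK reached).
 *
 *	struct pollfd pfd = { .fd = sock, .events = POLLIN | POLLPRI };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI)) {
 *		// urgent data pending; see recv(2) with MSG_OOB
 *	}
 */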

int
soo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(fp);
	int result;

	socket_lock(so, 1);
	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_filtid = EVFILTID_SOREAD;
		break;
	case EVFILT_WRITE:
		kn->kn_filtid = EVFILTID_SOWRITE;
		break;
	case EVFILT_SOCK:
		kn->kn_filtid = EVFILTID_SCK;
		break;
	case EVFILT_EXCEPT:
		kn->kn_filtid = EVFILTID_SOEXCEPT;
		break;
	default:
		socket_unlock(so, 1);
		knote_set_error(kn, EINVAL);
		return 0;
	}

	/*
	 * call the appropriate sub-filter attach
	 * with the socket still locked
	 */
	result = knote_fops(kn)->f_attach(kn, kev);

	socket_unlock(so, 1);

	return result;
}

static int
filt_soread_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
{
	int retval = 0;
	int64_t data = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		/*
		 * Radar 6615193: handle the listen case dynamically for the
		 * kqueue read filter.  This allows listen() to be called
		 * after the kqueue EVFILT_READ registration.
		 */
		retval = !TAILQ_EMPTY(&so->so_comp);
		data = so->so_qlen;
		goto out;
	}

	/* socket isn't a listener */
	/*
	 * NOTE_LOWAT specifies a new low water mark in data, i.e.
	 * the bytes of protocol data.  We therefore exclude any
	 * control bytes.
	 */
	data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;

	if (kn->kn_sfflags & NOTE_OOB) {
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
			kn->kn_fflags |= NOTE_OOB;
			data -= so->so_oobmark;
			retval = 1;
			goto out;
		}
	}

	if ((so->so_state & SS_CANTRCVMORE)
#if CONTENT_FILTER
	    && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
	    ) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		retval = 1;
		goto out;
	}

	if (so->so_error) {     /* temporary udp error */
		retval = 1;
		goto out;
	}

	int64_t lowwat = so->so_rcv.sb_lowat;
	/*
	 * Ensure that when NOTE_LOWAT is used, the derived
	 * low water mark is bounded by the socket's rcv buf's
	 * high and low water mark values.
	 */
	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (kn->kn_sdata > so->so_rcv.sb_hiwat) {
			lowwat = so->so_rcv.sb_hiwat;
		} else if (kn->kn_sdata > lowwat) {
			lowwat = kn->kn_sdata;
		}
	}

	/*
	 * While the `data` field is the amount of data to read,
	 * 0-sized packets need to wake up the kqueue, see 58140856,
	 * so we need to take control bytes into account too.
	 */
	retval = (so->so_rcv.sb_cc >= lowwat);

out:
	if (retval && kev) {
		knote_fill_kevent(kn, kev, data);
	}
	return retval;
}
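
/*
 * Worked example for the NOTE_LOWAT clamp above (a reading aid, with
 * made-up numbers): with sb_lowat = 1 and sb_hiwat = 131072, a knote
 * that sets NOTE_LOWAT with kn_sdata = 4096 raises the effective
 * threshold to 4096 bytes, while kn_sdata = 1048576 is clamped down to
 * sb_hiwat so the filter can still fire once the receive buffer fills.
 */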

static int
filt_sorattach(struct knote *kn, __unused struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);

	/* socket locked */

	/*
	 * If the caller explicitly asked for OOB results (e.g. poll())
	 * from EVFILT_READ, then save that off in the hookid field
	 * and reserve the kn_flags EV_OOBAND bit for output only.
	 */
	if (kn->kn_filter == EVFILT_READ &&
	    kn->kn_flags & EV_OOBAND) {
		kn->kn_flags &= ~EV_OOBAND;
		kn->kn_hook32 = EV_OOBAND;
	} else {
		kn->kn_hook32 = 0;
	}
	if (KNOTE_ATTACH(&so->so_rcv.sb_sel.si_note, kn)) {
		so->so_rcv.sb_flags |= SB_KNOTE;
	}

	/* indicate if event is already fired */
	return filt_soread_common(kn, NULL, so);
}

static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);

	socket_lock(so, 1);
	if (so->so_rcv.sb_flags & SB_KNOTE) {
		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) {
			so->so_rcv.sb_flags &= ~SB_KNOTE;
		}
	}
	socket_unlock(so, 1);
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	int retval;

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_lock(so, 1);
	}

	retval = filt_soread_common(kn, NULL, so);

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_unlock(so, 1);
	}

	return retval;
}

static int
filt_sortouch(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	int retval;

	socket_lock(so, 1);

	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;

	/* determine if changes result in fired events */
	retval = filt_soread_common(kn, NULL, so);

	socket_unlock(so, 1);

	return retval;
}

static int
filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	int retval;

	socket_lock(so, 1);
	retval = filt_soread_common(kn, kev, so);
	socket_unlock(so, 1);

	return retval;
}

int
so_wait_for_if_feedback(struct socket *so)
{
	if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
	    (so->so_state & SS_ISCONNECTED)) {
		struct inpcb *inp = sotoinpcb(so);
		if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
			return 1;
		}
	}
	return 0;
}

static int
filt_sowrite_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
{
	int ret = 0;
	int64_t data = sbspace(&so->so_snd);

	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		ret = 1;
		goto out;
	}

	if (so->so_error) {     /* temporary udp error */
		ret = 1;
		goto out;
	}

	if (!socanwrite(so)) {
		ret = 0;
		goto out;
	}

	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
		ret = 1;
		goto out;
	}

	int64_t lowwat = so->so_snd.sb_lowat;
	const int64_t hiwat = so->so_snd.sb_hiwat;
	/*
	 * Deal with connected UNIX domain sockets which
	 * rely on the fact that the sender's socket buffer is
	 * actually the receiver's socket buffer.
	 */
	if (SOCK_DOM(so) == PF_LOCAL) {
		struct unpcb *unp = sotounpcb(so);
		if (unp != NULL && unp->unp_conn != NULL &&
		    unp->unp_conn->unp_socket != NULL) {
			struct socket *so2 = unp->unp_conn->unp_socket;
			/*
			 * At this point we know that `so' is locked
			 * and that `unp_conn` isn't going to change.
			 * However, we don't lock `so2` because doing so
			 * may require unlocking `so'
			 * (see unp_get_locks_in_order()).
			 *
			 * Two cases can happen:
			 *
			 * 1) we return 1 and tell the application that
			 *    it can write. Meanwhile, another thread
			 *    fills up the socket buffer. This will either
			 *    lead to a blocking send or EWOULDBLOCK
			 *    which the application should deal with.
			 * 2) we return 0 and tell the application that
			 *    the socket is not writable. Meanwhile,
			 *    another thread depletes the receive socket
			 *    buffer. In this case the application will
			 *    be woken up by sb_notify().
			 *
			 * MIN() is required because otherwise sosendcheck()
			 * may return EWOULDBLOCK since it only considers
			 * so->so_snd.
			 */
			data = MIN(data, sbspace(&so2->so_rcv));
		}
	}

	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (kn->kn_sdata > hiwat) {
			lowwat = hiwat;
		} else if (kn->kn_sdata > lowwat) {
			lowwat = kn->kn_sdata;
		}
	}

	if (data > 0 && data >= lowwat) {
		if ((so->so_flags & SOF_NOTSENT_LOWAT)
#if (DEBUG || DEVELOPMENT)
		    && so_notsent_lowat_check == 1
#endif /* DEBUG || DEVELOPMENT */
		    ) {
			if ((SOCK_DOM(so) == PF_INET ||
			    SOCK_DOM(so) == PF_INET6) &&
			    so->so_type == SOCK_STREAM) {
				ret = tcp_notsent_lowat_check(so);
			}
#if MPTCP
			else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
			    (SOCK_PROTO(so) == IPPROTO_TCP)) {
				ret = mptcp_notsent_lowat_check(so);
			}
#endif
			else {
				ret = 1;
				goto out;
			}
		} else {
			ret = 1;
		}
	}
	if (so_wait_for_if_feedback(so)) {
		ret = 0;
	}

out:
	if (ret && kev) {
		knote_fill_kevent(kn, kev, data);
	}
	return ret;
}

static int
filt_sowattach(struct knote *kn, __unused struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);

	/* socket locked */
	if (KNOTE_ATTACH(&so->so_snd.sb_sel.si_note, kn)) {
		so->so_snd.sb_flags |= SB_KNOTE;
	}

	/* determine if it's already fired */
	return filt_sowrite_common(kn, NULL, so);
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	socket_lock(so, 1);

	if (so->so_snd.sb_flags & SB_KNOTE) {
		if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) {
			so->so_snd.sb_flags &= ~SB_KNOTE;
		}
	}
	socket_unlock(so, 1);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	int ret;

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_lock(so, 1);
	}

	ret = filt_sowrite_common(kn, NULL, so);

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_unlock(so, 1);
	}

	return ret;
}

static int
filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	int ret;

	socket_lock(so, 1);

	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;

	/* determine if these changes result in a triggered event */
	ret = filt_sowrite_common(kn, NULL, so);

	socket_unlock(so, 1);

	return ret;
}

static int
filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	int ret;

	socket_lock(so, 1);
	ret = filt_sowrite_common(kn, kev, so);
	socket_unlock(so, 1);

	return ret;
}

static int
filt_sockev_common(struct knote *kn, struct kevent_qos_s *kev,
    struct socket *so, long ev_hint)
{
	int ret = 0;
	int64_t data = 0;
	uint32_t level_trigger = 0;

	if (ev_hint & SO_FILT_HINT_CONNRESET) {
		kn->kn_fflags |= NOTE_CONNRESET;
	}
	if (ev_hint & SO_FILT_HINT_TIMEOUT) {
		kn->kn_fflags |= NOTE_TIMEOUT;
	}
	if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
		kn->kn_fflags |= NOTE_NOSRCADDR;
	}
	if (ev_hint & SO_FILT_HINT_IFDENIED) {
		kn->kn_fflags |= NOTE_IFDENIED;
	}
	if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
		kn->kn_fflags |= NOTE_KEEPALIVE;
	}
	if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
		kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
	}
	if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
		kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
	}
	if ((ev_hint & SO_FILT_HINT_CONNECTED) ||
	    (so->so_state & SS_ISCONNECTED)) {
		kn->kn_fflags |= NOTE_CONNECTED;
		level_trigger |= NOTE_CONNECTED;
	}
	if ((ev_hint & SO_FILT_HINT_DISCONNECTED) ||
	    (so->so_state & SS_ISDISCONNECTED)) {
		kn->kn_fflags |= NOTE_DISCONNECTED;
		level_trigger |= NOTE_DISCONNECTED;
	}
	if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
		if (so->so_proto != NULL &&
		    (so->so_proto->pr_flags & PR_EVCONNINFO)) {
			kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
		}
	}
	if ((ev_hint & SO_FILT_HINT_NOTIFY_ACK) ||
	    tcp_notify_ack_active(so)) {
		kn->kn_fflags |= NOTE_NOTIFY_ACK;
	}
	if (ev_hint & SO_FILT_HINT_WAKE_PKT) {
		kn->kn_fflags |= NOTE_WAKE_PKT;
	}

	if ((so->so_state & SS_CANTRCVMORE)
#if CONTENT_FILTER
	    && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
	    ) {
		kn->kn_fflags |= NOTE_READCLOSED;
		level_trigger |= NOTE_READCLOSED;
	}

	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_fflags |= NOTE_WRITECLOSED;
		level_trigger |= NOTE_WRITECLOSED;
	}

	if ((ev_hint & SO_FILT_HINT_SUSPEND) ||
	    (so->so_flags & SOF_SUSPENDED)) {
		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);

		/* If a resume event was delivered before, reset it */
		kn->kn_hook32 &= ~NOTE_RESUME;

		kn->kn_fflags |= NOTE_SUSPEND;
		level_trigger |= NOTE_SUSPEND;
	}

	if ((ev_hint & SO_FILT_HINT_RESUME) ||
	    (so->so_flags & SOF_SUSPENDED) == 0) {
		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);

		/* If a suspend event was delivered before, reset it */
		kn->kn_hook32 &= ~NOTE_SUSPEND;

		kn->kn_fflags |= NOTE_RESUME;
		level_trigger |= NOTE_RESUME;
	}

	if (so->so_error != 0) {
		ret = 1;
		data = so->so_error;
		kn->kn_flags |= EV_EOF;
	} else {
		u_int32_t data32 = 0;
		get_sockev_state(so, &data32);
		data = data32;
	}

	/* Reset any events that are not requested on this knote */
	kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
	level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);

	/* Find the level triggered events that are already delivered */
	level_trigger &= kn->kn_hook32;
	level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK;

	/* Do not deliver level triggered events more than once */
	if ((kn->kn_fflags & ~level_trigger) != 0) {
		ret = 1;
	}

	if (ret && kev) {
		/*
		 * Store the state of the events being delivered. This
		 * state can be used to deliver level triggered events
		 * at least once and still avoid waking up the application
		 * multiple times as long as the event is active.
		 */
		if (kn->kn_fflags != 0) {
			kn->kn_hook32 |= (kn->kn_fflags &
			    EVFILT_SOCK_LEVEL_TRIGGER_MASK);
		}

		/*
		 * NOTE_RESUME and NOTE_SUSPEND are an exception: deliver
		 * only one of them, and remember the one that was
		 * delivered last.
		 */
		if (kn->kn_fflags & NOTE_SUSPEND) {
			kn->kn_hook32 &= ~NOTE_RESUME;
		}
		if (kn->kn_fflags & NOTE_RESUME) {
			kn->kn_hook32 &= ~NOTE_SUSPEND;
		}

		knote_fill_kevent(kn, kev, data);
	}
	return ret;
}
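
/*
 * Worked example for the level-trigger bookkeeping above (a reading
 * aid): suppose a knote asks for NOTE_READCLOSED and the peer shuts
 * down.  The first filt_sockprocess() pass sets NOTE_READCLOSED in
 * kn_fflags, records it in kn_hook32, and returns 1.  On later passes
 * the condition still holds, but the recorded bit masks it out via
 * level_trigger, so (assuming no pending so_error) the knote does not
 * fire again while that bit remains set in kn_hook32.
 */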

static int
filt_sockattach(struct knote *kn, __unused struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);

	/* socket locked */
	kn->kn_hook32 = 0;
	if (KNOTE_ATTACH(&so->so_klist, kn)) {
		so->so_flags |= SOF_KNOTE;
	}

	/* determine if event already fired */
	return filt_sockev_common(kn, NULL, so, 0);
}

static void
filt_sockdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	socket_lock(so, 1);

	if ((so->so_flags & SOF_KNOTE) != 0) {
		if (KNOTE_DETACH(&so->so_klist, kn)) {
			so->so_flags &= ~SOF_KNOTE;
		}
	}
	socket_unlock(so, 1);
}

static int
filt_sockev(struct knote *kn, long hint)
{
	int ret = 0, locked = 0;
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	long ev_hint = (hint & SO_FILT_HINT_EV);

	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
		socket_lock(so, 1);
		locked = 1;
	}

	ret = filt_sockev_common(kn, NULL, so, ev_hint);

	if (locked) {
		socket_unlock(so, 1);
	}

	return ret;
}

/*
 * filt_socktouch - update event state
 */
static int
filt_socktouch(
	struct knote *kn,
	struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	uint32_t changed_flags;
	int ret;

	socket_lock(so, 1);

	/* save off the [result] data and fflags */
	changed_flags = (kn->kn_sfflags ^ kn->kn_hook32);

	/* save off the new input fflags and data */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;

	/* restrict the current results to the (smaller?) set of new interest */
	/*
	 * For compatibility with previous implementations, we leave kn_fflags
	 * as they were before.
	 */
	//kn->kn_fflags &= kev->fflags;

	/*
	 * Since we keep track of events that are already
	 * delivered, if any of those events are not requested
	 * anymore the state related to them can be reset
	 */
	kn->kn_hook32 &= ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK);

	/* determine if we have events to deliver */
	ret = filt_sockev_common(kn, NULL, so, 0);

	socket_unlock(so, 1);

	return ret;
}

/*
 * filt_sockprocess - query event fired state and return data
 */
static int
filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
	int ret = 0;

	socket_lock(so, 1);

	ret = filt_sockev_common(kn, kev, so, 0);

	socket_unlock(so, 1);

	return ret;
}

void
get_sockev_state(struct socket *so, u_int32_t *statep)
{
	u_int32_t state = *(statep);

	/*
	 * If the state variable is already used by a previous event,
	 * leave it as is.
	 */
	if (state != 0) {
		return;
	}

	if (so->so_state & SS_ISCONNECTED) {
		state |= SOCKEV_CONNECTED;
	} else {
		state &= ~(SOCKEV_CONNECTED);
	}
	state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);
	*(statep) = state;
}

#define SO_LOCK_HISTORY_STR_LEN \
	(2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)

__private_extern__ const char *
solockhistory_nr(struct socket *so)
{
	size_t n = 0;
	int i;
	static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];

	bzero(lock_history_str, sizeof(lock_history_str));
	for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
		n += scnprintf(lock_history_str + n,
		    SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
		    so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
		    so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
	}
	return lock_history_str;
}
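
/*
 * Sizing note for SO_LOCK_HISTORY_STR_LEN above (a reading aid): each
 * of the SO_LCKDBG_MAX iterations prints two "%p" values ("0x" plus
 * 2 * sizeof (void *) hex digits each) joined by ':' and followed by
 * ' ', i.e. 2 * (2 + (2 * sizeof (void *)) + 1) characters per entry;
 * the trailing + 1 in the macro is the NUL terminator.
 */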

lck_mtx_t *
socket_getlock(struct socket *so, int flags)
{
	if (so->so_proto->pr_getlock != NULL) {
		return (*so->so_proto->pr_getlock)(so, flags);
	} else {
		return so->so_proto->pr_domain->dom_mtx;
	}
}

void
socket_lock(struct socket *so, int refcount)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	if (so->so_proto->pr_lock) {
		(*so->so_proto->pr_lock)(so, refcount, lr_saved);
	} else {
#ifdef MORE_LOCKING_DEBUG
		LCK_MTX_ASSERT(so->so_proto->pr_domain->dom_mtx,
		    LCK_MTX_ASSERT_NOTOWNED);
#endif
		lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
		if (refcount) {
			so->so_usecount++;
		}
		so->lock_lr[so->next_lock_lr] = lr_saved;
		so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
	}
}

void
socket_lock_assert_owned(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}

	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
}

int
socket_try_lock(struct socket *so)
{
	lck_mtx_t *mtx;

	if (so->so_proto->pr_getlock != NULL) {
		mtx = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mtx = so->so_proto->pr_domain->dom_mtx;
	}

	return lck_mtx_try_lock(mtx);
}

void
socket_unlock(struct socket *so, int refcount)
{
	void *lr_saved;
	lck_mtx_t *mutex_held;

	lr_saved = __builtin_return_address(0);

	if (so == NULL || so->so_proto == NULL) {
		panic("%s: null so_proto so=%p", __func__, so);
		/* NOTREACHED */
	}

	if (so->so_proto->pr_unlock) {
		(*so->so_proto->pr_unlock)(so, refcount, lr_saved);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif
		so->unlock_lr[so->next_unlock_lr] = lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

		if (refcount) {
			if (so->so_usecount <= 0) {
				panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
				    "lrh=%s", __func__, so->so_usecount, so,
				    SOCK_DOM(so), so->so_type,
				    SOCK_PROTO(so), solockhistory_nr(so));
				/* NOTREACHED */
			}

			so->so_usecount--;
			if (so->so_usecount == 0) {
				sofreelastref(so, 1);
			}
		}
		lck_mtx_unlock(mutex_held);
	}
}

/* Called with socket locked, will unlock socket */
void
sofree(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	sofreelastref(so, 0);
}

void
soreference(struct socket *so)
{
	socket_lock(so, 1);     /* locks & takes one reference on socket */
	socket_unlock(so, 0);   /* unlock only */
}

void
sodereference(struct socket *so)
{
	socket_lock(so, 0);
	socket_unlock(so, 1);
}
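
/*
 * Illustrative sketch only: the refcount argument pairs a lock/unlock
 * with taking or dropping a use count, so a caller that must keep the
 * socket alive across an unlocked window would do:
 *
 *	soreference(so);        // socket_lock(so, 1); socket_unlock(so, 0);
 *	... work without the socket lock held ...
 *	sodereference(so);      // socket_lock(so, 0); socket_unlock(so, 1);
 *
 * The final socket_unlock(so, 1) drops the use count and frees the
 * socket via sofreelastref() if it was the last reference.
 */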

/*
 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
 * possibility of using jumbo clusters.  The caller must hold the
 * socket lock.
 */
void
somultipages(struct socket *so, boolean_t set)
{
	if (set) {
		so->so_flags |= SOF_MULTIPAGES;
	} else {
		so->so_flags &= ~SOF_MULTIPAGES;
	}
}

void
soif2kcl(struct socket *so, boolean_t set)
{
	if (set) {
		so->so_flags1 |= SOF1_IF_2KCL;
	} else {
		so->so_flags1 &= ~SOF1_IF_2KCL;
	}
}

int
so_isdstlocal(struct socket *so)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;

	if (SOCK_DOM(so) == PF_INET) {
		return inaddr_local(inp->inp_faddr);
	} else if (SOCK_DOM(so) == PF_INET6) {
		return in6addr_local(&inp->in6p_faddr);
	}

	return 0;
}

int
sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
{
	struct sockbuf *rcv, *snd;
	int err = 0, defunct;

	rcv = &so->so_rcv;
	snd = &so->so_snd;

	defunct = (so->so_flags & SOF_DEFUNCT);
	if (defunct) {
		if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
			panic("%s: SB_DROP not set", __func__);
			/* NOTREACHED */
		}
		goto done;
	}

	if (so->so_flags & SOF_NODEFUNCT) {
		if (noforce) {
			err = EOPNOTSUPP;
			if (p != PROC_NULL) {
				SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
				    "name %s level %d) so 0x%llu [%d,%d] "
				    "is not eligible for defunct "
				    "(%d)\n", __func__, proc_selfpid(),
				    proc_best_name(current_proc()), proc_pid(p),
				    proc_best_name(p), level,
				    so->so_gencnt,
				    SOCK_DOM(so), SOCK_TYPE(so), err);
			}
			return err;
		}
		so->so_flags &= ~SOF_NODEFUNCT;
		if (p != PROC_NULL) {
			SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
			    "name %s level %d) so 0x%llu [%d,%d] "
			    "defunct by force "
			    "(%d)\n", __func__, proc_selfpid(),
			    proc_best_name(current_proc()), proc_pid(p),
			    proc_best_name(p), level,
			    so->so_gencnt,
			    SOCK_DOM(so), SOCK_TYPE(so), err);
		}
	} else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
		struct inpcb *inp = (struct inpcb *)so->so_pcb;
		struct ifnet *ifp = inp->inp_last_outifp;

		if (ifp && IFNET_IS_CELLULAR(ifp)) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell);
		} else if (so->so_flags & SOF_DELEGATED) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
		} else if (soextbkidlestat.so_xbkidle_time == 0) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime);
		} else if (noforce && p != PROC_NULL) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active);

			so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG;
			so->so_extended_bk_start = net_uptime();
			OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag);

			inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);

			err = EOPNOTSUPP;
			SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
			    "name %s level %d) so 0x%llu [%d,%d] "
			    "extend bk idle "
			    "(%d)\n", __func__, proc_selfpid(),
			    proc_best_name(current_proc()), proc_pid(p),
			    proc_best_name(p), level,
			    so->so_gencnt,
			    SOCK_DOM(so), SOCK_TYPE(so), err);
			return err;
		} else {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced);
		}
	}

	so->so_flags |= SOF_DEFUNCT;

	/* Prevent further data from being appended to the socket buffers */
	snd->sb_flags |= SB_DROP;
	rcv->sb_flags |= SB_DROP;

	/* Flush any existing data in the socket buffers */
	if (rcv->sb_cc != 0) {
		rcv->sb_flags &= ~SB_SEL;
		selthreadclear(&rcv->sb_sel);
		sbrelease(rcv);
	}
	if (snd->sb_cc != 0) {
		snd->sb_flags &= ~SB_SEL;
		selthreadclear(&snd->sb_sel);
		sbrelease(snd);
	}

done:
	if (p != PROC_NULL) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
		    "so 0x%llu [%d,%d] %s defunct%s\n", __func__,
		    proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p), level,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), defunct ? "is already" : "marked as",
		    (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
		    " extbkidle" : "");
	}
	return err;
}

int
sodefunct(struct proc *p, struct socket *so, int level)
{
	struct sockbuf *rcv, *snd;

	if (!(so->so_flags & SOF_DEFUNCT)) {
		panic("%s improperly called", __func__);
		/* NOTREACHED */
	}
	if (so->so_state & SS_DEFUNCT) {
		goto done;
	}

	rcv = &so->so_rcv;
	snd = &so->so_snd;

	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		char s[MAX_IPv6_STR_LEN];
		char d[MAX_IPv6_STR_LEN];
		struct inpcb *inp = sotoinpcb(so);

		if (p != PROC_NULL) {
			SODEFUNCTLOG(
				"%s[%d, %s]: (target pid %d name %s level %d) "
				"so 0x%llu [%s %s:%d -> %s:%d] is now defunct "
				"[rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, "
				" snd_fl 0x%x]\n", __func__,
				proc_selfpid(), proc_best_name(current_proc()),
				proc_pid(p), proc_best_name(p), level,
				so->so_gencnt,
				(SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
				inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
				(void *)&inp->inp_laddr.s_addr :
				(void *)&inp->in6p_laddr),
				s, sizeof(s)), ntohs(inp->in6p_lport),
				inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
				(void *)&inp->inp_faddr.s_addr :
				(void *)&inp->in6p_faddr,
				d, sizeof(d)), ntohs(inp->in6p_fport),
				(uint32_t)rcv->sb_sel.si_flags,
				(uint32_t)snd->sb_sel.si_flags,
				rcv->sb_flags, snd->sb_flags);
		}
	} else if (p != PROC_NULL) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
		    "so 0x%llu [%d,%d] is now defunct [rcv_si 0x%x, "
		    "snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n", __func__,
		    proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p), level,
		    so->so_gencnt,
		    SOCK_DOM(so), SOCK_TYPE(so),
		    (uint32_t)rcv->sb_sel.si_flags,
		    (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
		    snd->sb_flags);
	}

	/*
	 * First tell the protocol the flow is defunct
	 */
	(void) (*so->so_proto->pr_usrreqs->pru_defunct)(so);

	/*
	 * Unwedge threads blocked on sbwait() and sb_lock().
	 */
	sbwakeup(rcv);
	sbwakeup(snd);

	so->so_flags1 |= SOF1_DEFUNCTINPROG;
	if (rcv->sb_flags & SB_LOCK) {
		sbunlock(rcv, TRUE);    /* keep socket locked */
	}
	if (snd->sb_flags & SB_LOCK) {
		sbunlock(snd, TRUE);    /* keep socket locked */
	}
	/*
	 * Flush the buffers and disconnect.  We explicitly call shutdown
	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
	 * states are set for the socket.  This would also flush out data
	 * hanging off the receive list of this socket.
	 */
	(void) soshutdownlock_final(so, SHUT_RD);
	(void) soshutdownlock_final(so, SHUT_WR);
	(void) sodisconnectlocked(so);

	/*
	 * Explicitly handle connectionless-protocol disconnection
	 * and release any remaining data in the socket buffers.
	 */
	if (!(so->so_state & SS_ISDISCONNECTED)) {
		(void) soisdisconnected(so);
	}

	if (so->so_error == 0) {
		so->so_error = EBADF;
	}

	if (rcv->sb_cc != 0) {
		rcv->sb_flags &= ~SB_SEL;
		selthreadclear(&rcv->sb_sel);
		sbrelease(rcv);
	}
	if (snd->sb_cc != 0) {
		snd->sb_flags &= ~SB_SEL;
		selthreadclear(&snd->sb_sel);
		sbrelease(snd);
	}
	so->so_state |= SS_DEFUNCT;
	OSIncrementAtomicLong((volatile long *)&sodefunct_calls);

done:
	return 0;
}
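
/*
 * Illustrative sketch only: defuncting is a two-phase operation, as
 * so_stop_extended_bk_idle() below also demonstrates -- first mark the
 * socket (which may be refused or deferred), then tear it down:
 *
 *	if (sosetdefunct(p, so, level, TRUE) == 0 &&
 *	    (so->so_flags & SOF_DEFUNCT)) {
 *		(void) sodefunct(p, so, level);
 *	}
 */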

int
soresume(struct proc *p, struct socket *so, int locked)
{
	if (locked == 0) {
		socket_lock(so, 1);
	}

	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
		SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s) so 0x%llu "
		    "[%d,%d] resumed from bk idle\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    proc_pid(p), proc_best_name(p),
		    so->so_gencnt,
		    SOCK_DOM(so), SOCK_TYPE(so));

		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
		so->so_extended_bk_start = 0;
		OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);

		OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed);
		OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
		VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
	}
	if (locked == 0) {
		socket_unlock(so, 1);
	}

	return 0;
}

/*
 * Does not attempt to account for sockets that are delegated from
 * the current process.
 */
int
so_set_extended_bk_idle(struct socket *so, int optval)
{
	int error = 0;

	if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) ||
	    SOCK_PROTO(so) != IPPROTO_TCP) {
		OSDecrementAtomic(&soextbkidlestat.so_xbkidle_notsupp);
		error = EOPNOTSUPP;
	} else if (optval == 0) {
		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;

		soresume(current_proc(), so, 1);
	} else {
		struct proc *p = current_proc();
		struct fileproc *fp;
		int count = 0;

		/*
		 * Unlock the socket to avoid a lock ordering issue with
		 * the proc fd table lock.
		 */
		socket_unlock(so, 0);

		proc_fdlock(p);
		fdt_foreach(fp, p) {
			struct socket *so2;

			if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
				continue;
			}

			so2 = (struct socket *)fp_get_data(fp);
			if (so != so2 &&
			    so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
				count++;
			}
			if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
				break;
			}
		}
		proc_fdunlock(p);

		socket_lock(so, 0);

		if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany);
			error = EBUSY;
		} else if (so->so_flags & SOF_DELEGATED) {
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
			error = EBUSY;
		} else {
			so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED;
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok);
		}
		SODEFUNCTLOG("%s[%d, %s]: so 0x%llu [%d,%d] "
		    "%s marked for extended bk idle\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    so->so_gencnt,
		    SOCK_DOM(so), SOCK_TYPE(so),
		    (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
		    "is" : "not");
	}

	return error;
}

static void
so_stop_extended_bk_idle(struct socket *so)
{
	so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
	so->so_extended_bk_start = 0;

	OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
	VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
	/*
	 * Force defunct
	 */
	sosetdefunct(current_proc(), so,
	    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}
}

void
so_drain_extended_bk_idle(struct socket *so)
{
	if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
		/*
		 * Only penalize sockets that have outstanding data
		 */
		if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
			so_stop_extended_bk_idle(so);

			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained);
		}
	}
}

/*
 * The return value tells whether the socket is still in extended
 * background idle mode.
 */
int
so_check_extended_bk_idle_time(struct socket *so)
{
	int ret = 1;

	if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
		SODEFUNCTLOG("%s[%d, %s]: so 0x%llu [%d,%d]\n",
		    __func__, proc_selfpid(), proc_best_name(current_proc()),
		    so->so_gencnt,
		    SOCK_DOM(so), SOCK_TYPE(so));
		if (net_uptime() - so->so_extended_bk_start >
		    soextbkidlestat.so_xbkidle_time) {
			so_stop_extended_bk_idle(so);

			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired);

			ret = 0;
		} else {
			struct inpcb *inp = (struct inpcb *)so->so_pcb;

			inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
			OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched);
		}
	}

	return ret;
}

void
resume_proc_sockets(proc_t p)
{
	if (p->p_ladvflag & P_LXBKIDLEINPROG) {
		struct fileproc *fp;
		struct socket *so;

		proc_fdlock(p);
		fdt_foreach(fp, p) {
			if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
				continue;
			}

			so = (struct socket *)fp_get_data(fp);
			(void) soresume(p, so, 0);
		}
		proc_fdunlock(p);

		OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
	}
}

__private_extern__ int
so_set_recv_anyif(struct socket *so, int optval)
{
	int ret = 0;

	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		if (optval) {
			sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
		} else {
			sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
		}
#if SKYWALK
		inp_update_netns_flags(so);
#endif /* SKYWALK */
	}

	return ret;
}

__private_extern__ int
so_get_recv_anyif(struct socket *so)
{
	int ret = 0;

	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
	}

	return ret;
}

int
so_set_restrictions(struct socket *so, uint32_t vals)
{
	int nocell_old, nocell_new;
	int noexpensive_old, noexpensive_new;
	int noconstrained_old, noconstrained_new;

	/*
	 * Deny-type restrictions are trapdoors; once set they cannot be
	 * unset for the lifetime of the socket.  This allows them to be
	 * issued by a framework on behalf of the application without
	 * having to worry that they can be undone.
	 *
	 * Note here that socket-level restrictions override any protocol
	 * level restrictions.  For instance, a SO_RESTRICT_DENY_CELLULAR
	 * restriction issued on the socket has a higher precedence
	 * than INP_NO_IFT_CELLULAR.  The latter is affected by the UUID
	 * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only,
	 * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued.
	 */
	nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
	noconstrained_old = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);
	so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
	    SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
	    SO_RESTRICT_DENY_EXPENSIVE | SO_RESTRICT_DENY_CONSTRAINED));
	nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
	noconstrained_new = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);

	/* we can only set, not clear restrictions */
	if ((nocell_new - nocell_old) == 0 &&
	    (noexpensive_new - noexpensive_old) == 0 &&
	    (noconstrained_new - noconstrained_old) == 0) {
		return 0;
	}
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		if (nocell_new - nocell_old != 0) {
			/*
			 * if deny cellular is now set, do what's needed
			 * for INPCB
			 */
			inp_set_nocellular(sotoinpcb(so));
		}
		if (noexpensive_new - noexpensive_old != 0) {
			inp_set_noexpensive(sotoinpcb(so));
		}
		if (noconstrained_new - noconstrained_old != 0) {
			inp_set_noconstrained(sotoinpcb(so));
		}
	}

	if (SOCK_DOM(so) == PF_MULTIPATH) {
		mptcp_set_restrictions(so);
	}

	return 0;
}

uint32_t
so_get_restrictions(struct socket *so)
{
	return so->so_restrictions & (SO_RESTRICT_DENY_IN |
	       SO_RESTRICT_DENY_OUT |
	       SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE);
}

int
so_set_effective_pid(struct socket *so, int epid, struct proc *p, boolean_t check_cred)
{
	struct proc *ep = PROC_NULL;
	int error = 0;

	/* pid 0 is reserved for kernel */
	if (epid == 0) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the pid is the same as
	 * the process's own pid, then proceed.  Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (check_cred && (epid != so->last_pid || epid != proc_pid(p))) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/* Find the process that corresponds to the effective pid */
	if ((ep = proc_find(epid)) == PROC_NULL) {
		error = ESRCH;
		goto done;
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared.  Note that we check
	 * the passed-in proc rather than calling proc_selfpid(),
	 * as we need to check the process issuing the socket option
	 * which could be kernproc.  Given that we don't allow 0 for
	 * effective pid, it means that a delegated in-kernel socket
	 * stays delegated during its lifetime (which is probably OK.)
	 */
	if (epid == proc_pid(p)) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		so->e_upid = proc_uniqueid(ep);
		so->e_pid = proc_pid(ep);
		proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));

#if defined(XNU_TARGET_OS_OSX)
		if (ep->p_responsible_pid != so->e_pid) {
			proc_t rp = proc_find(ep->p_responsible_pid);
			if (rp != PROC_NULL) {
				proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
				so->so_rpid = ep->p_responsible_pid;
				proc_rele(rp);
			} else {
				uuid_clear(so->so_ruuid);
				so->so_rpid = -1;
			}
		}
#endif
	}
	if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
		(*so->so_proto->pr_update_last_owner)(so, NULL, ep);
	}
done:
	if (error == 0 && net_io_policy_log) {
		uuid_string_t buf;

		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "euuid %s%s\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    so->e_pid, proc_name_address(ep), buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "ERROR (%d)\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    epid, (ep == PROC_NULL) ? "PROC_NULL" :
		    proc_name_address(ep), error);
	}

	/* Update this socket's policy upon success */
	if (error == 0) {
		so->so_policy_gencnt *= -1;
		so_update_policy(so);
#if NECP
		so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
	}

	if (ep != PROC_NULL) {
		proc_rele(ep);
	}

	return error;
}

int
so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p, boolean_t check_cred)
{
	uuid_string_t buf;
	uuid_t uuid;
	int error = 0;

	/* UUID must not be all-zeroes (reserved for kernel) */
	if (uuid_is_null(euuid)) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/* Get the UUID of the issuing process */
	proc_getexecutableuuid(p, uuid, sizeof(uuid));

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the uuid is the same as
	 * the process's own uuid, then proceed.  Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (check_cred &&
	    (uuid_compare(euuid, so->last_uuid) != 0 ||
	    uuid_compare(euuid, uuid) != 0)) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared.  Note that we check
	 * the uuid of the passed-in proc rather than that of the
	 * current process, as we need to check the process issuing
	 * the socket option which could be kernproc itself.  Given
	 * that we don't allow 0 for effective uuid, it means that
	 * a delegated in-kernel socket stays delegated during its
	 * lifetime (which is okay.)
	 */
	if (uuid_compare(euuid, uuid) == 0) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		/*
		 * Unlike so_set_effective_pid(), we only have the UUID
		 * here and the process ID is not known.  Inherit the
		 * real {pid,upid} of the socket.
		 */
		so->e_upid = so->last_upid;
		so->e_pid = so->last_pid;
		uuid_copy(so->e_uuid, euuid);
	}
	/*
	 * The following will clear the effective process name as it's the same
	 * as the real process
	 */
	if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
		(*so->so_proto->pr_update_last_owner)(so, NULL, NULL);
	}
done:
	if (error == 0 && net_io_policy_log) {
		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), so->e_pid, buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		uuid_unparse(euuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
		    "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), buf, error);
	}

	/* Update this socket's policy upon success */
	if (error == 0) {
		so->so_policy_gencnt *= -1;
		so_update_policy(so);
#if NECP
		so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
	}

	return error;
}

void
netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	/*
	 * A netpolicy event always starts with a netpolicy_event_data
	 * structure, but the caller can provide for a longer event
	 * structure to post, depending on the event code.
	 */
	VERIFY(ev_data != NULL && ev_datalen >= sizeof(*ev_data));

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_NETPOLICY_SUBCLASS;
	ev_msg.event_code = ev_code;

	ev_msg.dv[0].data_ptr = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}

void
socket_post_kev_msg(uint32_t ev_code,
    struct kev_socket_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS;
	ev_msg.event_code = ev_code;

	ev_msg.dv[0].data_ptr = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}

void
socket_post_kev_msg_closed(struct socket *so)
{
	struct kev_socket_closed ev = {};
	struct sockaddr *socksa = NULL, *peersa = NULL;
	int err;

	if ((so->so_flags1 & SOF1_WANT_KEV_SOCK_CLOSED) == 0) {
		return;
	}
	err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa);
	if (err == 0) {
		err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,
		    &peersa);
		if (err == 0) {
			memcpy(&ev.ev_data.kev_sockname, socksa,
			    min(socksa->sa_len,
			    sizeof(ev.ev_data.kev_sockname)));
			memcpy(&ev.ev_data.kev_peername, peersa,
			    min(peersa->sa_len,
			    sizeof(ev.ev_data.kev_peername)));
			socket_post_kev_msg(KEV_SOCKET_CLOSED,
			    &ev.ev_data, sizeof(ev));
		}
	}
	free_sockaddr(socksa);
	free_sockaddr(peersa);
}

__attribute__((noinline, cold, not_tail_called, noreturn))
__private_extern__ int
assfail(const char *a, const char *f, int l)
{
	panic("assertion failed: %s, file: %s, line: %d", a, f, l);
	/* NOTREACHED */
	__builtin_unreachable();
}