3562 lines
88 KiB
C
3562 lines
88 KiB
C
/*
|
|
* Copyright (c) 2015-2022 Apple Inc. All rights reserved.
|
|
*
|
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
|
|
*
|
|
* This file contains Original Code and/or Modifications of Original Code
|
|
* as defined in and that are subject to the Apple Public Source License
|
|
* Version 2.0 (the 'License'). You may not use this file except in
|
|
* compliance with the License. The rights granted to you under the License
|
|
* may not be used to create, or enable the creation or redistribution of,
|
|
* unlawful or unlicensed copies of an Apple operating system, or to
|
|
* circumvent, violate, or enable the circumvention or violation of, any
|
|
* terms of an Apple operating system software license agreement.
|
|
*
|
|
* Please obtain a copy of the License at
|
|
* http://www.opensource.apple.com/apsl/ and read it before using this file.
|
|
*
|
|
* The Original Code and all software distributed under the License are
|
|
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
|
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
|
|
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
|
|
* Please see the License for the specific language governing rights and
|
|
* limitations under the License.
|
|
*
|
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
|
|
*/
|
|
|
|
#include <skywalk/os_skywalk_private.h>
|
|
#include <skywalk/nexus/netif/nx_netif.h>
|
|
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
|
|
#include <sys/sdt.h>
|
|
|
|
static uint32_t disable_nxctl_check = 0;
|
|
#if (DEVELOPMENT || DEBUG)
|
|
SYSCTL_UINT(_kern_skywalk, OID_AUTO, disable_nxctl_check,
|
|
CTLFLAG_RW | CTLFLAG_LOCKED, &disable_nxctl_check, 0, "");
|
|
#endif
|
|
|
|
LCK_GRP_DECLARE(nexus_lock_group, "sk_nx_lock");
|
|
LCK_GRP_DECLARE(nexus_mbq_lock_group, "sk_nx_mbq_lock");
|
|
LCK_GRP_DECLARE(nexus_pktq_lock_group, "sk_nx_pktq_lock");
|
|
LCK_ATTR_DECLARE(nexus_lock_attr, 0, 0);
|
|
|
|
static STAILQ_HEAD(, nxctl) nxctl_head =
|
|
STAILQ_HEAD_INITIALIZER(nxctl_head);
|
|
static STAILQ_HEAD(, kern_nexus_provider) nxprov_head =
|
|
STAILQ_HEAD_INITIALIZER(nxprov_head);
|
|
|
|
static int nx_cmp(const struct kern_nexus *, const struct kern_nexus *);
|
|
RB_HEAD(kern_nexus_tree, kern_nexus);
|
|
RB_PROTOTYPE_SC(static, kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
|
|
RB_GENERATE(kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
|
|
static struct kern_nexus_tree nx_head;
|
|
|
|
static int nxctl_get_nexus_prov_list(struct nxctl *, struct sockopt *);
|
|
static int nxctl_get_nexus_prov_entry(struct nxctl *, struct sockopt *);
|
|
static int nxctl_get_nexus_list(struct nxctl *, struct sockopt *);
|
|
static int nxctl_nexus_bind(struct nxctl *, struct sockopt *);
|
|
static int nxctl_nexus_unbind(struct nxctl *, struct sockopt *);
|
|
static int nxctl_nexus_config(struct nxctl *, struct sockopt *);
|
|
static int nxctl_get_channel_list(struct nxctl *, struct sockopt *);
|
|
static void nxctl_retain_locked(struct nxctl *);
|
|
static int nxctl_release_locked(struct nxctl *);
|
|
static void nxctl_init(struct nxctl *, struct proc *, struct fileproc *);
|
|
static struct nxctl *nxctl_alloc(struct proc *, struct fileproc *, zalloc_flags_t);
|
|
static void nxctl_free(struct nxctl *);
|
|
|
|
static struct kern_nexus_provider *nxprov_create_common(struct nxctl *,
|
|
struct kern_nexus_domain_provider *, struct nxprov_reg *,
|
|
const struct kern_nexus_provider_init *init, int *);
|
|
static void nxprov_detach(struct kern_nexus_provider *, boolean_t);
|
|
static void nxprov_retain_locked(struct kern_nexus_provider *);
|
|
static int nxprov_release_locked(struct kern_nexus_provider *);
|
|
static struct kern_nexus_provider *nxprov_alloc(
|
|
struct kern_nexus_domain_provider *, zalloc_flags_t);
|
|
static void nxprov_free(struct kern_nexus_provider *);
|
|
|
|
static int nx_init_rings(struct kern_nexus *, struct kern_channel *);
|
|
static void nx_fini_rings(struct kern_nexus *, struct kern_channel *);
|
|
static int nx_init_slots(struct kern_nexus *, struct __kern_channel_ring *);
|
|
static void nx_fini_slots(struct kern_nexus *, struct __kern_channel_ring *);
|
|
static struct kern_nexus *nx_alloc(zalloc_flags_t);
|
|
static void nx_free(struct kern_nexus *);
|
|
|
|
static SKMEM_TYPE_DEFINE(nxctl_zone, struct nxctl);
|
|
|
|
static SKMEM_TYPE_DEFINE(nxbind_zone, struct nxbind);
|
|
|
|
static SKMEM_TYPE_DEFINE(nxprov_zone, struct kern_nexus_provider);
|
|
|
|
static SKMEM_TYPE_DEFINE(nxprov_params_zone, struct nxprov_params);
|
|
|
|
static SKMEM_TYPE_DEFINE(nx_zone, struct kern_nexus);
|
|
|
|
static int __nx_inited = 0;
|
|
|
|
#define SKMEM_TAG_NX_KEY "com.apple.skywalk.nexus.key"
|
|
SKMEM_TAG_DEFINE(skmem_tag_nx_key, SKMEM_TAG_NX_KEY);
|
|
|
|
#define SKMEM_TAG_NX_MIB "com.apple.skywalk.nexus.mib"
|
|
static SKMEM_TAG_DEFINE(skmem_tag_nx_mib, SKMEM_TAG_NX_MIB);
|
|
|
|
#define SKMEM_TAG_NX_PORT "com.apple.skywalk.nexus.port"
|
|
SKMEM_TAG_DEFINE(skmem_tag_nx_port, SKMEM_TAG_NX_PORT);
|
|
|
|
#define SKMEM_TAG_NX_PORT_INFO "com.apple.skywalk.nexus.port.info"
|
|
SKMEM_TAG_DEFINE(skmem_tag_nx_port_info, SKMEM_TAG_NX_PORT_INFO);
|
|
|
|
/*
|
|
* Special nexus controller handle for Skywalk internal use. Unlike all
|
|
* other nexus controller handles that are created by userland or kernel
|
|
* clients, this one never gets closed or freed. It is also not part of
|
|
* the global nxctl_head list.
|
|
*/
|
|
static struct nxctl _kernnxctl;
|
|
static struct nxctl _usernxctl;
|
|
struct nexus_controller kernnxctl = { .ncd_nxctl = &_kernnxctl };
|
|
struct nexus_controller usernxctl = { .ncd_nxctl = &_usernxctl };
|
|
|
|
int
|
|
nexus_init(void)
|
|
{
|
|
SK_LOCK_ASSERT_HELD();
|
|
ASSERT(!__nx_inited);
|
|
|
|
RB_INIT(&nx_head);
|
|
|
|
na_init();
|
|
|
|
/* attach system built-in domains and domain providers */
|
|
nxdom_attach_all();
|
|
|
|
/*
|
|
* Initialize private kernel and shared user nexus controller handle;
|
|
*
|
|
* Shared Kernel controller is used internally for creating nexus providers
|
|
* and nexus instances from within the Skywalk code (e.g. netif_compat).
|
|
*
|
|
* Shared User controller is used userspace by clients(e.g. libnetcore)
|
|
* that would like to call nexus instances for use cases like
|
|
* configuring flow entry that they own indirectly (e.g. via NECP), so
|
|
* that the nexus would perform permission check based on other info
|
|
* (e.g. PID, UUID) and bypass nxctl check (this nxctl has no
|
|
* credentials).
|
|
*/
|
|
nxctl_init(&_kernnxctl, kernproc, NULL);
|
|
nxctl_retain_locked(&_kernnxctl); /* one for us */
|
|
nxctl_init(&_usernxctl, kernproc, NULL);
|
|
nxctl_retain_locked(&_usernxctl); /* one for us */
|
|
nxctl_traffic_rule_init();
|
|
|
|
__nx_inited = 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
nexus_fini(void)
|
|
{
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
if (__nx_inited) {
|
|
nxctl_traffic_rule_fini();
|
|
nxctl_release_locked(&_kernnxctl);
|
|
nxctl_release_locked(&_usernxctl);
|
|
|
|
/* tell all domains they're going away */
|
|
nxdom_detach_all();
|
|
|
|
ASSERT(RB_EMPTY(&nx_head));
|
|
|
|
na_fini();
|
|
|
|
__nx_inited = 0;
|
|
}
|
|
}
|
|
|
|
struct nxctl *
|
|
nxctl_create(struct proc *p, struct fileproc *fp, const uuid_t nxctl_uuid,
|
|
int *err)
|
|
{
|
|
struct nxctl *nxctl = NULL;
|
|
|
|
ASSERT(!uuid_is_null(nxctl_uuid));
|
|
|
|
/* privilege checks would be done when performing nxctl operations */
|
|
|
|
SK_LOCK();
|
|
|
|
nxctl = nxctl_alloc(p, fp, Z_WAITOK);
|
|
|
|
STAILQ_INSERT_TAIL(&nxctl_head, nxctl, nxctl_link);
|
|
nxctl->nxctl_flags |= NEXUSCTLF_ATTACHED;
|
|
uuid_copy(nxctl->nxctl_uuid, nxctl_uuid);
|
|
|
|
nxctl_retain_locked(nxctl); /* one for being in the list */
|
|
nxctl_retain_locked(nxctl); /* one for the caller */
|
|
|
|
#if SK_LOG
|
|
uuid_string_t uuidstr;
|
|
SK_D("nxctl 0x%llx UUID %s", SK_KVA(nxctl),
|
|
sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr));
|
|
#endif /* SK_LOG */
|
|
|
|
SK_UNLOCK();
|
|
|
|
if (*err != 0) {
|
|
nxctl_free(nxctl);
|
|
nxctl = NULL;
|
|
}
|
|
return nxctl;
|
|
}
|
|
|
|
void
|
|
nxctl_close(struct nxctl *nxctl)
|
|
{
|
|
struct kern_nexus_provider *nxprov = NULL, *tnxprov;
|
|
|
|
lck_mtx_lock(&nxctl->nxctl_lock);
|
|
SK_LOCK();
|
|
|
|
ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL));
|
|
|
|
#if SK_LOG
|
|
uuid_string_t uuidstr;
|
|
SK_D("nxctl 0x%llx UUID %s flags 0x%b", SK_KVA(nxctl),
|
|
sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr),
|
|
nxctl->nxctl_flags, NEXUSCTLF_BITS);
|
|
#endif /* SK_LOG */
|
|
|
|
if (!(nxctl->nxctl_flags & NEXUSCTLF_NOFDREF)) {
|
|
nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
|
|
nxctl->nxctl_fp = NULL;
|
|
}
|
|
|
|
/* may be called as part of failure cleanup, so check */
|
|
if (nxctl->nxctl_flags & NEXUSCTLF_ATTACHED) {
|
|
/* caller must hold an extra ref */
|
|
ASSERT(nxctl->nxctl_refcnt > 1);
|
|
(void) nxctl_release_locked(nxctl);
|
|
|
|
STAILQ_REMOVE(&nxctl_head, nxctl, nxctl, nxctl_link);
|
|
nxctl->nxctl_flags &= ~NEXUSCTLF_ATTACHED;
|
|
}
|
|
|
|
repeat:
|
|
STAILQ_FOREACH_SAFE(nxprov, &nxprov_head, nxprov_link, tnxprov) {
|
|
/*
|
|
* Close provider only for those which are owned by
|
|
* this control instance. Note that if we close the
|
|
* provider, we need to repeat this search as the
|
|
* list might have been changed by another thread.
|
|
* That's possible since SK_UNLOCK() may be called
|
|
* as a result of calling nxprov_close().
|
|
*/
|
|
if (!(nxprov->nxprov_flags & NXPROVF_CLOSED) &&
|
|
nxprov->nxprov_ctl == nxctl) {
|
|
nxprov_retain_locked(nxprov);
|
|
(void) nxprov_close(nxprov, TRUE);
|
|
(void) nxprov_release_locked(nxprov);
|
|
goto repeat;
|
|
}
|
|
}
|
|
|
|
SK_UNLOCK();
|
|
lck_mtx_unlock(&nxctl->nxctl_lock);
|
|
nxctl_traffic_rule_clean(nxctl);
|
|
}
|
|
|
|
int
|
|
nxctl_set_opt(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
#pragma unused(nxctl)
|
|
int err = 0;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
if (sopt->sopt_dir != SOPT_SET) {
|
|
sopt->sopt_dir = SOPT_SET;
|
|
}
|
|
|
|
switch (sopt->sopt_name) {
|
|
case NXOPT_NEXUS_BIND:
|
|
err = nxctl_nexus_bind(nxctl, sopt);
|
|
break;
|
|
|
|
case NXOPT_NEXUS_UNBIND:
|
|
err = nxctl_nexus_unbind(nxctl, sopt);
|
|
break;
|
|
|
|
case NXOPT_NEXUS_CONFIG:
|
|
err = nxctl_nexus_config(nxctl, sopt);
|
|
break;
|
|
|
|
default:
|
|
err = ENOPROTOOPT;
|
|
break;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
int
|
|
nxctl_get_opt(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
#pragma unused(nxctl)
|
|
int err = 0;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
if (sopt->sopt_dir != SOPT_GET) {
|
|
sopt->sopt_dir = SOPT_GET;
|
|
}
|
|
|
|
switch (sopt->sopt_name) {
|
|
case NXOPT_NEXUS_PROV_LIST:
|
|
err = nxctl_get_nexus_prov_list(nxctl, sopt);
|
|
break;
|
|
|
|
case NXOPT_NEXUS_PROV_ENTRY:
|
|
err = nxctl_get_nexus_prov_entry(nxctl, sopt);
|
|
break;
|
|
|
|
case NXOPT_NEXUS_LIST:
|
|
err = nxctl_get_nexus_list(nxctl, sopt);
|
|
break;
|
|
|
|
case NXOPT_CHANNEL_LIST:
|
|
err = nxctl_get_channel_list(nxctl, sopt);
|
|
break;
|
|
|
|
default:
|
|
err = ENOPROTOOPT;
|
|
break;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Upper bound on # of nrl_num_regs that we'd return to user space */
|
|
#define MAX_NUM_REG_ENTRIES 256
|
|
|
|
/* Hoisted out of line to reduce kernel stack footprint */
|
|
SK_NO_INLINE_ATTRIBUTE
|
|
static int
|
|
nxctl_get_nexus_prov_list(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
user_addr_t tmp_ptr = USER_ADDR_NULL;
|
|
struct nxprov_reg_ent *pnre, *nres = NULL;
|
|
struct nxprov_list_req nrlr;
|
|
struct kern_nexus_provider *nxprov = NULL;
|
|
uint32_t nregs = 0, ncregs = 0;
|
|
int err = 0, observeall;
|
|
size_t nres_sz;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
ASSERT(sopt->sopt_p != NULL);
|
|
if (sopt->sopt_val == USER_ADDR_NULL) {
|
|
return EINVAL;
|
|
}
|
|
|
|
err = sooptcopyin(sopt, &nrlr, sizeof(nrlr), sizeof(nrlr));
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
if ((size_t)nrlr.nrl_num_regs > MAX_NUM_REG_ENTRIES) {
|
|
nrlr.nrl_num_regs = MAX_NUM_REG_ENTRIES;
|
|
}
|
|
|
|
/*
|
|
* If the caller specified a buffer, copy out the Nexus provider
|
|
* entries to caller gracefully. We only copy out the number of
|
|
* entries which caller has asked for, but we always tell caller
|
|
* how big the buffer really needs to be.
|
|
*/
|
|
tmp_ptr = nrlr.nrl_regs;
|
|
if (tmp_ptr != USER_ADDR_NULL && nrlr.nrl_num_regs > 0) {
|
|
nres_sz = (size_t)nrlr.nrl_num_regs * sizeof(*nres);
|
|
nres = sk_alloc_data(nres_sz, Z_WAITOK, skmem_tag_sysctl_buf);
|
|
if (__improbable(nres == NULL)) {
|
|
return ENOBUFS;
|
|
}
|
|
}
|
|
|
|
observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
|
|
PRIV_SKYWALK_OBSERVE_ALL) == 0);
|
|
|
|
SK_LOCK();
|
|
/*
|
|
* Count number of providers. If buffer space exists and
|
|
* remains, copy out provider entries.
|
|
*/
|
|
nregs = nrlr.nrl_num_regs;
|
|
pnre = nres;
|
|
|
|
STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
|
|
/*
|
|
* Return only entries that are visible to the caller,
|
|
* unless it has PRIV_SKYWALK_OBSERVE_ALL.
|
|
*/
|
|
if (nxprov->nxprov_ctl != nxctl && !observeall) {
|
|
continue;
|
|
}
|
|
|
|
if (nres != NULL && nregs > 0) {
|
|
uuid_copy(pnre->npre_prov_uuid, nxprov->nxprov_uuid);
|
|
bcopy(nxprov->nxprov_params, &pnre->npre_prov_params,
|
|
sizeof(struct nxprov_params));
|
|
--nregs;
|
|
++pnre;
|
|
++ncregs;
|
|
}
|
|
}
|
|
SK_UNLOCK();
|
|
|
|
if (ncregs == 0) {
|
|
err = ENOENT;
|
|
}
|
|
|
|
if (nres != NULL) {
|
|
if (err == 0 && tmp_ptr != USER_ADDR_NULL) {
|
|
if (sopt->sopt_p != kernproc) {
|
|
err = copyout(nres, tmp_ptr,
|
|
ncregs * sizeof(*nres));
|
|
} else {
|
|
bcopy(nres, CAST_DOWN(caddr_t, tmp_ptr),
|
|
ncregs * sizeof(*nres));
|
|
}
|
|
}
|
|
sk_free_data(nres, nres_sz);
|
|
nres = NULL;
|
|
}
|
|
|
|
if (err == 0) {
|
|
nrlr.nrl_num_regs = ncregs;
|
|
err = sooptcopyout(sopt, &nrlr, sizeof(nrlr));
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Hoisted out of line to reduce kernel stack footprint */
|
|
SK_NO_INLINE_ATTRIBUTE
|
|
static int
|
|
nxctl_get_nexus_prov_entry(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
struct nxprov_reg_ent nre;
|
|
struct kern_nexus_provider *nxprov = NULL;
|
|
int err = 0;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
ASSERT(sopt->sopt_p != NULL);
|
|
if (sopt->sopt_val == USER_ADDR_NULL) {
|
|
return EINVAL;
|
|
}
|
|
|
|
bzero(&nre, sizeof(nre));
|
|
err = sooptcopyin(sopt, &nre, sizeof(nre), sizeof(nre));
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
if (uuid_is_null(nre.npre_prov_uuid)) {
|
|
return EINVAL;
|
|
}
|
|
|
|
SK_LOCK();
|
|
STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
|
|
if (uuid_compare(nxprov->nxprov_uuid,
|
|
nre.npre_prov_uuid) == 0) {
|
|
/*
|
|
* Return only entries that are visible to the caller,
|
|
* unless it has PRIV_SKYWALK_OBSERVE_ALL.
|
|
*/
|
|
if (nxprov->nxprov_ctl != nxctl) {
|
|
if (skywalk_priv_check_cred(sopt->sopt_p,
|
|
nxctl->nxctl_cred,
|
|
PRIV_SKYWALK_OBSERVE_ALL) != 0) {
|
|
nxprov = NULL;
|
|
break;
|
|
}
|
|
}
|
|
|
|
bcopy(nxprov->nxprov_params, &nre.npre_prov_params,
|
|
sizeof(struct nxprov_params));
|
|
break;
|
|
}
|
|
}
|
|
SK_UNLOCK();
|
|
|
|
if (nxprov != NULL) {
|
|
err = sooptcopyout(sopt, &nre, sizeof(nre));
|
|
} else {
|
|
err = ENOENT;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Upper bound on # of nl_num_nx_uuids that we'd return to user space */
|
|
#define MAX_NUM_NX_UUIDS 4096
|
|
|
|
/* Hoisted out of line to reduce kernel stack footprint */
|
|
SK_NO_INLINE_ATTRIBUTE
|
|
static int
|
|
nxctl_get_nexus_list(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
user_addr_t tmp_ptr = USER_ADDR_NULL;
|
|
uint32_t nuuids = 0, ncuuids = 0;
|
|
uuid_t *puuid, *uuids = NULL;
|
|
size_t uuids_sz;
|
|
struct nx_list_req nlr;
|
|
struct kern_nexus_provider *nxprov = NULL;
|
|
struct kern_nexus *nx = NULL;
|
|
int err = 0, observeall;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
ASSERT(sopt->sopt_p != NULL);
|
|
if (sopt->sopt_val == USER_ADDR_NULL) {
|
|
return EINVAL;
|
|
}
|
|
|
|
err = sooptcopyin(sopt, &nlr, sizeof(nlr), sizeof(nlr));
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
if (uuid_is_null(nlr.nl_prov_uuid)) {
|
|
return EINVAL;
|
|
} else if ((size_t)nlr.nl_num_nx_uuids > MAX_NUM_NX_UUIDS) {
|
|
nlr.nl_num_nx_uuids = MAX_NUM_NX_UUIDS;
|
|
}
|
|
|
|
/*
|
|
* If the caller specified a buffer, copy out the Nexus UUIDs to
|
|
* caller gracefully. We only copy out the number of UUIDs which
|
|
* caller has asked for, but we always tell caller how big the
|
|
* buffer really needs to be.
|
|
*/
|
|
tmp_ptr = nlr.nl_nx_uuids;
|
|
if (tmp_ptr != USER_ADDR_NULL && nlr.nl_num_nx_uuids > 0) {
|
|
uuids_sz = (size_t)nlr.nl_num_nx_uuids * sizeof(uuid_t);
|
|
uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
|
|
if (__improbable(uuids == NULL)) {
|
|
return ENOBUFS;
|
|
}
|
|
}
|
|
|
|
observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
|
|
PRIV_SKYWALK_OBSERVE_ALL) == 0);
|
|
|
|
SK_LOCK();
|
|
STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
|
|
/*
|
|
* Return only entries that are visible to the caller,
|
|
* unless it has PRIV_SKYWALK_OBSERVE_ALL.
|
|
*/
|
|
if (nxprov->nxprov_ctl != nxctl && !observeall) {
|
|
continue;
|
|
}
|
|
|
|
if (uuid_compare(nxprov->nxprov_uuid, nlr.nl_prov_uuid) == 0) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (nxprov != NULL) {
|
|
/*
|
|
* Count number of Nexus. If buffer space exists
|
|
* and remains, copy out the Nexus UUIDs.
|
|
*/
|
|
nuuids = nlr.nl_num_nx_uuids;
|
|
puuid = uuids;
|
|
|
|
STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
|
|
++ncuuids;
|
|
if (uuids != NULL && nuuids > 0) {
|
|
uuid_copy(*puuid, nx->nx_uuid);
|
|
--nuuids;
|
|
++puuid;
|
|
}
|
|
}
|
|
} else {
|
|
err = ENOENT;
|
|
}
|
|
SK_UNLOCK();
|
|
|
|
if (uuids != NULL) {
|
|
if (err == 0 && nxprov != NULL && tmp_ptr != USER_ADDR_NULL) {
|
|
uintptr_t cnt_uuid;
|
|
|
|
/* Note: Pointer arithmetic */
|
|
cnt_uuid = (uintptr_t)(puuid - uuids);
|
|
if (cnt_uuid > 0) {
|
|
if (sopt->sopt_p != kernproc) {
|
|
err = copyout(uuids, tmp_ptr,
|
|
cnt_uuid * sizeof(uuid_t));
|
|
} else {
|
|
bcopy(uuids,
|
|
CAST_DOWN(caddr_t, tmp_ptr),
|
|
cnt_uuid * sizeof(uuid_t));
|
|
}
|
|
}
|
|
}
|
|
sk_free_data(uuids, uuids_sz);
|
|
uuids = NULL;
|
|
}
|
|
|
|
if (err == 0) {
|
|
nlr.nl_num_nx_uuids = ncuuids;
|
|
err = sooptcopyout(sopt, &nlr, sizeof(nlr));
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Hoisted out of line to reduce kernel stack footprint */
|
|
SK_NO_INLINE_ATTRIBUTE
|
|
static int
|
|
nxctl_nexus_bind(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
boolean_t m_pid, m_exec_uuid, m_key;
|
|
struct nx_bind_req nbr;
|
|
struct proc *p = PROC_NULL;
|
|
struct nxbind *nxb = NULL;
|
|
uint64_t p_uniqueid = -1;
|
|
pid_t p_pid = -1;
|
|
struct kern_nexus *nx = NULL;
|
|
#if SK_LOG
|
|
uuid_string_t exec_uuidstr;
|
|
#endif /* SK_LOG */
|
|
uuid_t p_uuid;
|
|
void *key = NULL;
|
|
int err = 0;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
if (sopt->sopt_val == USER_ADDR_NULL) {
|
|
return EINVAL;
|
|
}
|
|
|
|
uuid_clear(p_uuid);
|
|
bzero(&nbr, sizeof(nbr));
|
|
err = sooptcopyin(sopt, &nbr, sizeof(nbr), sizeof(nbr));
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
if (uuid_is_null(nbr.nb_nx_uuid)) {
|
|
err = EINVAL;
|
|
goto done_unlocked;
|
|
}
|
|
|
|
nbr.nb_flags &= NBR_MATCH_MASK;
|
|
if (nbr.nb_flags == 0) {
|
|
/* must choose one of the match criteria */
|
|
err = EINVAL;
|
|
goto done_unlocked;
|
|
}
|
|
m_pid = !!(nbr.nb_flags & NBR_MATCH_PID);
|
|
m_exec_uuid = !!(nbr.nb_flags & NBR_MATCH_EXEC_UUID);
|
|
m_key = !!(nbr.nb_flags & NBR_MATCH_KEY);
|
|
|
|
if (m_pid || m_exec_uuid) {
|
|
/*
|
|
* Validate process ID. A valid PID is needed when we're
|
|
* asked to match by PID, or if asked to match by executable
|
|
* UUID with a NULL nb_exec_uuid supplied. The latter is
|
|
* to support the case when a userland Nexus provider isn't
|
|
* able to acquire its client's executable UUID, but is
|
|
* able to identify it via PID.
|
|
*/
|
|
if ((m_pid || uuid_is_null(nbr.nb_exec_uuid)) &&
|
|
(p = proc_find(nbr.nb_pid)) == PROC_NULL) {
|
|
err = ESRCH;
|
|
goto done_unlocked;
|
|
}
|
|
/* exclude kernel from the match criteria */
|
|
if (p == kernproc) {
|
|
err = EACCES;
|
|
goto done_unlocked;
|
|
} else if (p != PROC_NULL) {
|
|
proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
|
|
p_uniqueid = proc_uniqueid(p);
|
|
p_pid = proc_pid(p);
|
|
} else {
|
|
uuid_copy(p_uuid, nbr.nb_exec_uuid);
|
|
}
|
|
}
|
|
|
|
if (m_key) {
|
|
if (nbr.nb_key_len == 0 || nbr.nb_key_len > NEXUS_MAX_KEY_LEN ||
|
|
nbr.nb_key == USER_ADDR_NULL) {
|
|
err = EINVAL;
|
|
goto done_unlocked;
|
|
}
|
|
|
|
key = sk_alloc_data(nbr.nb_key_len, Z_WAITOK, skmem_tag_nx_key);
|
|
if (__improbable(key == NULL)) {
|
|
err = ENOMEM;
|
|
goto done_unlocked;
|
|
}
|
|
|
|
if (sopt->sopt_p != kernproc) {
|
|
err = copyin(nbr.nb_key, key, nbr.nb_key_len);
|
|
if (err != 0) {
|
|
goto done_unlocked;
|
|
}
|
|
} else {
|
|
bcopy((void *)nbr.nb_key, key, nbr.nb_key_len);
|
|
}
|
|
}
|
|
|
|
SK_LOCK();
|
|
nx = nx_find(nbr.nb_nx_uuid, TRUE);
|
|
if (nx == NULL || (disable_nxctl_check == 0 &&
|
|
nx->nx_prov->nxprov_ctl != nxctl &&
|
|
nxctl != &_kernnxctl)) { /* make exception for kernnxctl */
|
|
err = ENOENT;
|
|
goto done;
|
|
}
|
|
|
|
/* bind isn't applicable on anonymous nexus provider */
|
|
if (NX_ANONYMOUS_PROV(nx)) {
|
|
err = ENXIO;
|
|
goto done;
|
|
}
|
|
|
|
/* port must be within the domain's range */
|
|
if (nbr.nb_port != NEXUS_PORT_ANY &&
|
|
nbr.nb_port >= NXDOM_MAX(NX_DOM(nx), ports)) {
|
|
err = EDOM;
|
|
goto done;
|
|
} else if (nbr.nb_port == NEXUS_PORT_ANY) {
|
|
/* for now, this is allowed only for kernel clients */
|
|
if (sopt->sopt_p != kernproc) {
|
|
err = EPERM;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
nxb = nxb_alloc(Z_WAITOK);
|
|
|
|
if (m_pid) {
|
|
nxb->nxb_flags |= NXBF_MATCH_UNIQUEID;
|
|
nxb->nxb_uniqueid = p_uniqueid;
|
|
nxb->nxb_pid = p_pid;
|
|
}
|
|
if (m_exec_uuid) {
|
|
nxb->nxb_flags |= NXBF_MATCH_EXEC_UUID;
|
|
ASSERT(!uuid_is_null(p_uuid));
|
|
uuid_copy(nxb->nxb_exec_uuid, p_uuid);
|
|
}
|
|
if (m_key) {
|
|
nxb->nxb_flags |= NXBF_MATCH_KEY;
|
|
ASSERT(key != NULL);
|
|
nxb->nxb_key = key;
|
|
key = NULL; /* let nxb_free() free it */
|
|
ASSERT(nbr.nb_key_len != 0 &&
|
|
nbr.nb_key_len <= NEXUS_MAX_KEY_LEN);
|
|
nxb->nxb_key_len = nbr.nb_key_len;
|
|
}
|
|
|
|
/*
|
|
* Bind the creds to the nexus port. If client doesn't have a port,
|
|
* find one, claim it, and associate the creds to it. Upon success,
|
|
* the nexus may move the nxbind contents (including the key) to
|
|
* its own nxbind instance; in that case, nxb_free() below will not
|
|
* be freeing the key within.
|
|
*/
|
|
err = NX_DOM(nx)->nxdom_bind_port(nx, &nbr.nb_port, nxb, NULL);
|
|
if (err != 0) {
|
|
goto done;
|
|
}
|
|
|
|
ASSERT(nbr.nb_port != NEXUS_PORT_ANY);
|
|
(void) sooptcopyout(sopt, &nbr, sizeof(nbr));
|
|
|
|
SK_D("nexus 0x%llx nxb 0x%llx port %u flags 0x%b pid %d "
|
|
"(uniqueid %llu) exec_uuid %s key 0x%llx key_len %u",
|
|
SK_KVA(nx), SK_KVA(nxb), nbr.nb_port, nxb->nxb_flags,
|
|
NXBF_BITS, nxb->nxb_pid, nxb->nxb_uniqueid,
|
|
sk_uuid_unparse(nxb->nxb_exec_uuid, exec_uuidstr),
|
|
(nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0,
|
|
nxb->nxb_key_len);
|
|
|
|
done:
|
|
if (nx != NULL) {
|
|
(void) nx_release_locked(nx);
|
|
nx = NULL;
|
|
}
|
|
SK_UNLOCK();
|
|
|
|
done_unlocked:
|
|
ASSERT(nx == NULL);
|
|
|
|
if (nxb != NULL) {
|
|
nxb_free(nxb);
|
|
nxb = NULL;
|
|
}
|
|
if (key != NULL) {
|
|
sk_free_data(key, nbr.nb_key_len);
|
|
key = NULL;
|
|
}
|
|
if (p != PROC_NULL) {
|
|
proc_rele(p);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Hoisted out of line to reduce kernel stack footprint */
|
|
SK_NO_INLINE_ATTRIBUTE
|
|
static int
|
|
nxctl_nexus_unbind(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
struct nx_unbind_req nur;
|
|
struct kern_nexus *nx = NULL;
|
|
int err = 0;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
if (sopt->sopt_val == USER_ADDR_NULL) {
|
|
return EINVAL;
|
|
}
|
|
|
|
bzero(&nur, sizeof(nur));
|
|
err = sooptcopyin(sopt, &nur, sizeof(nur), sizeof(nur));
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
if (uuid_is_null(nur.nu_nx_uuid)) {
|
|
return EINVAL;
|
|
}
|
|
|
|
SK_LOCK();
|
|
nx = nx_find(nur.nu_nx_uuid, TRUE);
|
|
if (nx == NULL || (nx->nx_prov->nxprov_ctl != nxctl &&
|
|
nxctl != &_kernnxctl)) { /* make exception for kernnxctl */
|
|
err = ENOENT;
|
|
goto done;
|
|
}
|
|
|
|
/* unbind isn't applicable on anonymous nexus provider */
|
|
if (NX_ANONYMOUS_PROV(nx)) {
|
|
err = ENXIO;
|
|
goto done;
|
|
}
|
|
|
|
if (nur.nu_port == NEXUS_PORT_ANY) {
|
|
err = EINVAL;
|
|
goto done;
|
|
}
|
|
|
|
err = NX_DOM(nx)->nxdom_unbind_port(nx, nur.nu_port);
|
|
|
|
done:
|
|
if (nx != NULL) {
|
|
(void) nx_release_locked(nx);
|
|
nx = NULL;
|
|
}
|
|
SK_UNLOCK();
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Hoisted out of line to reduce kernel stack footprint */
|
|
SK_NO_INLINE_ATTRIBUTE
|
|
static int
|
|
nxctl_nexus_config(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
struct kern_nexus *nx = NULL;
|
|
struct nx_cfg_req ncr;
|
|
int err = 0;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
if (sopt->sopt_val == USER_ADDR_NULL) {
|
|
return EINVAL;
|
|
}
|
|
|
|
bzero(&ncr, sizeof(ncr));
|
|
err = sooptcopyin(sopt, &ncr, sizeof(ncr), sizeof(ncr));
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
if (uuid_is_null(ncr.nc_nx_uuid)) {
|
|
return EINVAL;
|
|
}
|
|
|
|
SK_LOCK();
|
|
nx = nx_find(ncr.nc_nx_uuid, TRUE);
|
|
if (nx == NULL || (disable_nxctl_check == 0 &&
|
|
nx->nx_prov->nxprov_ctl != nxctl &&
|
|
nxctl != &_kernnxctl && /* allow kernel/shared user nxctl */
|
|
nxctl != &_usernxctl)) {
|
|
err = ENOENT;
|
|
goto done;
|
|
}
|
|
|
|
if (NX_DOM_PROV(nx)->nxdom_prov_config != NULL) {
|
|
err = NX_DOM_PROV(nx)->nxdom_prov_config(NX_DOM_PROV(nx),
|
|
nx, &ncr, sopt->sopt_dir, sopt->sopt_p, nxctl->nxctl_cred);
|
|
} else {
|
|
err = EPERM;
|
|
}
|
|
|
|
if (err == 0) {
|
|
(void) sooptcopyout(sopt, &ncr, sizeof(ncr));
|
|
}
|
|
done:
|
|
if (nx != NULL) {
|
|
(void) nx_release_locked(nx);
|
|
nx = NULL;
|
|
}
|
|
SK_UNLOCK();
|
|
|
|
return err;
|
|
}
|
|
|
|
struct nxbind *
|
|
nxb_alloc(zalloc_flags_t how)
|
|
{
|
|
struct nxbind *nxb = zalloc_flags(nxbind_zone, how | Z_ZERO);
|
|
|
|
if (nxb) {
|
|
SK_DF(SK_VERB_MEM, "nxb 0x%llx ALLOC", SK_KVA(nxb));
|
|
}
|
|
return nxb;
|
|
}
|
|
|
|
void
|
|
nxb_free(struct nxbind *nxb)
|
|
{
|
|
SK_DF(SK_VERB_MEM, "nxb 0x%llx key 0x%llx FREE", SK_KVA(nxb),
|
|
(nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0);
|
|
|
|
if (nxb->nxb_key != NULL) {
|
|
sk_free_data(nxb->nxb_key, nxb->nxb_key_len);
|
|
nxb->nxb_key = NULL;
|
|
}
|
|
zfree(nxbind_zone, nxb);
|
|
}
|
|
|
|
/*
|
|
* nxb0 is assumed to possess the truth, compare nxb1 against it.
|
|
*/
|
|
boolean_t
|
|
nxb_is_equal(struct nxbind *nxb0, struct nxbind *nxb1)
|
|
{
|
|
ASSERT(nxb0 != NULL && nxb1 != NULL);
|
|
ASSERT(nxb0 != nxb1);
|
|
|
|
/* we always compare using uniqueid and not pid */
|
|
if ((nxb0->nxb_flags & NXBF_MATCH_UNIQUEID) &&
|
|
nxb1->nxb_uniqueid != nxb0->nxb_uniqueid) {
|
|
return FALSE;
|
|
}
|
|
|
|
if ((nxb0->nxb_flags & NXBF_MATCH_EXEC_UUID) &&
|
|
uuid_compare(nxb1->nxb_exec_uuid, nxb0->nxb_exec_uuid) != 0) {
|
|
return FALSE;
|
|
}
|
|
|
|
ASSERT(!(nxb0->nxb_flags & NXBF_MATCH_KEY) ||
|
|
(nxb0->nxb_key_len != 0 && nxb0->nxb_key != NULL));
|
|
|
|
if ((nxb0->nxb_flags & NXBF_MATCH_KEY) &&
|
|
(nxb0->nxb_key_len != nxb1->nxb_key_len ||
|
|
nxb1->nxb_key == NULL || timingsafe_bcmp(nxb1->nxb_key, nxb0->nxb_key,
|
|
nxb1->nxb_key_len) != 0)) {
|
|
return FALSE;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
void
|
|
nxb_move(struct nxbind *snxb, struct nxbind *dnxb)
|
|
{
|
|
ASSERT(!(snxb->nxb_flags & NXBF_MATCH_KEY) ||
|
|
(snxb->nxb_key_len != 0 && snxb->nxb_key != NULL));
|
|
|
|
/* in case the destination has a key attached, free it first */
|
|
if (dnxb->nxb_key != NULL) {
|
|
sk_free_data(dnxb->nxb_key, dnxb->nxb_key_len);
|
|
dnxb->nxb_key = NULL;
|
|
}
|
|
|
|
/* move everything from src to dst, and then wipe out src */
|
|
bcopy(snxb, dnxb, sizeof(*dnxb));
|
|
bzero(snxb, sizeof(*snxb));
|
|
}
|
|
|
|
/* Upper bound on # of cl_num_ch_uuids that we'd return to user space */
|
|
#define MAX_NUM_CH_UUIDS 4096
|
|
|
|
/* Hoisted out of line to reduce kernel stack footprint */
|
|
SK_NO_INLINE_ATTRIBUTE
|
|
static int
|
|
nxctl_get_channel_list(struct nxctl *nxctl, struct sockopt *sopt)
|
|
{
|
|
user_addr_t tmp_ptr = USER_ADDR_NULL;
|
|
uint32_t nuuids = 0, ncuuids = 0;
|
|
uuid_t *puuid, *uuids = NULL;
|
|
size_t uuids_sz;
|
|
struct ch_list_req clr;
|
|
struct kern_channel *ch = NULL;
|
|
struct kern_nexus *nx = NULL;
|
|
struct kern_nexus find;
|
|
int err = 0, observeall;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
ASSERT(sopt->sopt_p != NULL);
|
|
if (sopt->sopt_val == USER_ADDR_NULL) {
|
|
return EINVAL;
|
|
}
|
|
|
|
err = sooptcopyin(sopt, &clr, sizeof(clr), sizeof(clr));
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
if (uuid_is_null(clr.cl_nx_uuid)) {
|
|
return EINVAL;
|
|
} else if ((size_t)clr.cl_num_ch_uuids > MAX_NUM_CH_UUIDS) {
|
|
clr.cl_num_ch_uuids = MAX_NUM_CH_UUIDS;
|
|
}
|
|
|
|
/*
|
|
* If the caller specified a buffer, copy out the Channel UUIDs to
|
|
* caller gracefully. We only copy out the number of UUIDs which
|
|
* caller has asked for, but we always tell caller how big the
|
|
* buffer really needs to be.
|
|
*/
|
|
tmp_ptr = clr.cl_ch_uuids;
|
|
if (tmp_ptr != USER_ADDR_NULL && clr.cl_num_ch_uuids > 0) {
|
|
uuids_sz = (size_t)clr.cl_num_ch_uuids * sizeof(uuid_t);
|
|
uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
|
|
if (uuids == NULL) {
|
|
return ENOBUFS;
|
|
}
|
|
}
|
|
|
|
observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
|
|
PRIV_SKYWALK_OBSERVE_ALL) == 0);
|
|
|
|
SK_LOCK();
|
|
uuid_copy(find.nx_uuid, clr.cl_nx_uuid);
|
|
nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
|
|
if (nx != NULL && NX_PROV(nx)->nxprov_ctl != nxctl && !observeall) {
|
|
/*
|
|
* Return only entries that are visible to the caller,
|
|
* unless it has PRIV_SKYWALK_OBSERVE_ALL.
|
|
*/
|
|
nx = NULL;
|
|
}
|
|
if (nx != NULL) {
|
|
/*
|
|
* Count number of Channels. If buffer space exists
|
|
* and remains, copy out the Channel UUIDs.
|
|
*/
|
|
nuuids = clr.cl_num_ch_uuids;
|
|
puuid = uuids;
|
|
|
|
STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
|
|
++ncuuids;
|
|
if (uuids != NULL && nuuids > 0) {
|
|
uuid_copy(*puuid, ch->ch_info->cinfo_ch_id);
|
|
--nuuids;
|
|
++puuid;
|
|
}
|
|
}
|
|
} else {
|
|
err = ENOENT;
|
|
}
|
|
SK_UNLOCK();
|
|
|
|
if (uuids != NULL) {
|
|
if (err == 0 && nx != NULL && tmp_ptr != USER_ADDR_NULL) {
|
|
uintptr_t cnt_uuid;
|
|
|
|
/* Note: Pointer arithmetic */
|
|
cnt_uuid = (uintptr_t)(puuid - uuids);
|
|
ASSERT(cnt_uuid > 0);
|
|
|
|
if (sopt->sopt_p != kernproc) {
|
|
err = copyout(uuids, tmp_ptr,
|
|
cnt_uuid * sizeof(uuid_t));
|
|
} else {
|
|
bcopy(uuids, CAST_DOWN(caddr_t, tmp_ptr),
|
|
cnt_uuid * sizeof(uuid_t));
|
|
}
|
|
}
|
|
sk_free_data(uuids, uuids_sz);
|
|
uuids = NULL;
|
|
}
|
|
|
|
if (err == 0) {
|
|
clr.cl_num_ch_uuids = ncuuids;
|
|
err = sooptcopyout(sopt, &clr, sizeof(clr));
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static void
|
|
nxctl_init(struct nxctl *nxctl, struct proc *p, struct fileproc *fp)
|
|
{
|
|
uuid_t p_uuid;
|
|
|
|
bzero(nxctl, sizeof(*nxctl));
|
|
|
|
proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
|
|
|
|
lck_mtx_init(&nxctl->nxctl_lock, &nexus_lock_group, &nexus_lock_attr);
|
|
uuid_copy(nxctl->nxctl_proc_uuid, p_uuid);
|
|
nxctl->nxctl_proc_uniqueid = proc_uniqueid(p);
|
|
nxctl->nxctl_cred = kauth_cred_proc_ref(p);
|
|
nxctl->nxctl_fp = fp;
|
|
if (nxctl == &_kernnxctl) {
|
|
ASSERT(p == kernproc);
|
|
nxctl->nxctl_flags |= NEXUSCTLF_KERNEL;
|
|
}
|
|
if (nxctl == &_usernxctl) {
|
|
ASSERT(p == kernproc);
|
|
nxctl->nxctl_cred = NULL;
|
|
}
|
|
if (fp == NULL) {
|
|
nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
|
|
}
|
|
}
|
|
|
|
static struct nxctl *
|
|
nxctl_alloc(struct proc *p, struct fileproc *fp, zalloc_flags_t how)
|
|
{
|
|
struct nxctl *nxctl = zalloc_flags(nxctl_zone, how);
|
|
|
|
if (nxctl != NULL) {
|
|
nxctl_init(nxctl, p, fp);
|
|
}
|
|
return nxctl;
|
|
}
|
|
|
|
static void
|
|
nxctl_free(struct nxctl *nxctl)
|
|
{
|
|
ASSERT(nxctl->nxctl_refcnt == 0);
|
|
ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_ATTACHED));
|
|
kauth_cred_unref(&nxctl->nxctl_cred);
|
|
lck_mtx_destroy(&nxctl->nxctl_lock, &nexus_lock_group);
|
|
SK_D("nxctl 0x%llx FREE", SK_KVA(nxctl));
|
|
if (!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL)) {
|
|
zfree(nxctl_zone, nxctl);
|
|
}
|
|
}
|
|
|
|
static void
|
|
nxctl_retain_locked(struct nxctl *nxctl)
|
|
{
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
nxctl->nxctl_refcnt++;
|
|
ASSERT(nxctl->nxctl_refcnt != 0);
|
|
}
|
|
|
|
void
|
|
nxctl_retain(struct nxctl *nxctl)
|
|
{
|
|
SK_LOCK();
|
|
nxctl_retain_locked(nxctl);
|
|
SK_UNLOCK();
|
|
}
|
|
|
|
static int
|
|
nxctl_release_locked(struct nxctl *nxctl)
|
|
{
|
|
int oldref = nxctl->nxctl_refcnt;
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
ASSERT(nxctl->nxctl_refcnt != 0);
|
|
if (--nxctl->nxctl_refcnt == 0) {
|
|
nxctl_free(nxctl);
|
|
}
|
|
|
|
return oldref == 1;
|
|
}
|
|
|
|
int
|
|
nxctl_release(struct nxctl *nxctl)
|
|
{
|
|
int lastref;
|
|
|
|
SK_LOCK();
|
|
lastref = nxctl_release_locked(nxctl);
|
|
SK_UNLOCK();
|
|
|
|
return lastref;
|
|
}
|
|
|
|
void
|
|
nxctl_dtor(void *arg)
|
|
{
|
|
struct nxctl *nxctl = arg;
|
|
|
|
nxctl_close(nxctl);
|
|
SK_LOCK();
|
|
(void) nxctl_release_locked(nxctl);
|
|
SK_UNLOCK();
|
|
}
|
|
|
|
int
|
|
nxprov_advise_connect(struct kern_nexus *nx, struct kern_channel *ch,
|
|
struct proc *p)
|
|
{
|
|
struct kern_nexus_provider *nxprov = NX_PROV(nx);
|
|
int err = 0;
|
|
|
|
ASSERT(!(ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)));
|
|
ASSERT(ch->ch_ctx == NULL);
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
|
|
|
|
/* monitor channels aren't externally visible/usable, so ignore */
|
|
if ((ch->ch_info->cinfo_ch_mode & CHMODE_MONITOR) ||
|
|
(ch->ch_flags & CHANF_EXT_SKIP) ||
|
|
(nxprov->nxprov_ext.nxpi_pre_connect == NULL ||
|
|
nxprov->nxprov_ext.nxpi_connected == NULL)) {
|
|
return 0;
|
|
}
|
|
|
|
ch_retain_locked(ch);
|
|
lck_mtx_unlock(&ch->ch_lock);
|
|
SK_UNLOCK();
|
|
lck_mtx_lock(&ch->ch_lock);
|
|
|
|
err = nxprov->nxprov_ext.nxpi_pre_connect(nxprov, p, nx,
|
|
ch->ch_info->cinfo_nx_port, ch, &ch->ch_ctx);
|
|
if (err != 0) {
|
|
SK_D("ch 0x%llx flags %b nx 0x%llx pre_connect "
|
|
"error %d", SK_KVA(ch), ch->ch_flags,
|
|
CHANF_BITS, SK_KVA(nx), err);
|
|
ch->ch_ctx = NULL;
|
|
goto done;
|
|
}
|
|
/*
|
|
* Upon ring/slot init failure, this is cleared
|
|
* by nxprov_advise_disconnect() below.
|
|
*/
|
|
os_atomic_or(&ch->ch_flags, CHANF_EXT_PRECONNECT, relaxed);
|
|
if (NXPROV_LLINK(nxprov)) {
|
|
err = nx_netif_llink_ext_init_default_queues(nx);
|
|
} else {
|
|
err = nx_init_rings(nx, ch);
|
|
}
|
|
if (err != 0) {
|
|
goto done;
|
|
}
|
|
ASSERT(err == 0);
|
|
ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT |
|
|
CHANF_EXT_CONNECTED)) == CHANF_EXT_PRECONNECT);
|
|
|
|
err = nxprov->nxprov_ext.nxpi_connected(nxprov, nx, ch);
|
|
if (err != 0) {
|
|
SK_D("ch 0x%llx flags %b nx 0x%llx connected error %d",
|
|
SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx), err);
|
|
goto done;
|
|
}
|
|
os_atomic_or(&ch->ch_flags, CHANF_EXT_CONNECTED, relaxed);
|
|
SK_D("ch 0x%llx flags %b nx 0x%llx connected",
|
|
SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));
|
|
|
|
|
|
done:
|
|
lck_mtx_unlock(&ch->ch_lock);
|
|
SK_LOCK();
|
|
lck_mtx_lock(&ch->ch_lock);
|
|
if ((err != 0) &&
|
|
(ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT))) {
|
|
nxprov_advise_disconnect(nx, ch);
|
|
}
|
|
/* caller is expected to hold one, in addition to ourselves */
|
|
VERIFY(ch->ch_refcnt >= 2);
|
|
ch_release_locked(ch);
|
|
|
|
return err;
|
|
}
|
|
|
|
void
|
|
nxprov_advise_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
|
|
{
|
|
struct kern_nexus_provider *nxprov = NX_PROV(nx);
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
|
|
|
|
/* check as we might be called in the error handling path */
|
|
if (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)) {
|
|
ch_retain_locked(ch);
|
|
lck_mtx_unlock(&ch->ch_lock);
|
|
SK_UNLOCK();
|
|
lck_mtx_lock(&ch->ch_lock);
|
|
|
|
ASSERT(!(ch->ch_flags & CHANF_EXT_SKIP));
|
|
if (ch->ch_flags & CHANF_EXT_CONNECTED) {
|
|
nxprov->nxprov_ext.nxpi_pre_disconnect(nxprov, nx, ch);
|
|
os_atomic_andnot(&ch->ch_flags, CHANF_EXT_CONNECTED, relaxed);
|
|
}
|
|
|
|
/*
|
|
* Inform the external domain provider that the rings
|
|
* and slots for this channel are no longer valid.
|
|
*/
|
|
if (NXPROV_LLINK(nxprov)) {
|
|
nx_netif_llink_ext_fini_default_queues(nx);
|
|
} else {
|
|
nx_fini_rings(nx, ch);
|
|
}
|
|
|
|
ASSERT(ch->ch_flags & CHANF_EXT_PRECONNECT);
|
|
nxprov->nxprov_ext.nxpi_disconnected(nxprov, nx, ch);
|
|
os_atomic_andnot(&ch->ch_flags, CHANF_EXT_PRECONNECT, relaxed);
|
|
|
|
SK_D("ch 0x%llx flags %b nx 0x%llx disconnected",
|
|
SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));
|
|
|
|
/* We're done with this channel */
|
|
ch->ch_ctx = NULL;
|
|
|
|
lck_mtx_unlock(&ch->ch_lock);
|
|
SK_LOCK();
|
|
lck_mtx_lock(&ch->ch_lock);
|
|
/* caller is expected to hold one, in addition to ourselves */
|
|
VERIFY(ch->ch_refcnt >= 2);
|
|
ch_release_locked(ch);
|
|
}
|
|
ASSERT(!(ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)));
|
|
ASSERT(ch->ch_ctx == NULL);
|
|
}
|
|
|
|
static struct kern_nexus_provider *
|
|
nxprov_create_common(struct nxctl *nxctl,
|
|
struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
|
|
const struct kern_nexus_provider_init *init, int *err)
|
|
{
|
|
struct skmem_region_params srp[SKMEM_REGIONS];
|
|
struct kern_nexus_provider *nxprov = NULL;
|
|
struct nxprov_params nxp;
|
|
uint32_t override = 0;
|
|
uint32_t pp_region_config_flags;
|
|
int i;
|
|
|
|
_CASSERT(sizeof(*init) == sizeof(nxprov->nxprov_ext));
|
|
_CASSERT(sizeof(*init) >=
|
|
sizeof(struct kern_nexus_netif_provider_init));
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
ASSERT(nxctl != NULL && reg != NULL && nxdom_prov != NULL);
|
|
|
|
pp_region_config_flags = PP_REGION_CONFIG_MD_MAGAZINE_ENABLE |
|
|
PP_REGION_CONFIG_BUF_IODIR_BIDIR;
|
|
/*
|
|
* Special handling for external nexus providers; similar
|
|
* logic to what's done in kern_pbufpool_create().
|
|
*/
|
|
if (init != NULL) {
|
|
if (init->nxpi_flags & NXPIF_MONOLITHIC) {
|
|
pp_region_config_flags |=
|
|
PP_REGION_CONFIG_BUF_MONOLITHIC;
|
|
}
|
|
|
|
if (init->nxpi_flags & NXPIF_INHIBIT_CACHE) {
|
|
pp_region_config_flags |=
|
|
PP_REGION_CONFIG_BUF_NOCACHE;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* For network devices, set the packet metadata memory as persistent
|
|
* so that it is wired at segment creation. This allows us to access
|
|
* it with preemption disabled, as well as for rdar://problem/46511741.
|
|
*/
|
|
if (nxdom_prov->nxdom_prov_dom->nxdom_type == NEXUS_TYPE_NET_IF) {
|
|
pp_region_config_flags |= PP_REGION_CONFIG_MD_PERSISTENT;
|
|
}
|
|
|
|
/* process and validate provider parameters */
|
|
if ((*err = nxdom_prov_validate_params(nxdom_prov, reg,
|
|
&nxp, srp, override, pp_region_config_flags)) != 0) {
|
|
goto done;
|
|
}
|
|
|
|
nxprov = nxprov_alloc(nxdom_prov, Z_WAITOK);
|
|
ASSERT(nxprov->nxprov_dom_prov == nxdom_prov);
|
|
|
|
STAILQ_INIT(&nxprov->nxprov_nx_head);
|
|
STAILQ_INSERT_TAIL(&nxprov_head, nxprov, nxprov_link);
|
|
nxprov->nxprov_flags |= NXPROVF_ATTACHED;
|
|
nxprov->nxprov_ctl = nxctl;
|
|
uuid_generate_random(nxprov->nxprov_uuid);
|
|
bcopy(&nxp, nxprov->nxprov_params, sizeof(struct nxprov_params));
|
|
|
|
if (init != NULL) {
|
|
if (init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF) {
|
|
ASSERT(NXPROV_LLINK(nxprov));
|
|
bcopy(init, &nxprov->nxprov_netif_ext,
|
|
sizeof(nxprov->nxprov_netif_ext));
|
|
} else {
|
|
ASSERT(!NXPROV_LLINK(nxprov));
|
|
ASSERT(init->nxpi_version ==
|
|
KERN_NEXUS_PROVIDER_CURRENT_VERSION);
|
|
bcopy(init, &nxprov->nxprov_ext, sizeof(*init));
|
|
}
|
|
nxprov->nxprov_flags |= NXPROVF_EXTERNAL;
|
|
}
|
|
|
|
/* store validated region parameters to the provider */
|
|
for (i = 0; i < SKMEM_REGIONS; i++) {
|
|
nxprov->nxprov_region_params[i] = srp[i];
|
|
}
|
|
|
|
if (nxprov->nxprov_flags & NXPROVF_EXTERNAL) {
|
|
uint32_t nxpi_flags = nxprov->nxprov_ext.nxpi_flags;
|
|
|
|
if (nxpi_flags & NXPIF_VIRTUAL_DEVICE) {
|
|
nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
|
|
}
|
|
} else if (nxdom_prov->nxdom_prov_dom->nxdom_type !=
|
|
NEXUS_TYPE_NET_IF) {
|
|
/*
|
|
* Treat non-netif built-in nexus providers as those
|
|
* meant for inter-process communications, i.e. there
|
|
* is no actual networking hardware involved.
|
|
*/
|
|
nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
|
|
}
|
|
|
|
nxprov_retain_locked(nxprov); /* one for being in the list */
|
|
nxprov_retain_locked(nxprov); /* one for the caller */
|
|
|
|
#if SK_LOG
|
|
uuid_string_t uuidstr;
|
|
SK_D("nxprov 0x%llx UUID %s", SK_KVA(nxprov),
|
|
sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr));
|
|
#endif /* SK_LOG */
|
|
|
|
done:
|
|
return nxprov;
|
|
}
|
|
|
|
struct kern_nexus_provider *
|
|
nxprov_create(struct proc *p, struct nxctl *nxctl, struct nxprov_reg *reg,
|
|
int *err)
|
|
{
|
|
struct nxprov_params *nxp = ®->nxpreg_params;
|
|
struct kern_nexus_domain_provider *nxdom_prov = NULL;
|
|
struct kern_nexus_provider *nxprov = NULL;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
ASSERT(nxctl->nxctl_cred != proc_ucred_unsafe(kernproc));
|
|
*err = 0;
|
|
|
|
switch (nxp->nxp_type) {
|
|
case NEXUS_TYPE_USER_PIPE: /* only for userland */
|
|
*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
|
|
PRIV_SKYWALK_REGISTER_USER_PIPE);
|
|
break;
|
|
|
|
case NEXUS_TYPE_FLOW_SWITCH: /* allowed for userland */
|
|
*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
|
|
PRIV_SKYWALK_REGISTER_FLOW_SWITCH);
|
|
break;
|
|
|
|
case NEXUS_TYPE_NET_IF: /* allowed for userland */
|
|
*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
|
|
PRIV_SKYWALK_REGISTER_NET_IF);
|
|
break;
|
|
|
|
case NEXUS_TYPE_KERNEL_PIPE: /* only for kernel */
|
|
case NEXUS_TYPE_MONITOR: /* invalid */
|
|
default:
|
|
*err = EINVAL;
|
|
goto done;
|
|
}
|
|
|
|
if (*err != 0) {
|
|
goto done;
|
|
}
|
|
|
|
ASSERT(nxp->nxp_type < NEXUS_TYPE_MAX);
|
|
if ((nxdom_prov = nxdom_prov_default[nxp->nxp_type]) == NULL) {
|
|
*err = ENXIO;
|
|
goto done;
|
|
}
|
|
|
|
#if CONFIG_NEXUS_NETIF
|
|
/* make sure netif_compat is the default here */
|
|
ASSERT(nxp->nxp_type != NEXUS_TYPE_NET_IF ||
|
|
strcmp(nxdom_prov->nxdom_prov_name,
|
|
NEXUS_PROVIDER_NET_IF_COMPAT) == 0);
|
|
#endif /* CONFIG_NEXUS_NETIF */
|
|
|
|
SK_LOCK();
|
|
/* callee holds a reference for our caller upon success */
|
|
nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, NULL, err);
|
|
SK_UNLOCK();
|
|
done:
|
|
return nxprov;
|
|
}
|
|
|
|
struct kern_nexus_provider *
|
|
nxprov_create_kern(struct nxctl *nxctl,
|
|
struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
|
|
const struct kern_nexus_provider_init *init, int *err)
|
|
{
|
|
struct nxprov_params *nxp = ®->nxpreg_params;
|
|
struct kern_nexus_provider *nxprov = NULL;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
ASSERT(nxctl->nxctl_cred == proc_ucred_unsafe(kernproc));
|
|
ASSERT(nxp->nxp_type == nxdom_prov->nxdom_prov_dom->nxdom_type);
|
|
ASSERT(init == NULL ||
|
|
init->nxpi_version == KERN_NEXUS_PROVIDER_CURRENT_VERSION ||
|
|
init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF);
|
|
|
|
*err = 0;
|
|
|
|
switch (nxp->nxp_type) {
|
|
case NEXUS_TYPE_NET_IF:
|
|
break;
|
|
case NEXUS_TYPE_KERNEL_PIPE:
|
|
if (init == NULL) {
|
|
*err = EINVAL;
|
|
goto done;
|
|
}
|
|
break;
|
|
case NEXUS_TYPE_FLOW_SWITCH:
|
|
if (init != NULL) {
|
|
*err = EINVAL;
|
|
goto done;
|
|
}
|
|
break;
|
|
|
|
case NEXUS_TYPE_USER_PIPE: /* only for userland */
|
|
case NEXUS_TYPE_MONITOR: /* invalid */
|
|
default:
|
|
*err = EINVAL;
|
|
goto done;
|
|
}
|
|
|
|
/* callee holds a reference for our caller upon success */
|
|
nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, init, err);
|
|
|
|
done:
|
|
return nxprov;
|
|
}
|
|
|
|
int
|
|
nxprov_destroy(struct nxctl *nxctl, const uuid_t nxprov_uuid)
|
|
{
|
|
struct kern_nexus_provider *nxprov = NULL;
|
|
int err = 0;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
SK_LOCK();
|
|
|
|
STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
|
|
if (nxctl == nxprov->nxprov_ctl &&
|
|
uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
|
|
nxprov_retain_locked(nxprov);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (nxprov == NULL) {
|
|
err = ENOENT;
|
|
} else {
|
|
err = nxprov_close(nxprov, TRUE);
|
|
}
|
|
|
|
if (nxprov != NULL) {
|
|
(void) nxprov_release_locked(nxprov);
|
|
}
|
|
|
|
SK_UNLOCK();
|
|
|
|
return err;
|
|
}
|
|
|
|
int
|
|
nxprov_close(struct kern_nexus_provider *nxprov, boolean_t locked)
|
|
{
|
|
int err = 0;
|
|
|
|
if (!locked) {
|
|
SK_LOCK();
|
|
}
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
#if SK_LOG
|
|
uuid_string_t uuidstr;
|
|
SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
|
|
sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
|
|
nxprov->nxprov_flags, NXPROVF_BITS);
|
|
#endif /* SK_LOG */
|
|
|
|
if (nxprov->nxprov_flags & NXPROVF_CLOSED) {
|
|
err = EALREADY;
|
|
} else {
|
|
struct kern_nexus *nx, *tnx;
|
|
|
|
nxprov->nxprov_ctl = NULL;
|
|
|
|
STAILQ_FOREACH_SAFE(nx, &nxprov->nxprov_nx_head,
|
|
nx_prov_link, tnx) {
|
|
nx_retain_locked(nx);
|
|
(void) nx_close(nx, TRUE);
|
|
(void) nx_release_locked(nx);
|
|
}
|
|
|
|
if (STAILQ_EMPTY(&nxprov->nxprov_nx_head)) {
|
|
/* no nexus created on this, so detach now */
|
|
nxprov_detach(nxprov, TRUE);
|
|
} else {
|
|
/* detach when last nexus is destroyed */
|
|
ASSERT(nxprov->nxprov_refcnt > 1);
|
|
nxprov->nxprov_flags |= NXPROVF_CLOSED;
|
|
}
|
|
}
|
|
|
|
if (!locked) {
|
|
SK_UNLOCK();
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static void
|
|
nxprov_detach(struct kern_nexus_provider *nxprov, boolean_t locked)
|
|
{
|
|
if (!locked) {
|
|
SK_LOCK();
|
|
}
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
#if SK_LOG
|
|
uuid_string_t uuidstr;
|
|
SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
|
|
sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
|
|
nxprov->nxprov_flags, NXPROVF_BITS);
|
|
#endif /* SK_LOG */
|
|
|
|
ASSERT(nxprov->nxprov_flags & NXPROVF_ATTACHED);
|
|
STAILQ_REMOVE(&nxprov_head, nxprov, kern_nexus_provider, nxprov_link);
|
|
nxprov->nxprov_flags &= ~NXPROVF_ATTACHED;
|
|
|
|
/* caller must hold an extra ref */
|
|
ASSERT(nxprov->nxprov_refcnt > 1);
|
|
(void) nxprov_release_locked(nxprov);
|
|
|
|
if (!locked) {
|
|
SK_UNLOCK();
|
|
}
|
|
}
|
|
|
|
static struct kern_nexus_provider *
|
|
nxprov_alloc(struct kern_nexus_domain_provider *nxdom_prov, zalloc_flags_t how)
|
|
{
|
|
struct kern_nexus_provider *nxprov;
|
|
struct nxprov_params *nxp;
|
|
|
|
ASSERT(nxdom_prov != NULL);
|
|
|
|
nxp = nxprov_params_alloc(how);
|
|
if (nxp == NULL) {
|
|
SK_ERR("Failed to allocate nxprov_params");
|
|
return NULL;
|
|
}
|
|
|
|
nxprov = zalloc_flags(nxprov_zone, how | Z_ZERO);
|
|
if (nxprov == NULL) {
|
|
SK_ERR("Failed to allocate nxprov");
|
|
nxprov_params_free(nxp);
|
|
return NULL;
|
|
}
|
|
|
|
nxprov->nxprov_dom_prov = nxdom_prov;
|
|
nxprov->nxprov_params = nxp;
|
|
/* hold a reference for nxprov */
|
|
nxdom_prov_retain_locked(nxdom_prov);
|
|
|
|
return nxprov;
|
|
}
|
|
|
|
static void
|
|
nxprov_free(struct kern_nexus_provider *nxprov)
|
|
{
|
|
struct kern_nexus_domain_provider *nxdom_prov =
|
|
nxprov->nxprov_dom_prov;
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
ASSERT(nxdom_prov != NULL);
|
|
(void) nxdom_prov_release_locked(nxdom_prov);
|
|
nxprov->nxprov_dom_prov = NULL;
|
|
ASSERT(nxprov->nxprov_params != NULL);
|
|
nxprov_params_free(nxprov->nxprov_params);
|
|
nxprov->nxprov_params = NULL;
|
|
ASSERT(!(nxprov->nxprov_flags & NXPROVF_ATTACHED));
|
|
SK_DF(SK_VERB_MEM, "nxprov 0x%llx FREE", SK_KVA(nxprov));
|
|
zfree(nxprov_zone, nxprov);
|
|
}
|
|
|
|
static void
|
|
nxprov_retain_locked(struct kern_nexus_provider *nxprov)
|
|
{
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
nxprov->nxprov_refcnt++;
|
|
ASSERT(nxprov->nxprov_refcnt != 0);
|
|
}
|
|
|
|
void
|
|
nxprov_retain(struct kern_nexus_provider *nxprov)
|
|
{
|
|
SK_LOCK();
|
|
nxprov_retain_locked(nxprov);
|
|
SK_UNLOCK();
|
|
}
|
|
|
|
static int
|
|
nxprov_release_locked(struct kern_nexus_provider *nxprov)
|
|
{
|
|
int oldref = nxprov->nxprov_refcnt;
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
ASSERT(nxprov->nxprov_refcnt != 0);
|
|
if (--nxprov->nxprov_refcnt == 0) {
|
|
nxprov_free(nxprov);
|
|
}
|
|
|
|
return oldref == 1;
|
|
}
|
|
|
|
int
|
|
nxprov_release(struct kern_nexus_provider *nxprov)
|
|
{
|
|
int lastref;
|
|
|
|
SK_LOCK();
|
|
lastref = nxprov_release_locked(nxprov);
|
|
SK_UNLOCK();
|
|
|
|
return lastref;
|
|
}
|
|
|
|
struct nxprov_params *
|
|
nxprov_params_alloc(zalloc_flags_t how)
|
|
{
|
|
return zalloc_flags(nxprov_params_zone, how | Z_ZERO);
|
|
}
|
|
|
|
void
|
|
nxprov_params_free(struct nxprov_params *nxp)
|
|
{
|
|
SK_DF(SK_VERB_MEM, "nxp 0x%llx FREE", SK_KVA(nxp));
|
|
zfree(nxprov_params_zone, nxp);
|
|
}
|
|
|
|
static int
|
|
nx_check_pp(struct kern_nexus_provider *nxprov, struct kern_pbufpool *pp)
|
|
{
|
|
struct kern_nexus_domain_provider *nxdom_prov = nxprov->nxprov_dom_prov;
|
|
|
|
if ((pp->pp_flags & (PPF_EXTERNAL | PPF_CLOSED)) != PPF_EXTERNAL) {
|
|
SK_ERR("Rejecting \"%s\" built-in pp", pp->pp_name);
|
|
return ENOTSUP;
|
|
}
|
|
|
|
/*
|
|
* Require that the nexus domain metadata type and the
|
|
* metadata type of the caller-provided pbufpool match.
|
|
*/
|
|
if (nxdom_prov->nxdom_prov_dom->nxdom_md_type !=
|
|
pp->pp_md_type ||
|
|
nxdom_prov->nxdom_prov_dom->nxdom_md_subtype !=
|
|
pp->pp_md_subtype) {
|
|
SK_ERR("Mismatch in metadata type/subtype "
|
|
"(%u/%u != %u/%u)", pp->pp_md_type,
|
|
nxdom_prov->nxdom_prov_dom->nxdom_md_type,
|
|
pp->pp_md_subtype,
|
|
nxdom_prov->nxdom_prov_dom->nxdom_md_subtype);
|
|
return EINVAL;
|
|
}
|
|
|
|
/*
|
|
* Require that the nexus provider memory configuration
|
|
* has the same impedance as the caller-provided one.
|
|
* Both need to be lacking or present; if one of them
|
|
* is set and the other isn't, then we bail.
|
|
*/
|
|
if (!!(PP_BUF_REGION_DEF(pp)->skr_mode & SKR_MODE_MONOLITHIC) ^
|
|
!!(nxprov->nxprov_ext.nxpi_flags & NXPIF_MONOLITHIC)) {
|
|
SK_ERR("Memory config mismatch: monolithic mode");
|
|
return EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
struct kern_nexus *
|
|
nx_create(struct nxctl *nxctl, const uuid_t nxprov_uuid,
|
|
const nexus_type_t dom_type, const void *nx_ctx,
|
|
nexus_ctx_release_fn_t nx_ctx_release, struct kern_pbufpool *tx_pp,
|
|
struct kern_pbufpool *rx_pp, int *err)
|
|
{
|
|
struct kern_nexus_domain_provider *nxdom_prov;
|
|
struct kern_nexus_provider *nxprov = NULL;
|
|
struct kern_nexus *nx = NULL;
|
|
#if SK_LOG
|
|
uuid_string_t uuidstr;
|
|
#endif /* SK_LOG */
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
ASSERT(dom_type < NEXUS_TYPE_MAX);
|
|
ASSERT(!uuid_is_null(nxprov_uuid));
|
|
*err = 0;
|
|
|
|
SK_LOCK();
|
|
|
|
STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
|
|
if (nxctl == nxprov->nxprov_ctl &&
|
|
uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (nxprov == NULL || (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
|
|
SK_ERR("Provider not found or has been closed");
|
|
*err = ENOENT;
|
|
goto done;
|
|
}
|
|
|
|
nxdom_prov = nxprov->nxprov_dom_prov;
|
|
if (dom_type != NEXUS_TYPE_UNDEFINED &&
|
|
(nxdom_prov->nxdom_prov_dom->nxdom_type != dom_type)) {
|
|
SK_ERR("Mismatch in domain type (0x%u != 0x%u)",
|
|
dom_type, nxdom_prov->nxdom_prov_dom->nxdom_type);
|
|
nxdom_prov = NULL;
|
|
nxprov = NULL;
|
|
*err = ENODEV;
|
|
goto done;
|
|
}
|
|
|
|
if ((dom_type == NEXUS_TYPE_NET_IF) && NXPROV_LLINK(nxprov) &&
|
|
(!tx_pp || !rx_pp)) {
|
|
#if SK_LOG
|
|
SK_ERR("TX/RX packet pool is required for netif logical link "
|
|
"nexus provider UUID: %s",
|
|
sk_uuid_unparse(nxprov_uuid, uuidstr));
|
|
#endif /* SK_LOG */
|
|
nxdom_prov = NULL;
|
|
nxprov = NULL;
|
|
*err = EINVAL;
|
|
goto done;
|
|
}
|
|
|
|
if ((tx_pp != NULL && (*err = nx_check_pp(nxprov, tx_pp)) != 0) ||
|
|
(rx_pp != NULL && (*err = nx_check_pp(nxprov, rx_pp)) != 0)) {
|
|
goto done;
|
|
}
|
|
|
|
nx = nx_alloc(Z_WAITOK);
|
|
|
|
STAILQ_INIT(&nx->nx_ch_head);
|
|
STAILQ_INIT(&nx->nx_ch_nonxref_head);
|
|
lck_rw_init(&nx->nx_ch_if_adv_lock, &nexus_lock_group,
|
|
&nexus_lock_attr);
|
|
STAILQ_INIT(&nx->nx_ch_if_adv_head);
|
|
uuid_generate_random(nx->nx_uuid);
|
|
nx->nx_prov = nxprov;
|
|
nx->nx_ctx = (void *)(uintptr_t)nx_ctx;
|
|
nx->nx_ctx_release = nx_ctx_release;
|
|
nx->nx_id = nxdom_prov->nxdom_prov_gencnt++;
|
|
|
|
if (tx_pp != NULL) {
|
|
nx->nx_tx_pp = tx_pp;
|
|
pp_retain(tx_pp); /* released by nx_free */
|
|
}
|
|
|
|
if (rx_pp != NULL) {
|
|
nx->nx_rx_pp = rx_pp;
|
|
pp_retain(rx_pp); /* released by nx_free */
|
|
}
|
|
|
|
/* this nexus is alive; tell the nexus constructor to set it up */
|
|
if (nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor != NULL) {
|
|
*err = nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor(nx);
|
|
if (*err != 0) {
|
|
nx->nx_prov = NULL;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
nxprov_retain_locked(nxprov); /* hold a ref on the nexus reg */
|
|
|
|
STAILQ_INSERT_TAIL(&nxprov->nxprov_nx_head, nx, nx_prov_link);
|
|
nxprov->nxprov_nx_count++;
|
|
RB_INSERT(kern_nexus_tree, &nx_head, nx);
|
|
os_atomic_or(&nx->nx_flags, NXF_ATTACHED, relaxed);
|
|
|
|
nx_retain_locked(nx); /* one for the provider list */
|
|
nx_retain_locked(nx); /* one for the global list */
|
|
nx_retain_locked(nx); /* one for the caller */
|
|
|
|
#if SK_LOG
|
|
SK_D("nexus 0x%llx (%s:%s) UUID %s", SK_KVA(nx),
|
|
nxdom_prov->nxdom_prov_dom->nxdom_name,
|
|
nxdom_prov->nxdom_prov_name, sk_uuid_unparse(nx->nx_uuid, uuidstr));
|
|
#endif /* SK_LOG */
|
|
done:
|
|
SK_UNLOCK();
|
|
|
|
if (*err != 0) {
|
|
if (nx != NULL) {
|
|
nx_free(nx);
|
|
nx = NULL;
|
|
}
|
|
}
|
|
return nx;
|
|
}
|
|
|
|
int
|
|
nx_destroy(struct nxctl *nxctl, const uuid_t nx_uuid)
|
|
{
|
|
struct kern_nexus *nx = NULL;
|
|
struct kern_nexus find;
|
|
int err = 0;
|
|
|
|
NXCTL_LOCK_ASSERT_HELD(nxctl);
|
|
|
|
SK_LOCK();
|
|
|
|
uuid_copy(find.nx_uuid, nx_uuid);
|
|
nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
|
|
if (nx != NULL && nxctl != NX_PROV(nx)->nxprov_ctl) {
|
|
nx = NULL;
|
|
}
|
|
|
|
if (nx != NULL) {
|
|
nx_retain_locked(nx);
|
|
}
|
|
|
|
if (nx == NULL) {
|
|
err = ENOENT;
|
|
} else {
|
|
err = nx_close(nx, TRUE);
|
|
(void) nx_release_locked(nx);
|
|
}
|
|
|
|
SK_UNLOCK();
|
|
|
|
return err;
|
|
}
|
|
|
|
static inline int
|
|
nx_cmp(const struct kern_nexus *a, const struct kern_nexus *b)
|
|
{
|
|
return uuid_compare(a->nx_uuid, b->nx_uuid);
|
|
}
|
|
|
|
struct kern_nexus *
|
|
nx_find(const uuid_t nx_uuid, boolean_t locked)
|
|
{
|
|
struct kern_nexus *nx = NULL;
|
|
struct kern_nexus find;
|
|
|
|
if (!locked) {
|
|
SK_LOCK();
|
|
}
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
uuid_copy(find.nx_uuid, nx_uuid);
|
|
nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
|
|
if (nx != NULL && (nx->nx_flags & NXF_CLOSED)) {
|
|
nx = NULL;
|
|
}
|
|
|
|
/* return reference to caller */
|
|
if (nx != NULL) {
|
|
nx_retain_locked(nx);
|
|
}
|
|
|
|
if (!locked) {
|
|
SK_UNLOCK();
|
|
}
|
|
|
|
return nx;
|
|
}
|
|
|
|
int
|
|
nx_close(struct kern_nexus *nx, boolean_t locked)
|
|
{
|
|
int err = 0;
|
|
|
|
if (!locked) {
|
|
SK_LOCK();
|
|
}
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
|
|
if (nx->nx_flags & NXF_CLOSED) {
|
|
err = EALREADY;
|
|
} else {
|
|
#if SK_LOG
|
|
uuid_string_t uuidstr;
|
|
SK_D("nexus 0x%llx (%s:%s) UUID %s flags 0x%b", SK_KVA(nx),
|
|
NX_DOM(nx)->nxdom_name, NX_DOM_PROV(nx)->nxdom_prov_name,
|
|
sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags,
|
|
NXF_BITS);
|
|
#endif /* SK_LOG */
|
|
|
|
if (STAILQ_EMPTY(&nx->nx_ch_head)) {
|
|
/* no regular channels open to it, so detach now */
|
|
nx_detach(nx);
|
|
} else {
|
|
/* detach when the last channel closes */
|
|
ASSERT(nx->nx_refcnt > 3);
|
|
os_atomic_or(&nx->nx_flags, NXF_CLOSED, relaxed);
|
|
}
|
|
}
|
|
|
|
if (!locked) {
|
|
SK_UNLOCK();
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
void
|
|
nx_stop(struct kern_nexus *nx)
|
|
{
|
|
struct kern_nexus_provider *nxprov = nx->nx_prov;
|
|
|
|
SK_LOCK_ASSERT_HELD();
|
|
|
|
/* send a stop message */
|
|
if (nxprov->nxprov_dom_prov->nxdom_prov_nx_stop != NULL) {
|
|
nxprov->nxprov_dom_prov->nxdom_prov_nx_stop(nx);
|
|
}
|
|
}
|
|
|
|
void
nx_detach(struct kern_nexus *nx)
{
	struct kern_nexus_provider *nxprov = nx->nx_prov;

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nexus 0x%llx UUID %s flags 0x%b", SK_KVA(nx),
	    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags, NXF_BITS);
#endif /* SK_LOG */

	/* Caller must hold extra refs, on top of the two in reg/global lists */
	ASSERT(nx->nx_refcnt >= 3);
	ASSERT(nx->nx_flags & NXF_ATTACHED);

	/* this nexus is done; let the nexus destructor do final cleanups */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor != NULL) {
		nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor(nx);
	}

	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	STAILQ_REMOVE(&nxprov->nxprov_nx_head, nx, kern_nexus, nx_prov_link);
	nxprov->nxprov_nx_count--;
	RB_REMOVE(kern_nexus_tree, &nx_head, nx);
	os_atomic_andnot(&nx->nx_flags, NXF_ATTACHED, relaxed);
	nx->nx_prov = NULL;
	if (nx->nx_ctx_release != NULL) {
		nx->nx_ctx_release(nx->nx_ctx);
	}
	nx->nx_ctx = NULL;

	(void) nx_release_locked(nx);   /* one for the reg list */
	(void) nx_release_locked(nx);   /* one for the global list */

	/*
	 * If this was the last nexus and the provider has been closed,
	 * detach the provider and finish up the postponed job.
	 */
	if (STAILQ_EMPTY(&nxprov->nxprov_nx_head) &&
	    (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
		nxprov_detach(nxprov, TRUE);
	}
	(void) nxprov_release_locked(nxprov);
}

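/*
 * Allocate the shared advisory region for a nexus.  The region begins
 * with a single __kern_nexus_adv_metadata header (one 64-bit word),
 * immediately followed by either a flowswitch (struct sk_nexusadv) or a
 * netif (struct netif_nexus_advisory) advisory object, depending on the
 * requested type.
 */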
int
nx_advisory_alloc(struct kern_nexus *nx, const char *name,
    struct skmem_region_params *srp_nexusadv, nexus_advisory_type_t type)
{
	struct __kern_nexus_adv_metadata *adv_md;

	_CASSERT(sizeof(struct __kern_nexus_adv_metadata) == sizeof(uint64_t));
	_CASSERT((sizeof(struct sk_nexusadv) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	_CASSERT((sizeof(struct netif_nexus_advisory) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(type == NEXUS_ADVISORY_TYPE_FLOWSWITCH ||
	    type == NEXUS_ADVISORY_TYPE_NETIF);

	if ((nx->nx_adv.nxv_reg = skmem_region_create(name, srp_nexusadv,
	    NULL, NULL, NULL)) == NULL) {
		return ENOMEM;
	}

	nx->nx_adv.nxv_adv = skmem_region_alloc(nx->nx_adv.nxv_reg, NULL,
	    NULL, NULL, (SKMEM_NOSLEEP | SKMEM_PANIC));
	adv_md = nx->nx_adv.nxv_adv;
	adv_md->knam_version = NX_ADVISORY_MD_CURRENT_VERSION;
	adv_md->knam_type = type;
	adv_md->__reserved = 0;
	nx->nx_adv.nxv_adv_type = type;
	nx->nx_adv.flowswitch_nxv_adv = (void *)(adv_md + 1);
	if (type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
		nx->nx_adv.flowswitch_nxv_adv->nxadv_ver =
		    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION;
	} else {
		nx->nx_adv.netif_nxv_adv->nna_version =
		    NX_NETIF_ADVISORY_CURRENT_VERSION;
	}
	return 0;
}

void
nx_advisory_free(struct kern_nexus *nx)
{
	if (nx->nx_adv.nxv_reg != NULL) {
		ASSERT(nx->nx_adv.nxv_adv != NULL);
		skmem_region_free(nx->nx_adv.nxv_reg,
		    nx->nx_adv.nxv_adv, NULL);
		nx->nx_adv.nxv_adv = NULL;
		nx->nx_adv.nxv_adv_type = NEXUS_ADVISORY_TYPE_INVALID;
		nx->nx_adv.flowswitch_nxv_adv = NULL;
		skmem_region_release(nx->nx_adv.nxv_reg);
		nx->nx_adv.nxv_reg = NULL;
	}

	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(nx->nx_adv.nxv_adv_type == NEXUS_ADVISORY_TYPE_INVALID);
	ASSERT(nx->nx_adv.flowswitch_nxv_adv == NULL);
}

static struct kern_nexus *
nx_alloc(zalloc_flags_t how)
{
	SK_LOCK_ASSERT_HELD();

	return zalloc_flags(nx_zone, how | Z_ZERO);
}

static void
nx_free(struct kern_nexus *nx)
{
	ASSERT(!(nx->nx_flags & NXF_ATTACHED) && nx->nx_prov == NULL);
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	nx_port_free_all(nx);

	if (nx->nx_tx_pp != NULL) {
		pp_release(nx->nx_tx_pp);
		nx->nx_tx_pp = NULL;
	}
	if (nx->nx_rx_pp != NULL) {
		pp_release(nx->nx_rx_pp);
		nx->nx_rx_pp = NULL;
	}

	ASSERT(STAILQ_EMPTY(&nx->nx_ch_if_adv_head));
	lck_rw_destroy(&nx->nx_ch_if_adv_lock, &nexus_lock_group);

	SK_DF(SK_VERB_MEM, "nexus 0x%llx FREE", SK_KVA(nx));
	zfree(nx_zone, nx);
}

void
nx_retain_locked(struct kern_nexus *nx)
{
	SK_LOCK_ASSERT_HELD();

	nx->nx_refcnt++;
	VERIFY(nx->nx_refcnt > 0);
}

void
nx_retain(struct kern_nexus *nx)
{
	SK_LOCK();
	nx_retain_locked(nx);
	SK_UNLOCK();
}

int
nx_release_locked(struct kern_nexus *nx)
{
	int oldref = nx->nx_refcnt;

	SK_LOCK_ASSERT_HELD();

	VERIFY(nx->nx_refcnt > 0);
	if (--nx->nx_refcnt == 0) {
		nx_free(nx);
	}

	return oldref == 1;
}

int
nx_release(struct kern_nexus *nx)
{
	int lastref;

	SK_LOCK_ASSERT_NOTHELD();

	SK_LOCK();
	lastref = nx_release_locked(nx);
	SK_UNLOCK();

	return lastref;
}

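/*
 * Invoke the external provider's ring_init()/slot_init() callbacks for
 * every non-host ring of the channel's adapter.  For each ring whose
 * slots were initialized, a busy retain is taken on the kernel slot
 * descriptor region so that it survives until nx_fini_rings() has run
 * the matching slot_fini() callbacks.
 */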
static int
nx_init_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	boolean_t undo = FALSE;
	int ksd_retains = 0;
	enum txrx t;
	int err = 0;

	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)) ==
	    CHANF_EXT_PRECONNECT);

	if (nxprov->nxprov_ext.nxpi_ring_init == NULL) {
		return 0;
	}

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			/* skip host rings */
			if (kring->ckr_flags & CKRF_HOST) {
				continue;
			}

			if ((err = nxprov->nxprov_ext.nxpi_ring_init(
			    nxprov, nx, ch, kring, (kring->ckr_tx == NR_TX),
			    &kring->ckr_ctx)) != 0) {
				SK_D("ch 0x%llx flags %b nx 0x%llx kr \"%s\" "
				    "(0x%llx) krflags %b ring_init error %d",
				    SK_KVA(ch), ch->ch_flags, CHANF_BITS,
				    SK_KVA(nx), kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, CKRF_BITS, err);
				kring->ckr_ctx = NULL;
				undo = TRUE;
				break;
			}
			kring->ckr_flags |= CKRF_EXT_RING_INITED;

			if ((err = nx_init_slots(nx, kring)) != 0) {
				undo = TRUE;
				break;
			}

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_retains;
			}
		}
		if (undo) {
			break;
		}
	}

	/*
	 * Note: retain the KSD region even in case of error, as we have set
	 * the CKRF_EXT_SLOTS_INITED flag for some of the rings;
	 * nx_fini_rings() will take care of the release based on it.
	 */
	if (ksd_retains != 0) {
		/*
		 * Mark the kernel slot descriptor region as busy; this
		 * prevents it from being torn-down at channel defunct
		 * time, as we need to invoke the slot_fini() callback
		 * for each slot and we need the descriptors until then.
		 */
		skmem_arena_nexus_sd_set_noidle(skmem_arena_nexus(na->na_arena),
		    ksd_retains);
	}

	if (err != 0) {
		ASSERT(undo);
		nx_fini_rings(nx, ch);
	}

	return err;
}

static void
nx_fini_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	int ksd_releases = 0;
	enum txrx t;

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			if (!(kring->ckr_flags & CKRF_EXT_RING_INITED)) {
				continue;
			}

			ASSERT(!(kring->ckr_flags & CKRF_HOST));
			ASSERT(nxprov->nxprov_ext.nxpi_ring_fini != NULL);
			nxprov->nxprov_ext.nxpi_ring_fini(nxprov, nx, kring);
			kring->ckr_flags &= ~CKRF_EXT_RING_INITED;

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_releases;
			}

			/*
			 * Undo the work done in nx_init_slots() and inform
			 * the external domain provider, if applicable, that
			 * the slots for this ring are no longer valid.
			 */
			nx_fini_slots(nx, kring);
			kring->ckr_ctx = NULL;
		}
	}

	if (ksd_releases != 0) {
		/*
		 * Now that we've finished invoking the slot_fini()
		 * callbacks, release the busy retain counts held
		 * earlier in nx_init_rings().  This will allow the
		 * kernel slot descriptor region to be torn down.
		 */
		skmem_arena_nexus_sd_set_noidle(
			skmem_arena_nexus(na->na_arena), -ksd_releases);
	}
}

static int
nx_init_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	int err = 0;
	uint32_t i;

	/*
	 * If the slot init callback was not provided, or if the
	 * kring was not created to hold any slot contexts, don't
	 * go any further.
	 */
	if (nxprov->nxprov_ext.nxpi_slot_init == NULL ||
	    kring->ckr_slot_ctxs == NULL) {
		return 0;
	}

	ASSERT(kring->ckr_slot_ctxs_set == 0);
	ASSERT(slot != NULL);

	for (i = 0; i < kring->ckr_num_slots; i++) {
		struct kern_slot_prop *slot_ctx_prop = NULL;
		void *slot_ctx_arg = NULL;

		ASSERT(&slot[i] <= kring->ckr_ksds_last);
		if ((err = nxprov->nxprov_ext.nxpi_slot_init(nxprov, nx, kring,
		    &slot[i], i, &slot_ctx_prop, &slot_ctx_arg)) != 0) {
			SK_D("nx 0x%llx kr \"%s\" (0x%llx) krflags %b slot %u "
			    "slot_init error %d", SK_KVA(nx), kring->ckr_name,
			    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, i, err);
			break;
		}
		/* we don't want this to be used by client, so verify here */
		ASSERT(slot_ctx_prop == NULL);
		kring->ckr_slot_ctxs[i].slot_ctx_arg =
		    (mach_vm_address_t)slot_ctx_arg;
		kring->ckr_slot_ctxs_set++;
	}

	if (err != 0) {
		nx_fini_slots(nx, kring);
	} else {
		kring->ckr_flags |= CKRF_EXT_SLOTS_INITED;
	}

	return err;
}

static void
nx_fini_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	uint32_t i;

	ASSERT(!(kring->ckr_flags & CKRF_EXT_SLOTS_INITED) ||
	    nxprov->nxprov_ext.nxpi_slot_fini != NULL);
	ASSERT(slot != NULL || !(kring->ckr_flags & CKRF_EXT_SLOTS_INITED));

	for (i = 0; i < kring->ckr_slot_ctxs_set; i++) {
		ASSERT(slot != NULL && &slot[i] <= kring->ckr_ksds_last);
		if (nxprov->nxprov_ext.nxpi_slot_fini != NULL) {
			nxprov->nxprov_ext.nxpi_slot_fini(nxprov, nx,
			    kring, &slot[i], i);
		}
		if (kring->ckr_slot_ctxs != NULL) {
			kring->ckr_slot_ctxs[i].slot_ctx_arg = 0;
		}
	}
	kring->ckr_slot_ctxs_set = 0;

	/* We're done with this kring */
	kring->ckr_flags &= ~CKRF_EXT_SLOTS_INITED;
}

/* 64-bit mask with range */
#define BMASK64(_beg, _end)     \
	((NX_PORT_CHUNK_FREE >> (63 - (_end))) & ~((1ULL << (_beg)) - 1))

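/*
 * Each bitmap_t chunk tracks NX_PORT_CHUNK (64) ports, with a set bit
 * meaning "free" and a clear bit meaning "claimed".  BMASK64(_beg, _end)
 * builds the inclusive bit range [_beg, _end] within one chunk; for
 * example, with NX_PORT_CHUNK_FREE being all ones, BMASK64(2, 4)
 * evaluates to 0x1c (bits 2, 3 and 4 set).
 *
 * nx_port_find() below scans those chunks for the first free port in
 * [first, last); if the range lies beyond the current map it simply
 * returns the next index so that nx_port_alloc() can grow the map later.
 */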
int
nx_port_find(struct kern_nexus *nx, nexus_port_t first,
    nexus_port_t last, nexus_port_t *nx_port)
{
	int err = 0;

	ASSERT(first < last);
	*nx_port = NEXUS_PORT_ANY;

	if (nx->nx_num_ports == 0 || (first + 1) >= nx->nx_num_ports) {
		/*
		 * Left edge of the range is beyond the current map;
		 * let nx_port_alloc() handle the growing later.
		 */
		*nx_port = first;
	} else {
		nexus_port_size_t fc = (first / NX_PORT_CHUNK);
		nexus_port_size_t lc = (MIN(last, nx->nx_num_ports) / NX_PORT_CHUNK);
		nexus_port_size_t lim = (nx->nx_num_ports / NX_PORT_CHUNK);
		nexus_port_size_t i, j;
		bitmap_t *bmap;

		/*
		 * The right edge of the range is either within or
		 * beyond the current map; scan thru the current
		 * map and find the first available port.
		 */
		for (i = fc; i <= lc; i++) {
			bitmap_t mask;
			nexus_port_size_t beg = 0, end = 63;

			if (i == fc) {
				beg = (first % NX_PORT_CHUNK);
			}
			if (i == (last / NX_PORT_CHUNK)) {
				end = (last % NX_PORT_CHUNK);
			}

			if (i < lim) {
				bmap = &nx->nx_ports_bmap[i];
				mask = BMASK64(beg, end);

				j = (nexus_port_size_t)ffsll((*bmap) & mask);
				if (j == 0) {
					continue;
				}

				--j;
				*nx_port = (i * NX_PORT_CHUNK) + j;
			}
			break;
		}

		/*
		 * If the requested range is within the current map and we
		 * couldn't find a port, return an err.  Otherwise, return
		 * the next port index to trigger growing later.
		 */
		if (*nx_port == NEXUS_PORT_ANY) {
			if (lc == (last / NX_PORT_CHUNK)) {
				err = EBUSY;
				SK_ERR("port unavail in [%u, %u)", first, last);
			} else {
				*nx_port = nx->nx_num_ports;
			}
		}
	}

	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d (err %d)", SK_KVA(nx),
	    (int)*nx_port, err);

	return err;
}

static int
nx_port_grow(struct kern_nexus *nx, nexus_port_size_t grow)
{
	ASSERT(NXDOM_MAX(NX_DOM(nx), ports) <= NEXUS_PORT_MAX);
	nexus_port_t dom_port_max = (nexus_port_size_t)NXDOM_MAX(NX_DOM(nx), ports);
	struct nx_port_info *ports;
	size_t limit;
	nexus_port_size_t i, num_ports, old_num_ports;
	bitmap_t *bmap;

	ASSERT(grow > 0 && (grow % NX_PORT_CHUNK) == 0);
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	_CASSERT((sizeof(*bmap) * 8) == NX_PORT_CHUNK);
	ASSERT(powerof2(dom_port_max));
	ASSERT(dom_port_max % NX_PORT_CHUNK == 0);

	old_num_ports = nx->nx_num_ports;
	num_ports = nx->nx_num_ports + grow;
	limit = P2ROUNDUP(dom_port_max, NX_PORT_CHUNK);
	if (num_ports > limit) {
		SK_ERR("can't grow, total %u grow %u (new %u > dom_max %u)",
		    nx->nx_num_ports, grow, num_ports, limit);
		return EDOM;
	}

	if ((bmap = sk_realloc_data(nx->nx_ports_bmap,
	    (old_num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    (num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		SK_ERR("bmap alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}
	nx->nx_ports_bmap = bmap;

	if ((ports = sk_realloc_type_array(struct nx_port_info, old_num_ports,
	    num_ports, nx->nx_ports, Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		/* can't free bmap here, otherwise nexus won't work */
		SK_ERR("nx_ports alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}

	/* initialize the additional new ports */
	bzero(&ports[nx->nx_num_ports], (grow * sizeof(*ports)));
	nx->nx_ports = ports;

	/* initialize new bitmaps (set all bits) */
	for (i = (nx->nx_num_ports / NX_PORT_CHUNK);
	    i < (num_ports / NX_PORT_CHUNK); i++) {
		bmap[i] = NX_PORT_CHUNK_FREE;
	}

	nx->nx_num_ports = num_ports;

	SK_DF(SK_VERB_NXPORT, "!!! nx 0x%llx ports %u/%u, %u ports added",
	    SK_KVA(nx), nx->nx_active_ports, nx->nx_num_ports, grow);

	return 0;
}

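/*
 * Claim a specific nexus port, growing the port map if needed.  A free
 * port (bit set) can only be claimed by the kernel or on an anonymous
 * nexus; a bound port requires matching credentials in nxb.  When the
 * caller passes a pointer to a NULL adapter, an existing occupant is
 * returned with a reference instead of being replaced.
 */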
int
nx_port_alloc(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb,
    struct nexus_adapter **na, struct proc *p)
{
	struct nx_port_info *npi = NULL;
	struct nxbind *nxb0;
	size_t g;
	uint32_t i, j;
	bitmap_t *bmap;
	bool refonly = false;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);

	/* port is zero-based, so adjust here */
	if ((nx_port + 1) > nx->nx_num_ports) {
		g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
		VERIFY(g <= NEXUS_PORT_MAX);
		if ((err = nx_port_grow(nx, (nexus_port_size_t)g)) != 0) {
			goto done;
		}
	}
	ASSERT(err == 0);
	ASSERT(nx_port < nx->nx_num_ports);
	npi = &nx->nx_ports[nx_port];
	nxb0 = npi->npi_nxb;
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];

	if (bit_test(*bmap, j)) {
		/* port is not (yet) bound or allocated */
		ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
		if (p != kernproc && !NX_ANONYMOUS_PROV(nx)) {
			/*
			 * If the port allocation is requested by userland
			 * and the nexus is non-anonymous, then fail the
			 * request.
			 */
			err = EACCES;
			SK_ERR("user proc alloc on named nexus needs binding");
		} else if (na != NULL && *na != NULL) {
			/*
			 * Otherwise claim it (clear bit) if the caller
			 * supplied an adapter for this port; else, it
			 * is just an existential check and so there's
			 * no action needed at this point (we'll skip
			 * the init below since vpna is NULL).
			 */
			bit_clear(*bmap, j);
		}
	} else {
		/* if port is bound, check if credentials match */
		if (nxb0 != NULL && p != kernproc && !NX_ANONYMOUS_PROV(nx) &&
		    (nxb == NULL || !nxb_is_equal(nxb0, nxb))) {
			SK_ERR("nexus binding mismatch");
			err = EACCES;
		} else {
			/*
			 * If port is already occupied by an adapter,
			 * see if the client is requesting a reference
			 * to it; if so, return the adapter.  Otherwise,
			 * if unoccupied and vpna is non-NULL, associate
			 * it with this nexus port via the below init.
			 */
			if (NPI_NA(npi) != NULL) {
				if (na != NULL && *na == NULL) {
					*na = NPI_NA(npi);
					na_retain_locked(*na);
					/* skip the init below */
					refonly = true;
				} else {
					/*
					 * If the client supplied an adapter
					 * (regardless of its value) for a
					 * nexus port that's already occupied,
					 * then we fail the request.
					 */
					SK_ERR("nexus adapter exists");
					err = EEXIST;
				}
			}
		}
	}

done:
	/* initialize the nexus port and the adapter occupying it */
	if (err == 0 && na != NULL && *na != NULL && !refonly) {
		ASSERT(nx_port < nx->nx_num_ports);
		ASSERT(npi->npi_nah == 0);
		ASSERT(nx->nx_active_ports < nx->nx_num_ports);
		ASSERT(!bit_test(nx->nx_ports_bmap[nx_port / NX_PORT_CHUNK],
		    (nx_port % NX_PORT_CHUNK)));

		nx->nx_active_ports++;
		npi->npi_nah = NPI_NA_ENCODE(*na, NEXUS_PORT_STATE_WORKING);
		(*na)->na_nx_port = nx_port;
	}

	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d, ports %u/%u (err %d)",
	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports,
	    err);

	return err;
}

void
nx_port_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = &nx->nx_ports[nx_port];

	npi->npi_nah = NPI_NA_ENCODE(npi->npi_nah,
	    NEXUS_PORT_STATE_DEFUNCT);
}

void
nx_port_free(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	bitmap_t *bmap;
	uint32_t i, j;

	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	ASSERT(nx_port != NEXUS_PORT_ANY && nx_port < nx->nx_num_ports);
	ASSERT(nx->nx_active_ports != 0);

	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];
	ASSERT(!bit_test(*bmap, j));

	npi = &nx->nx_ports[nx_port];
	npi->npi_nah = 0;
	if (npi->npi_nxb == NULL) {
		/* it's vacant, release it (set bit) */
		bit_set(*bmap, j);
	}

	nx->nx_active_ports--;

	//XXX wshen0123@apple.com --- try to shrink bitmap & nx_ports ???

	SK_DF(SK_VERB_NXPORT, "--- nx 0x%llx nx_port %d, ports %u/%u",
	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports);
}

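/*
 * Reserve a nexus port for a later nx_port_alloc() by attaching the
 * caller's binding credentials (and optional per-port info) to it.  The
 * port is claimed in the bitmap here; EEXIST is returned if it is
 * already bound or occupied.
 */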
int
nx_port_bind_info(struct kern_nexus *nx, nexus_port_t nx_port,
    struct nxbind *nxb0, void *info)
{
	struct nx_port_info *npi = NULL;
	size_t g;
	uint32_t i, j;
	bitmap_t *bmap;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);
	ASSERT(nx_port < NXDOM_MAX(NX_DOM(nx), ports));
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	ASSERT(nxb0 != NULL);

	if ((nx_port + 1) > nx->nx_num_ports) {
		g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
		VERIFY(g <= NEXUS_PORT_MAX);
		if ((err = nx_port_grow(nx, (nexus_port_size_t)g)) != 0) {
			goto done;
		}
	}
	ASSERT(err == 0);

	npi = &nx->nx_ports[nx_port];
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];
	if (bit_test(*bmap, j)) {
		/* port is not (yet) bound or allocated */
		ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);

		struct nxbind *nxb = nxb_alloc(Z_WAITOK);
		nxb_move(nxb0, nxb);
		npi->npi_nxb = nxb;
		npi->npi_info = info;
		/* claim it (clear bit) */
		bit_clear(*bmap, j);
		ASSERT(err == 0);
	} else {
		/* port is already taken */
		ASSERT(NPI_NA(npi) != NULL || npi->npi_nxb != NULL);
		err = EEXIST;
	}
done:

	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
	    "+++ nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);

	return err;
}

int
nx_port_bind(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb0)
{
	return nx_port_bind_info(nx, nx_port, nxb0, NULL);
}

static int
nx_port_info_size(void *info, size_t *sz)
{
	struct nx_port_info_header *hdr = info;

	switch (hdr->ih_type) {
	case NX_PORT_INFO_TYPE_NETIF:
		break;
	default:
		return EINVAL;
	}
	*sz = hdr->ih_size;
	return 0;
}

int
nx_port_unbind(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	struct nxbind *nxb;
	uint32_t i, j;
	bitmap_t *bmap;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (nx_port >= nx->nx_num_ports) {
		err = EDOM;
		goto done;
	}

	npi = &nx->nx_ports[nx_port];
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];

	if ((nxb = npi->npi_nxb) == NULL) {
		/* must be either free or allocated */
		ASSERT(NPI_NA(npi) == NULL ||
		    (!bit_test(*bmap, j) && nx->nx_active_ports > 0));
		err = ENOENT;
	} else {
		nxb_free(nxb);
		npi->npi_nxb = NULL;
		if (npi->npi_info != NULL) {
			size_t sz;

			VERIFY(nx_port_info_size(npi->npi_info, &sz) == 0);
			sk_free_data(npi->npi_info, sz);
			npi->npi_info = NULL;
		}
		ASSERT(!bit_test(*bmap, j));
		if (NPI_NA(npi) == NULL) {
			/* it's vacant, release it (set bit) */
			bit_set(*bmap, j);
		}
	}

done:
	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
	    "--- nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);

	return err;
}

struct nexus_adapter *
nx_port_get_na(struct kern_nexus *nx, nexus_port_t nx_port)
{
	if (nx->nx_ports != NULL && nx->nx_num_ports > nx_port) {
		return NPI_NA(&nx->nx_ports[nx_port]);
	} else {
		return NULL;
	}
}

int
nx_port_get_info(struct kern_nexus *nx, nexus_port_t port,
    nx_port_info_type_t type, void *info, uint32_t len)
{
	struct nx_port_info *npi;
	struct nx_port_info_header *hdr;

	if (nx->nx_ports == NULL || port >= nx->nx_num_ports) {
		return ENXIO;
	}
	npi = &nx->nx_ports[port];
	hdr = npi->npi_info;
	if (hdr == NULL) {
		return ENOENT;
	}

	if (hdr->ih_type != type) {
		return EINVAL;
	}

	bcopy(npi->npi_info, info, len);
	return 0;
}

bool
nx_port_is_valid(struct kern_nexus *nx, nexus_port_t nx_port)
{
	return nx_port < nx->nx_num_ports;
}

bool
nx_port_is_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
{
	ASSERT(nx_port_is_valid(nx, nx_port));

	return NPI_IS_DEFUNCT(&nx->nx_ports[nx_port]);
}

void
nx_port_free_all(struct kern_nexus *nx)
{
	uint32_t num_ports;

	/* uncrustify doesn't handle C blocks properly */
	/* BEGIN IGNORE CODESTYLE */
	nx_port_foreach(nx, ^(nexus_port_t p) {
		struct nxbind *nxb;
		void *info;
		nxb = nx->nx_ports[p].npi_nxb;
		info = nx->nx_ports[p].npi_info;
		if (nxb != NULL) {
			nxb_free(nxb);
			nx->nx_ports[p].npi_nxb = NULL;
		}
		if (info != NULL) {
			size_t sz;

			VERIFY(nx_port_info_size(info, &sz) == 0);
			skn_free_data(info, info, sz);
			nx->nx_ports[p].npi_info = NULL;
		}
	});
	/* END IGNORE CODESTYLE */

	num_ports = nx->nx_num_ports;
	nx->nx_num_ports = 0;
	nx->nx_active_ports = 0;
	skn_free_data(ports_bmap,
	    nx->nx_ports_bmap, (num_ports / NX_PORT_CHUNK) * sizeof(bitmap_t));
	nx->nx_ports_bmap = NULL;
	sk_free_type_array(struct nx_port_info, num_ports, nx->nx_ports);
	nx->nx_ports = NULL;
}

void
nx_port_foreach(struct kern_nexus *nx,
    void (^port_handle)(nexus_port_t nx_port))
{
	for (nexus_port_size_t i = 0; i < (nx->nx_num_ports / NX_PORT_CHUNK); i++) {
		bitmap_t bmap = nx->nx_ports_bmap[i];

		if (bmap == NX_PORT_CHUNK_FREE) {
			continue;
		}

		for (nexus_port_size_t j = 0; j < NX_PORT_CHUNK; j++) {
			if (bit_test(bmap, j)) {
				continue;
			}
			port_handle((i * NX_PORT_CHUNK) + j);
		}
	}
}

/*
 * sysctl interfaces
 */
static int nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_provider_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_provider_list_sysctl, "S,nexus_provider_info_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_channel_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_channel_list_sysctl, "S,nexus_channel_entry_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, llink_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_LLINK_LIST, nexus_mib_get_sysctl, "S,nx_llink_info",
    "A list of logical links");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
    0, NXMIB_FLOW, nexus_mib_get_sysctl, "S,sk_stats_flow",
    "Nexus inet flows with stats collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_owner,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_OWNER, nexus_mib_get_sysctl, "S,sk_stats_flow_owner",
    "Nexus flow owners");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_route,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ROUTE, nexus_mib_get_sysctl, "S,sk_stats_flow_route",
    "Nexus flow routes");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, net_if,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_STATS, nexus_mib_get_sysctl, "S,sk_stats_net_if",
    "Nexus netif statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_switch,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FSW_STATS, nexus_mib_get_sysctl, "S,sk_stats_flow_switch",
    "Nexus flowswitch statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, userstack,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_USERSTACK_STATS, nexus_mib_get_sysctl, "S,sk_stats_userstack",
    "Nexus userstack statistics counter");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_adv,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ADV, nexus_mib_get_sysctl, "S,sk_stats_flow_adv",
    "Nexus flow advisory dump");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, netif_queue,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_QUEUE_STATS, nexus_mib_get_sysctl, "S,netif_qstats_info",
    "A list of netif queue stats entries");

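/*
 * Illustrative user-space sketch (not part of this file): reading one of
 * the MIB nodes registered above via the two-call sysctl pattern that
 * nexus_mib_get_sysctl() supports (the first call sizes the buffer, the
 * second call fills it).  The MIB name assumes the _kern_skywalk_stats
 * parent resolves to "kern.skywalk.stats"; the payload is treated as
 * opaque bytes here since the record layouts are private to Skywalk.
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	int
 *	main(void)
 *	{
 *		size_t len = 0;
 *
 *		// First call: NULL buffer to learn the required size.
 *		if (sysctlbyname("kern.skywalk.stats.flow_switch", NULL,
 *		    &len, NULL, 0) != 0) {
 *			perror("sysctlbyname(size)");
 *			return 1;
 *		}
 *		void *buf = malloc(len);
 *		if (buf == NULL) {
 *			return 1;
 *		}
 *		// Second call: fetch the records into the sized buffer.
 *		if (sysctlbyname("kern.skywalk.stats.flow_switch", buf,
 *		    &len, NULL, 0) != 0) {
 *			perror("sysctlbyname(fetch)");
 *			free(buf);
 *			return 1;
 *		}
 *		printf("flow_switch stats: %zu bytes\n", len);
 *		free(buf);
 *		return 0;
 *	}
 */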
/*
 * Provider list sysctl
 */
static void
nexus_provider_info_populate(struct kern_nexus_provider *nxprov,
    nexus_provider_info_t info)
{
	struct kern_nexus *nx;
	uuid_t *uuids;

	SK_LOCK_ASSERT_HELD();

	/* provider UUID + params */
	uuid_copy(info->npi_prov_uuid, nxprov->nxprov_uuid);
	bcopy(nxprov->nxprov_params, &info->npi_prov_params,
	    sizeof(struct nxprov_params));
	info->npi_instance_uuids_count = nxprov->nxprov_nx_count;

	/* instance UUID list */
	uuids = info->npi_instance_uuids;
	STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
		uuid_copy(*uuids, nx->nx_uuid);
		uuids++;
	}
}

static int
nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	int error = 0;
	struct kern_nexus_provider *nxprov;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		size_t info_size;

		info_size = NEXUS_PROVIDER_INFO_SIZE(nxprov->nxprov_nx_count);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			nexus_provider_info_populate(nxprov, (void *)scan);
			scan += info_size;
			buffer_space -= info_size;
		}
		actual_space += info_size;
	}
	SK_UNLOCK();

	out_error = SYSCTL_OUT(req, buffer, actual_space);
	if (out_error != 0) {
		error = out_error;
	}

	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}

/*
 * Channel list sysctl
 */
static uint32_t
channel_ring_count(struct kern_channel *ch, enum txrx which)
{
	return ch->ch_last[which] - ch->ch_first[which];
}

static void
populate_ring_entries(struct __kern_channel_ring *kring,
    ring_id_t first, ring_id_t last, nexus_channel_ring_entry_t entries)
{
	ring_id_t i;
	nexus_channel_ring_entry_t scan;
	struct __kern_channel_ring *ring;

	scan = entries;
	for (i = first; i < last; i++, scan++) {
		ring = &kring[i];

		DTRACE_SKYWALK1(populate__ring, struct __kern_channel_ring *,
		    ring);
		if (kr_stat_enable == 0) {
			bzero(&scan->ncre_stats, sizeof(scan->ncre_stats));
			bzero(&scan->ncre_user_stats,
			    sizeof(scan->ncre_user_stats));
		} else {
			scan->ncre_stats = ring->ckr_stats;
			scan->ncre_user_stats = ring->ckr_usr_stats;
		}
		scan->ncre_error_stats = ring->ckr_err_stats;
		scan->ncre_ring_id = i;
	}
}

/* combine/convert ch_mode/ch_flags into nexus_channel_entry flags */
static uint32_t
nexus_channel_get_flags(uint32_t ch_mode, uint32_t ch_flags)
{
	uint32_t flags = 0;

	flags |= (ch_mode & CHMODE_MONITOR_TX) ? SCHF_MONITOR_TX : 0;
	flags |= (ch_mode & CHMODE_MONITOR_RX) ? SCHF_MONITOR_RX : 0;
	flags |= (ch_mode & CHMODE_MONITOR_NO_COPY) ? SCHF_MONITOR_NO_COPY : 0;
	flags |= (ch_mode & CHMODE_USER_PACKET_POOL) ? SCHF_USER_PACKET_POOL : 0;
	flags |= (ch_mode & CHMODE_DEFUNCT_OK) ? SCHF_DEFUNCT_OK : 0;
	flags |= (ch_mode & CHMODE_FILTER) ? SCHF_FILTER : 0;
	flags |= (ch_mode & CHMODE_EVENT_RING) ? SCHF_EVENT_RING : 0;
	flags |= (ch_mode & CHMODE_EXCLUSIVE) ? SCHF_EXCLUSIVE : 0;
	flags |= (ch_flags & CHANF_IF_ADV) ? SCHF_IF_ADV : 0;
	flags |= (ch_flags & CHANF_DEFUNCT_SKIP) ? SCHF_DEFUNCT_SKIP : 0;
	flags |= (ch_flags & CHANF_CLOSING) ? SCHF_CLOSING : 0;
	flags |= (ch_flags & CHANF_DEFUNCT) ? SCHF_DEFUNCT : 0;
	flags |= (ch_mode & CHMODE_LOW_LATENCY) ? SCHF_LOW_LATENCY : 0;

	return flags;
}

SK_NO_INLINE_ATTRIBUTE
static void
nexus_channel_entry_populate(struct kern_channel *ch,
    nexus_channel_entry_t entry)
{
	uint32_t ch_mode = ch->ch_info->cinfo_ch_mode;
	uint32_t ch_flags = ch->ch_flags;
	ring_id_t rx_first = ch->ch_first[NR_RX];
	ring_id_t rx_last = ch->ch_last[NR_RX];
	ring_id_t tx_last = ch->ch_last[NR_TX];
	ring_id_t tx_first = ch->ch_first[NR_TX];

	uuid_copy(entry->nce_uuid, ch->ch_info->cinfo_ch_id);
	entry->nce_flags = nexus_channel_get_flags(ch_mode, ch_flags);
	entry->nce_port = ch->ch_info->cinfo_nx_port;
	entry->nce_pid = ch->ch_pid;
	entry->nce_fd = ch->ch_fd;
	entry->nce_tx_rings = tx_last - tx_first;
	entry->nce_rx_rings = rx_last - rx_first;
	populate_ring_entries(ch->ch_na->na_tx_rings, tx_first, tx_last,
	    entry->nce_ring_entries);
	populate_ring_entries(ch->ch_na->na_rx_rings, rx_first, rx_last,
	    entry->nce_ring_entries + entry->nce_tx_rings);
}

SK_NO_INLINE_ATTRIBUTE
static size_t
nexus_channel_info_populate(struct kern_nexus *nx,
    nexus_channel_info_t info, size_t buffer_size)
{
	struct kern_channel *ch = NULL;
	size_t info_size;
	caddr_t scan = NULL;

	SK_LOCK_ASSERT_HELD();

	info_size = sizeof(*info);

	/* channel list */
	if (info != NULL) {
		if (buffer_size < info_size) {
			return info_size;
		}

		/* instance UUID */
		uuid_copy(info->nci_instance_uuid, nx->nx_uuid);
		info->nci_channel_entries_count = nx->nx_ch_count;
		scan = (caddr_t)info->nci_channel_entries;
	}
	STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
		size_t entry_size;
		uint32_t ring_count;

		ring_count = channel_ring_count(ch, NR_TX) +
		    channel_ring_count(ch, NR_RX);
		entry_size = NEXUS_CHANNEL_ENTRY_SIZE(ring_count);
		info_size += entry_size;
		if (scan != NULL) {
			if (buffer_size < info_size) {
				return info_size;
			}

			nexus_channel_entry_populate(ch, (void *)scan);
			scan += entry_size;
		}
	}
	return info_size;
}

static int
nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	struct kern_nexus *nx;
	int error = 0;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		size_t info_size;

		info_size = nexus_channel_info_populate(nx, (void *)scan,
		    buffer_space);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += info_size;
			buffer_space -= info_size;
		}
		actual_space += info_size;
	}
	SK_UNLOCK();

	if (actual_space != 0) {
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}

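/*
 * Shared handler for the MIB-style nodes registered above.  The request
 * is mapped to a nexus_mib_filter (either implied by the node's
 * subcommand or passed in as new data) and then forwarded to each
 * domain provider's nxdom_prov_nx_mib_get() callback, concatenating the
 * per-nexus records into the caller's buffer.
 */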
static int
nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	struct proc *p = req->p;
	struct nexus_mib_filter filter;
	int error = 0;
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	struct kern_nexus *nx;
	caddr_t scan;

	/* Restrict protocol stats access to root user only (like netstat). */
	if (oidp->oid_arg2 == NXMIB_USERSTACK_STATS &&
	    !kauth_cred_issuser(kauth_cred_get())) {
		SK_ERR("mib request rejected, EPERM");
		return EPERM;
	}

	if (req->newptr == USER_ADDR_NULL) {
		/*
		 * For flow stats requests, non-root users need to provide a
		 * 5-tuple.  Otherwise, we do not grant access.
		 */
		if (oidp->oid_arg2 == NXMIB_FLOW &&
		    !kauth_cred_issuser(kauth_cred_get())) {
			SK_ERR("mib request rejected: tuple not provided");
			return EPERM;
		}
		/* use subcommand for multiple nodes */
		filter.nmf_type = oidp->oid_arg2;
		filter.nmf_bitmap = 0x0;
	} else if (req->newlen != sizeof(struct nexus_mib_filter)) {
		SK_ERR("mis-matching newlen");
		return EINVAL;
	} else {
		error = SYSCTL_IN(req, &filter, sizeof(struct nexus_mib_filter));
		if (error != 0) {
			SK_ERR("SYSCTL_IN err %d", error);
			return error;
		}
		if (filter.nmf_type != oidp->oid_arg2) {
			SK_ERR("mis-matching nmf_type");
			return EINVAL;
		}
		/*
		 * For flow stats requests, non-root users need to set the nexus
		 * mib filter to NXMIB_FILTER_INFO_TUPLE.  Otherwise, we do not
		 * grant access.  This ensures that fsw_mib_get_flow looks for a
		 * flow entry that matches the given tuple of the non-root user.
		 */
		if (filter.nmf_type == NXMIB_FLOW &&
		    (filter.nmf_bitmap & NXMIB_FILTER_INFO_TUPLE) == 0 &&
		    !kauth_cred_issuser(kauth_cred_get())) {
			SK_ERR("mib request rejected: tuple filter not set");
			return EPERM;
		}
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;

	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		if (NX_DOM_PROV(nx)->nxdom_prov_nx_mib_get == NULL) {
			continue;
		}

		size_t size;
		struct kern_nexus_domain_provider *nx_dp = NX_DOM_PROV(nx);

		size = nx_dp->nxdom_prov_nx_mib_get(nx, &filter, scan,
		    buffer_space, p);

		if (scan != NULL) {
			if (buffer_space < size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += size;
			buffer_space -= size;
		}
		actual_space += size;
	}
	SK_UNLOCK();

	if (actual_space != 0) {
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}

void
kern_nexus_walktree(kern_nexus_walktree_f_t *f, void *arg0,
    boolean_t is_sk_locked)
{
	struct kern_nexus *nx = NULL;

	if (!is_sk_locked) {
		SK_LOCK();
	} else {
		SK_LOCK_ASSERT_HELD();
	}

	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		(*f)(nx, arg0);
	}

	if (!is_sk_locked) {
		SK_UNLOCK();
	}
}

errno_t
kern_nexus_get_pbufpool_info(const uuid_t nx_uuid,
    struct kern_pbufpool_memory_info *rx_pool_info,
    struct kern_pbufpool_memory_info *tx_pool_info)
{
	struct kern_pbufpool *tpp, *rpp;
	struct kern_nexus *nx;
	errno_t err = 0;

	nx = nx_find(nx_uuid, FALSE);
	if (nx == NULL) {
		err = ENOENT;
		goto done;
	}

	if (nx->nx_prov->nxprov_params->nxp_type != NEXUS_TYPE_NET_IF) {
		err = ENOTSUP;
		goto done;
	}

	err = nx_netif_prov_nx_mem_info(nx, &tpp, &rpp);
	if (err != 0) {
		goto done;
	}

	if ((tpp == NULL) && (rpp == NULL)) {
		err = ENOENT;
		goto done;
	}

	if (tx_pool_info != NULL) {
		bzero(tx_pool_info, sizeof(*tx_pool_info));
	}
	if (rx_pool_info != NULL) {
		bzero(rx_pool_info, sizeof(*rx_pool_info));
	}

	if ((tx_pool_info != NULL) && (tpp != NULL)) {
		err = kern_pbufpool_get_memory_info(tpp, tx_pool_info);
		if (err != 0) {
			goto done;
		}
	}

	if ((rx_pool_info != NULL) && (rpp != NULL)) {
		err = kern_pbufpool_get_memory_info(rpp, rx_pool_info);
	}

done:
	if (nx != NULL) {
		(void) nx_release(nx);
		nx = NULL;
	}
	return err;
}

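/*
 * Post a CHAN_FILT_HINT_IF_ADV_UPD event on the first TX ring of every
 * channel that has registered for interface advisories on this nexus.
 * The shared advisory lock is only tried, never waited on; if it cannot
 * be taken, the update is counted as dropped and skipped.
 */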
void
nx_interface_advisory_notify(struct kern_nexus *nx)
{
	struct kern_channel *ch;
	struct netif_stats *nifs;
	struct fsw_stats *fsw_stats;
	nexus_type_t nxdom_type = NX_DOM(nx)->nxdom_type;

	if (nxdom_type == NEXUS_TYPE_NET_IF) {
		nifs = &NX_NETIF_PRIVATE(nx)->nif_stats;
	} else if (nxdom_type == NEXUS_TYPE_FLOW_SWITCH) {
		fsw_stats = &NX_FSW_PRIVATE(nx)->fsw_stats;
	} else {
		VERIFY(0);
		__builtin_unreachable();
	}
	if (!lck_rw_try_lock_shared(&nx->nx_ch_if_adv_lock)) {
		if (nxdom_type == NEXUS_TYPE_NET_IF) {
			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_DROP);
		} else {
			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_DROP);
		}
		return;
	}
	/*
	 * If the channel is in the "nx_ch_if_adv_head" list, then we can
	 * safely assume that the channel is not closed yet.
	 * In ch_close_common(), the channel is removed from the
	 * "nx_ch_if_adv_head" list holding the "nx_ch_if_adv_lock" in
	 * exclusive mode, prior to closing the channel.
	 */
	STAILQ_FOREACH(ch, &nx->nx_ch_if_adv_head, ch_link_if_adv) {
		struct nexus_adapter *na = ch->ch_na;

		ASSERT(na != NULL);
		na_post_event(&na->na_tx_rings[ch->ch_first[NR_TX]],
		    TRUE, FALSE, FALSE, CHAN_FILT_HINT_IF_ADV_UPD);
		if (nxdom_type == NEXUS_TYPE_NET_IF) {
			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_SENT);
		} else {
			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_SENT);
		}
	}
	lck_rw_done(&nx->nx_ch_if_adv_lock);
}