/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/flowswitch/fsw_var.h>

static void fsw_flow_route_ctor(void *, struct flow_route *);
static int fsw_flow_route_resolve(void *, struct flow_route *,
    struct __kern_packet *);

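/*
 * Add a flow on behalf of the process named in the request.  Returns the
 * (new or existing) flow owner on success; on failure, returns NULL with
 * *error set (e.g. ENXIO, EINVAL, EEXIST, ENOMEM).  A sketch of the
 * expected call pattern (the actual call sites live elsewhere in the
 * flowswitch code):
 *
 *	int err = 0;
 *	struct flow_owner *fo = fsw_flow_add(fsw, &req, &err);
 *	if (fo == NULL) {
 *		... handle err ...
 *	}
 */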
struct flow_owner *
fsw_flow_add(struct nx_flowswitch *fsw, struct nx_flow_req *req0, int *error)
{
	struct kern_nexus *nx = fsw->fsw_nx;
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	nexus_port_t nx_port = req0->nfr_nx_port;
	struct flow_owner_bucket *fob;
	struct flow_owner *fo = NULL;
	void *fo_context = req0->nfr_context;
	boolean_t nx_bound = FALSE;
	boolean_t new_mapping = FALSE;
	struct nx_flow_req req;
	uuid_t uuid_key;
	bool nx_port_pid_bound;
	uint32_t max_flowadv = nx->nx_prov->nxprov_params->nxp_flowadv_max;
	struct proc *p;
	pid_t pid = req0->nfr_pid;
	bool low_latency = ((req0->nfr_flags & NXFLOWREQF_LOW_LATENCY) != 0);
#if SK_LOG
	uuid_string_t uuidstr;
#endif /* SK_LOG */

	*error = 0;

	/*
	 * Make a local copy of the original request; we'll modify the
	 * local copy and write it back to the original upon success.
	 */
	bcopy(req0, &req, sizeof(*req0));
	ASSERT(!uuid_is_null(req.nfr_flow_uuid));

	/*
	 * Interface attach and detach involve holding the flowswitch
	 * lock as writer.  Given that we might block in msleep() below,
	 * holding the flowswitch RW lock is not an option.  Instead, we
	 * utilize the detach barrier to prevent things from going away
	 * while we are here.
	 */
	if (!fsw_detach_barrier_add(fsw)) {
		SK_ERR("netagent detached");
		*error = ENXIO;
		return NULL;
	}

	/*
	 * We insist that the PID resolves to a process for flow add, but
	 * not for delete.  That's because those events may be posted (to
	 * us) after the corresponding process has exited, and so we still
	 * need to be able to clean up.
	 */
	p = proc_find(pid);
	if (p == PROC_NULL) {
		SK_ERR("process for pid %d doesn't exist", pid);
		*error = EINVAL;
		fsw_detach_barrier_remove(fsw);
		return NULL;
	}
	req.nfr_proc = p;

	/*
	 * If the interface is currently attached, indicate that a bind is
	 * in progress, so that upon releasing the lock any threads
	 * attempting to detach the interface will wait until we're done.
	 */
	fob = flow_mgr_get_fob_by_pid(fm, pid);
	FOB_LOCK_SPIN(fob);
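	/*
	 * Serialize against any concurrent open/close on this bucket;
	 * the saturating increment below keeps the waiter count from
	 * wrapping back to zero.
	 */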
	while (fob->fob_busy_flags & (FOBF_OPEN_BUSY | FOBF_CLOSE_BUSY)) {
		if (++(fob->fob_open_waiters) == 0) {   /* wraparound */
			fob->fob_open_waiters++;
		}
		if ((*error = msleep(&fob->fob_open_waiters, &fob->fob_lock,
		    (PZERO + 1) | PSPIN, __FUNCTION__, NULL)) == EINTR) {
			SK_ERR("%s(%d) binding for uuid %s was interrupted",
			    sk_proc_name_address(p), pid,
			    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr));
			ASSERT(fob->fob_open_waiters > 0);
			fob->fob_open_waiters--;
			FOB_UNLOCK(fob);
			ASSERT(fo == NULL);
			goto unbusy;
		}
	}
	if (__improbable((fob->fob_busy_flags & FOBF_DEAD) != 0)) {
		SK_ERR("%s(%d) binding for flow_uuid %s aborted due to "
		    "dead owner", sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr));
		*error = ENXIO;
		goto done;
	}
	ASSERT(!(fob->fob_busy_flags & FOBF_OPEN_BUSY));
	fob->fob_busy_flags |= FOBF_OPEN_BUSY;

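	/*
	 * Three cases below: a brand new owner that needs an ephemeral
	 * nexus port bound for it; a brand new owner with a caller-
	 * supplied port; or an existing owner whose port is reused.
	 */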
	do {
		fo = flow_owner_find_by_pid(fob, pid, fo_context, low_latency);
		if (fo == NULL && nx_port == NEXUS_PORT_ANY) {
			struct nxbind nxb;

			/*
			 * Release lock to maintain ordering with the
			 * flowswitch lock; busy flag is set above.
			 * Also read_random() may block.
			 */
			FOB_UNLOCK(fob);

			uuid_generate_random(uuid_key);

			bzero(&nxb, sizeof(nxb));
			nxb.nxb_flags |= NXBF_MATCH_UNIQUEID;
			nxb.nxb_uniqueid = proc_uniqueid(p);
			nxb.nxb_pid = pid;
			nxb.nxb_flags |= NXBF_MATCH_KEY;
			nxb.nxb_key_len = sizeof(uuid_key);
			nxb.nxb_key = sk_alloc_data(nxb.nxb_key_len,
			    Z_WAITOK | Z_NOFAIL, skmem_tag_nx_key);
			bcopy(uuid_key, nxb.nxb_key, nxb.nxb_key_len);

			/*
			 * Bind a new nexus port.  Directly invoke the
			 * nxdom_bind_port() callback of the nexus since
			 * the nexus instance is already known.  Free
			 * the UUID key upon failure; otherwise the callee
			 * will attach it to the nexus port and clean
			 * it up during nxdom_unbind_port().
			 */
			if ((*error = NX_DOM(nx)->nxdom_bind_port(nx,
			    &nx_port, &nxb, NULL)) != 0) {
				sk_free_data(nxb.nxb_key, nxb.nxb_key_len);
				SK_ERR("%s(%d) failed to bind flow_uuid %s to a "
				    "nx_port (err %d)", sk_proc_name_address(p),
				    pid, sk_uuid_unparse(req.nfr_flow_uuid,
				    uuidstr), *error);
				nx_port = NEXUS_PORT_ANY;
				FOB_LOCK_SPIN(fob);
				break;
			}
			ASSERT(nx_port != NEXUS_PORT_ANY);
			nx_bound = TRUE;

			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s associated with "
			    "ephemeral nx_port %d", sk_proc_name_address(p),
			    pid, sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
			    (int)nx_port);

			FOB_LOCK_SPIN(fob);
			/*
			 * If there's no interface associated with this,
			 * then bail.
			 */
			if (__improbable((fob->fob_busy_flags & FOBF_DEAD) !=
			    0 || fsw->fsw_ifp == NULL ||
			    fsw->fsw_agent_session == NULL)) {
				SK_ERR("%s(%d) binding for flow_uuid %s aborted "
				    "(lost race)", sk_proc_name_address(p),
				    pid, sk_uuid_unparse(req.nfr_flow_uuid,
				    uuidstr));
				*error = ENXIO;
				break;
			}
			nx_port_pid_bound = true;
			uuid_copy(req.nfr_bind_key, uuid_key);
		} else if (fo == NULL) {
			/* make sure request has a valid nx_port */
			ASSERT(nx_port != NEXUS_PORT_ANY);
			/*
			 * XXX
			 * Why is this path supported?  Normal flows are not
			 * added with a specified port, and this check does
			 * nothing to verify whether the port is in use.
			 *
			 * Using nx_port_is_valid() is wrong because that
			 * assumes the array already has non-zero ports.
			 */
			if (__improbable(nx_port >= NX_PORT_CHUNK)) {
				*error = EINVAL;
				break;
			}
			/* read_random() may block */
			FOB_LOCK_CONVERT(fob);

			nx_port_pid_bound = false;
			uuid_generate_random(uuid_key);

			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s associated "
			    "with nx_port %d", sk_proc_name_address(p),
			    pid, sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
			    (int)nx_port);
		} else {
			/* subsequent request should reuse the existing port */
			ASSERT(fo->fo_nx_port != NEXUS_PORT_ANY);
			if (nx_port != NEXUS_PORT_ANY &&
			    nx_port != fo->fo_nx_port) {
				*error = EINVAL;
				break;
			}
			/* fill out info for the nexus port */
			nx_port = fo->fo_nx_port;
			uuid_copy(uuid_key, fo->fo_key);
			break;
		}

		FOB_LOCK_CONVERT(fob);

		ASSERT(nx_port != NEXUS_PORT_ANY);
		ASSERT(fo == NULL);
		fo = flow_owner_alloc(fob, p, nx_port, nx_port_pid_bound,
		    (max_flowadv != 0), fsw, NULL, fo_context, low_latency);
		if (fo == NULL) {
			*error = ENOMEM;
			break;
		}
		ASSERT(!uuid_is_null(uuid_key));
		uuid_copy(fo->fo_key, uuid_key);
		new_mapping = TRUE;
	} while (0);

	if (*error != 0) {
		goto done;
	}

	/* make sure the flow UUID isn't already in use */
	struct flow_entry *fe;
	if ((fe = flow_entry_find_by_uuid(fo, req.nfr_flow_uuid)) != NULL) {
#if SK_LOG
		char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
		SK_DSC(p, "flow uuid collision: \"%s\" already exists at "
		    "fe 0x%llx flags 0x%b %s(%d)",
		    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
		    fe->fe_flags, FLOWENTF_BITS, fe->fe_proc_name, fe->fe_pid);
#endif /* SK_LOG */
		*error = EEXIST;
		flow_entry_release(&fe);
		goto done;
	}

	/* return the assigned nexus port to the caller */
	req.nfr_nx_port = nx_port;
	if (__probable(!fsw_qos_default_restricted())) {
		req.nfr_flags |= NXFLOWREQF_QOS_MARKING;
	} else {
		req.nfr_flags &= ~NXFLOWREQF_QOS_MARKING;
	}

	FOB_LOCK_CONVERT(fob);

	*error = flow_mgr_flow_add(nx, fm, fo, fsw->fsw_ifp, &req,
	    fsw_flow_route_ctor, fsw_flow_route_resolve, fsw);

	if (*error == 0) {
		/* replace original request with our (modified) local copy */
		bcopy(&req, req0, sizeof(*req0));

		SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s is now on "
		    "nx_port %d", sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
		    (int)nx_port);
	}

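	/*
	 * Common exit path: on failure, undo whatever we created above
	 * (free a newly allocated owner, unbind an ephemerally bound
	 * nexus port), then clear the busy flag and wake any waiters.
	 */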
done:
	if (__improbable(*error != 0)) {
		SK_ERR("%s(%d) failed to add flow_uuid %s (err %d)",
		    sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr), *error);
		if (fo != NULL) {
			if (new_mapping) {
				FOB_LOCK_CONVERT(fob);
				flow_owner_free(fob, fo);
			}
			fo = NULL;
		}
		if (nx_bound) {
			ASSERT(nx_port != NEXUS_PORT_ANY);
			FOB_LOCK_ASSERT_HELD(fob);
			/*
			 * Release lock to maintain ordering with the
			 * flowswitch lock; busy flag is set above.
			 */
			FOB_UNLOCK(fob);
			(void) NX_DOM(nx)->nxdom_unbind_port(nx, nx_port);
			nx_port = NEXUS_PORT_ANY;
			FOB_LOCK_SPIN(fob);
		}
	}
	fob->fob_busy_flags &= ~FOBF_OPEN_BUSY;
	if (__improbable(fob->fob_open_waiters > 0)) {
		fob->fob_open_waiters = 0;
		wakeup(&fob->fob_open_waiters);
	}
	if (__improbable(fob->fob_close_waiters > 0)) {
		fob->fob_close_waiters = 0;
		wakeup(&fob->fob_close_waiters);
	}
	FOB_UNLOCK(fob);

unbusy:
	proc_rele(p);
	p = PROC_NULL;
	/* allow any pending detach to proceed */
	fsw_detach_barrier_remove(fsw);

	return fo;
}

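/*
 * Remove the flow identified by the request's flow UUID.  Unlike flow
 * add, the PID here need not resolve to a live process, since deletion
 * may be requested after the owning process has already exited.
 */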
int
fsw_flow_del(struct nx_flowswitch *fsw, struct nx_flow_req *req, bool nolinger,
    void *params)
{
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	struct kern_nexus *nx = fsw->fsw_nx;
	struct flow_owner_bucket *fob;
	struct flow_owner *fo;
	void *fo_context = req->nfr_context;
	pid_t pid = req->nfr_pid;
	bool low_latency = ((req->nfr_flags & NXFLOWREQF_LOW_LATENCY) != 0);
	int error;

	ASSERT(!uuid_is_null(req->nfr_flow_uuid));

	/*
	 * We use the detach barrier to prevent the flowswitch instance
	 * from going away while we are here.
	 */
	if (!fsw_detach_barrier_add(fsw)) {
		SK_ERR("netagent detached");
		return ENXIO;
	}

	/* find mapping */
	fob = flow_mgr_get_fob_by_pid(fm, pid);
	FOB_LOCK_SPIN(fob);
	while (fob->fob_busy_flags & (FOBF_OPEN_BUSY | FOBF_CLOSE_BUSY)) {
		if (++(fob->fob_close_waiters) == 0) {  /* wraparound */
			fob->fob_close_waiters++;
		}
		(void) msleep(&fob->fob_close_waiters, &fob->fob_lock,
		    (PZERO - 1) | PSPIN, __FUNCTION__, NULL);
	}
	fob->fob_busy_flags |= FOBF_CLOSE_BUSY;

	fo = flow_owner_find_by_pid(fob, pid, fo_context, low_latency);
	if (fo == NULL) {
		error = ENOENT;
		goto done;
	}

	FOB_LOCK_CONVERT(fob);

	/*
	 * Unbind flow.  Note that if "auto close" is enabled, the flows
	 * associated with this fo would have been removed when the channel
	 * opened to the nexus port got closed.  If we get ENOENT, just
	 * treat it as non-fatal and proceed further down.
	 */
	error = flow_owner_destroy_entry(fo, req->nfr_flow_uuid, nolinger,
	    params);
	if (error != 0 && error != ENOENT) {
		goto done;
	}

	/*
	 * If the channel that was connected to the nexus port is no longer
	 * around, i.e. fsw_port_dtor() has been called, and there are no
	 * more flows on the owner, and the owner was bound to the PID on
	 * the nexus port in fsw_flow_bind(), remove the nexus binding now
	 * to make this port available.
	 */
	if (RB_EMPTY(&fo->fo_flow_entry_id_head) &&
	    fo->fo_nx_port_destroyed && fo->fo_nx_port_pid_bound) {
		nexus_port_t nx_port = fo->fo_nx_port;
		ASSERT(nx_port != NEXUS_PORT_ANY);
		/*
		 * Release lock to maintain ordering with the
		 * flowswitch lock; busy flag is set above.
		 */
		FOB_UNLOCK(fob);
		(void) NX_DOM(nx)->nxdom_unbind_port(nx, nx_port);
		FOB_LOCK(fob);
		flow_owner_free(fob, fo);
		fo = NULL;
	}
	error = 0;

done:
#if SK_LOG
	if (__improbable((sk_verbose & SK_VERB_FLOW) != 0)) {
		uuid_string_t uuidstr;
		if (fo != NULL) {
			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s (err %d)",
			    fo->fo_name, fo->fo_pid,
			    sk_uuid_unparse(req->nfr_flow_uuid, uuidstr), error);
		} else {
			SK_DF(SK_VERB_FLOW, "pid %d flow_uuid %s (err %d)", pid,
			    sk_uuid_unparse(req->nfr_flow_uuid, uuidstr), error);
		}
	}
#endif /* SK_LOG */

	fob->fob_busy_flags &= ~FOBF_CLOSE_BUSY;
	if (__improbable(fob->fob_open_waiters > 0)) {
		fob->fob_open_waiters = 0;
		wakeup(&fob->fob_open_waiters);
	}
	if (__improbable(fob->fob_close_waiters > 0)) {
		fob->fob_close_waiters = 0;
		wakeup(&fob->fob_close_waiters);
	}
	FOB_UNLOCK(fob);

	/* allow any pending detach to proceed */
	fsw_detach_barrier_remove(fsw);

	return error;
}

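/*
 * Per-flow configuration.  Right now the only supported knob is
 * NXFLOWREQF_NOWAKEFROMSLEEP; the caller must own the flow (matching
 * PID), otherwise EPERM is returned.
 */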
int
fsw_flow_config(struct nx_flowswitch *fsw, struct nx_flow_req *req)
{
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	struct flow_entry *fe = NULL;
	struct ns_token *nt = NULL;
	int error = 0;

	FSW_RLOCK(fsw);
	fe = flow_mgr_get_fe_by_uuid_rlock(fm, req->nfr_flow_uuid);
	if (fe == NULL) {
		SK_ERR("can't find flow");
		error = ENOENT;
		goto done;
	}

	if (fe->fe_pid != req->nfr_pid) {
		SK_ERR("flow ownership error");
		error = EPERM;
		goto done;
	}

	/* right now we only support the NXFLOWREQF_NOWAKEFROMSLEEP config */
	nt = fe->fe_port_reservation;
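	/*
	 * Keep the flow entry flags and the port reservation's netns
	 * flags in sync, so that both layers see the same
	 * wake-from-sleep policy.
	 */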
	if (req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_NOWAKEFROMSLEEP, relaxed);
		netns_change_flags(&nt, NETNS_NOWAKEFROMSLEEP, 0);
	} else {
		os_atomic_andnot(&fe->fe_flags, FLOWENTF_NOWAKEFROMSLEEP, relaxed);
		netns_change_flags(&nt, 0, NETNS_NOWAKEFROMSLEEP);
	}
#if SK_LOG
	char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
	SK_DF(SK_VERB_FLOW, "%s: NOWAKEFROMSLEEP %d",
	    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
	    req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP ? 1 : 0);
#endif /* SK_LOG */

done:
	if (fe != NULL) {
		flow_entry_release(&fe);
	}
	FSW_RUNLOCK(fsw);
	return error;
}

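/*
 * Flow route constructor/resolver trampolines; these simply forward to
 * the flowswitch's fsw_ctor/fsw_resolve callbacks when they are set.
 */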
static void
fsw_flow_route_ctor(void *arg, struct flow_route *fr)
{
	struct nx_flowswitch *fsw = arg;
	if (fsw->fsw_ctor != NULL) {
		fsw->fsw_ctor(fsw, fr);
	}
}

static int
fsw_flow_route_resolve(void *arg, struct flow_route *fr,
    struct __kern_packet *pkt)
{
	struct nx_flowswitch *fsw = arg;
	return (fsw->fsw_resolve != NULL) ? fsw->fsw_resolve(fsw, fr, pkt) : 0;
}