/* * Copyright (c) 2012-2021 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Flow Control and Feedback Advisory * * Each mbuf that is being sent out through an interface is tagged with a * unique 32-bit ID which will help to identify all the packets that belong * to a particular flow at the interface layer. Packets carrying such ID * would need to be marked with PKTF_FLOW_ID. Normally, this ID is computed * by the module that generates the flow. There are 3 kinds of flow sources * that are currently recognized: * * a. INPCB (INET/INET6 Protocol Control Block). When a socket is * connected, the flow hash for the socket is computed and stored in * the PCB. Further transmissions on the socket will cause the hash * value to be carried within the mbuf as the flow ID. * * b. Interface. When an interface is attached, the flow hash for the * interface is computed and stored in the ifnet. This value is * normally ignored for most network drivers, except for those that * reside atop another driver, e.g. a virtual interface performing * encapsulation/encryption on the original packet and sending the * newly-generated packet to another interface. Such interface needs * to associate all generated packets with the interface flow hash * value as the flow ID. * * c. PF (Packet Filter). When a packet goes through PF and it is not * already associated with a flow ID, PF will compute a flow hash and * store it in the packet as flow ID. When the packet is associated * with a PF state, the state record will have the flow ID stored * within, in order to avoid recalculating the flow hash. Although PF * is capable of generating flow IDs, it does not participate in flow * advisory, and therefore packets whose IDs are computed by PF will * not have their PKTF_FLOW_ADV packet flag set. * * Activation of flow advisory mechanism is done by setting the PKTF_FLOW_ADV * packet flag; because a flow ID is required, the mechanism will not take * place unless PKTF_FLOW_ID is set as well. The packet must also carry one * of the flow source types FLOWSRC_{INPCB,IFNET} in order to identify where * the flow advisory notification should be delivered to. As noted above, * FLOWSRC_PF does not participate in this mechanism. * * The classq module configured on the interface is responsible for exerting * flow control to the upper layers. This occurs when the number of packets * queued for a flow reaches a limit. The module generating the flow will * cease transmission until further flow advisory notice, and the flow will * be inserted into the classq's flow control list. * * When packets are dequeued from the classq and the number of packets for * a flow goes below a limit, the classq will transfer its flow control list * to the global fadv_list. This will then trigger the flow advisory thread * to run, which will cause the flow source modules to be notified that data * can now be generated for those previously flow-controlled flows. */ #include #include #include #include /* for VERIFY() */ #include #include #include #include #include #include #include #include #include #if SKYWALK #include #endif /* SKYWALK */ /* Lock group and attribute for fadv_lock */ static LCK_GRP_DECLARE(fadv_lock_grp, "fadv_lock"); static LCK_MTX_DECLARE(fadv_lock, &fadv_lock_grp); /* protected by fadv_lock */ static STAILQ_HEAD(fadv_head, flowadv_fcentry) fadv_list = STAILQ_HEAD_INITIALIZER(fadv_list); static thread_t fadv_thread = THREAD_NULL; static uint32_t fadv_active; #define FADV_CACHE_NAME "flowadv" /* cache name */ static int flowadv_thread_cont(int); static void flowadv_thread_func(void *, wait_result_t); void flowadv_init(void) { if (kernel_thread_start(flowadv_thread_func, NULL, &fadv_thread) != KERN_SUCCESS) { panic("%s: couldn't create flow event advisory thread", __func__); /* NOTREACHED */ } thread_deallocate(fadv_thread); } struct flowadv_fcentry * flowadv_alloc_entry(int how) { return kalloc_type(struct flowadv_fcentry, how | Z_ZERO); } void flowadv_free_entry(struct flowadv_fcentry *fce) { kfree_type(struct flowadv_fcentry, fce); } void flowadv_add(struct flowadv_fclist *fcl) { if (STAILQ_EMPTY(fcl)) { return; } lck_mtx_lock_spin(&fadv_lock); STAILQ_CONCAT(&fadv_list, fcl); VERIFY(!STAILQ_EMPTY(&fadv_list)); if (!fadv_active && fadv_thread != THREAD_NULL) { wakeup_one((caddr_t)&fadv_list); } lck_mtx_unlock(&fadv_lock); } void flowadv_add_entry(struct flowadv_fcentry *fce) { lck_mtx_lock_spin(&fadv_lock); STAILQ_INSERT_HEAD(&fadv_list, fce, fce_link); VERIFY(!STAILQ_EMPTY(&fadv_list)); if (!fadv_active && fadv_thread != THREAD_NULL) { wakeup_one((caddr_t)&fadv_list); } lck_mtx_unlock(&fadv_lock); } static int flowadv_thread_cont(int err) { #pragma unused(err) for (;;) { LCK_MTX_ASSERT(&fadv_lock, LCK_MTX_ASSERT_OWNED); while (STAILQ_EMPTY(&fadv_list)) { VERIFY(!fadv_active); (void) msleep0(&fadv_list, &fadv_lock, (PSOCK | PSPIN), "flowadv_cont", 0, flowadv_thread_cont); /* NOTREACHED */ } fadv_active = 1; for (;;) { struct flowadv_fcentry *fce; VERIFY(!STAILQ_EMPTY(&fadv_list)); fce = STAILQ_FIRST(&fadv_list); STAILQ_REMOVE(&fadv_list, fce, flowadv_fcentry, fce_link); STAILQ_NEXT(fce, fce_link) = NULL; lck_mtx_unlock(&fadv_lock); if (fce->fce_event_type == FCE_EVENT_TYPE_CONGESTION_EXPERIENCED) { switch (fce->fce_flowsrc_type) { case FLOWSRC_CHANNEL: kern_channel_flowadv_report_ce_event(fce, fce->fce_ce_cnt, fce->fce_pkts_since_last_report); break; case FLOWSRC_INPCB: case FLOWSRC_IFNET: case FLOWSRC_PF: default: break; } goto next; } switch (fce->fce_flowsrc_type) { case FLOWSRC_INPCB: inp_flowadv(fce->fce_flowid); break; case FLOWSRC_IFNET: #if SKYWALK /* * when using the flowID allocator, IPSec * driver uses the "pkt_flowid" field in mbuf * packet header for the globally unique flowID * and the "pkt_mpriv_srcid" field carries the * interface flow control id (if_flowhash). * For IPSec flows, it is the IPSec driver * network interface which is flow controlled, * instead of the IPSec SA flow. */ ifnet_flowadv(fce->fce_flowsrc_token); #else /* !SKYWALK */ ifnet_flowadv(fce->fce_flowid); #endif /* !SKYWALK */ break; #if SKYWALK case FLOWSRC_CHANNEL: kern_channel_flowadv_clear(fce); break; #endif /* SKYWALK */ case FLOWSRC_PF: default: break; } next: flowadv_free_entry(fce); lck_mtx_lock_spin(&fadv_lock); /* if there's no pending request, we're done */ if (STAILQ_EMPTY(&fadv_list)) { break; } } fadv_active = 0; } } __dead2 static void flowadv_thread_func(void *v, wait_result_t w) { #pragma unused(v, w) lck_mtx_lock(&fadv_lock); (void) msleep0(&fadv_list, &fadv_lock, (PSOCK | PSPIN), "flowadv", 0, flowadv_thread_cont); /* * msleep0() shouldn't have returned as PCATCH was not set; * therefore assert in this case. */ lck_mtx_unlock(&fadv_lock); VERIFY(0); }