855 lines
19 KiB
C
855 lines
19 KiB
C
/*
|
|
* Copyright (c) 2011-2021 Apple Inc. All rights reserved.
|
|
*
|
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
|
|
*
|
|
* This file contains Original Code and/or Modifications of Original Code
|
|
* as defined in and that are subject to the Apple Public Source License
|
|
* Version 2.0 (the 'License'). You may not use this file except in
|
|
* compliance with the License. The rights granted to you under the License
|
|
* may not be used to create, or enable the creation or redistribution of,
|
|
* unlawful or unlicensed copies of an Apple operating system, or to
|
|
* circumvent, violate, or enable the circumvention or violation of, any
|
|
* terms of an Apple operating system software license agreement.
|
|
*
|
|
* Please obtain a copy of the License at
|
|
* http://www.opensource.apple.com/apsl/ and read it before using this file.
|
|
*
|
|
* The Original Code and all software distributed under the License are
|
|
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
|
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
|
|
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
|
|
* Please see the License for the specific language governing rights and
|
|
* limitations under the License.
|
|
*
|
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/errno.h>
|
|
#include <sys/mcache.h>
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <dev/random/randomdev.h>
|
|
#include <net/if.h>
|
|
#include <net/if_var.h>
|
|
#include <net/if_dl.h>
|
|
#include <net/if_types.h>
|
|
#include <net/net_osdep.h>
|
|
#include <net/pktsched/pktsched.h>
|
|
#include <net/pktsched/pktsched_fq_codel.h>
|
|
#include <net/pktsched/pktsched_netem.h>
|
|
|
|
#define _IP_VHL
|
|
#include <netinet/ip.h>
|
|
#include <netinet/ip6.h>
|
|
|
|
#include <pexpert/pexpert.h>
|
|
|
|
#if SKYWALK
|
|
#include <skywalk/os_skywalk_private.h>
|
|
#endif /* SKYWALK */
|
|
|
|
u_int32_t machclk_freq = 0;
|
|
u_int64_t machclk_per_sec = 0;
|
|
u_int32_t pktsched_verbose = 0; /* more noise if greater than 1 */
|
|
|
|
static void init_machclk(void);
|
|
|
|
SYSCTL_NODE(_net, OID_AUTO, pktsched, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktsched");
|
|
|
|
SYSCTL_UINT(_net_pktsched, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
|
|
&pktsched_verbose, 0, "Packet scheduler verbosity level");
|
|
|
|
void
|
|
pktsched_init(void)
|
|
{
|
|
init_machclk();
|
|
if (machclk_freq == 0) {
|
|
panic("%s: no CPU clock available!", __func__);
|
|
/* NOTREACHED */
|
|
}
|
|
pktsched_fq_init();
|
|
}
|
|
|
|
static void
|
|
init_machclk(void)
|
|
{
|
|
/*
|
|
* Initialize machclk_freq using the timerbase frequency
|
|
* value from device specific info.
|
|
*/
|
|
machclk_freq = (uint32_t)gPEClockFrequencyInfo.timebase_frequency_hz;
|
|
|
|
clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC,
|
|
&machclk_per_sec);
|
|
}
|
|
|
|
u_int64_t
|
|
pktsched_abs_to_nsecs(u_int64_t abstime)
|
|
{
|
|
u_int64_t nsecs;
|
|
|
|
absolutetime_to_nanoseconds(abstime, &nsecs);
|
|
return nsecs;
|
|
}
|
|
|
|
u_int64_t
|
|
pktsched_nsecs_to_abstime(u_int64_t nsecs)
|
|
{
|
|
u_int64_t abstime;
|
|
|
|
nanoseconds_to_absolutetime(nsecs, &abstime);
|
|
return abstime;
|
|
}
|
|
|
|
int
|
|
pktsched_setup(struct ifclassq *ifq, u_int32_t scheduler, u_int32_t sflags,
|
|
classq_pkt_type_t ptype)
|
|
{
|
|
int error = 0;
|
|
u_int32_t rflags;
|
|
|
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
|
|
|
VERIFY(machclk_freq != 0);
|
|
|
|
/* Nothing to do unless the scheduler type changes */
|
|
if (ifq->ifcq_type == scheduler) {
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Remember the flags that need to be restored upon success, as
|
|
* they may be cleared when we tear down existing scheduler.
|
|
*/
|
|
rflags = (ifq->ifcq_flags & IFCQF_ENABLED);
|
|
|
|
if (ifq->ifcq_type != PKTSCHEDT_NONE) {
|
|
pktsched_teardown(ifq);
|
|
|
|
/* Teardown should have succeeded */
|
|
VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
|
|
VERIFY(ifq->ifcq_disc == NULL);
|
|
}
|
|
|
|
error = fq_if_setup_ifclassq(ifq, sflags, ptype);
|
|
if (error == 0) {
|
|
ifq->ifcq_flags |= rflags;
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
void
|
|
pktsched_teardown(struct ifclassq *ifq)
|
|
{
|
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
|
if_qflush(ifq->ifcq_ifp, ifq, true);
|
|
VERIFY(IFCQ_IS_EMPTY(ifq));
|
|
ifq->ifcq_flags &= ~IFCQF_ENABLED;
|
|
if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
|
|
/* Could be PKTSCHEDT_NONE */
|
|
fq_if_teardown_ifclassq(ifq);
|
|
}
|
|
return;
|
|
}
|
|
|
|
int
|
|
pktsched_getqstats(struct ifclassq *ifq, u_int32_t gid, u_int32_t qid,
|
|
struct if_ifclassq_stats *ifqs)
|
|
{
|
|
int error = 0;
|
|
|
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
|
|
|
if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
|
|
/* Could be PKTSCHEDT_NONE */
|
|
error = fq_if_getqstats_ifclassq(ifq, (uint8_t)gid, qid, ifqs);
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
void
|
|
pktsched_pkt_encap(pktsched_pkt_t *pkt, classq_pkt_t *cpkt)
|
|
{
|
|
pkt->pktsched_pkt = *cpkt;
|
|
pkt->pktsched_tail = *cpkt;
|
|
pkt->pktsched_pcnt = 1;
|
|
|
|
switch (cpkt->cp_ptype) {
|
|
case QP_MBUF:
|
|
pkt->pktsched_plen =
|
|
(uint32_t)m_pktlen(pkt->pktsched_pkt_mbuf);
|
|
break;
|
|
|
|
#if SKYWALK
|
|
case QP_PACKET:
|
|
pkt->pktsched_plen = pkt->pktsched_pkt_kpkt->pkt_length;
|
|
break;
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
}
|
|
|
|
void
|
|
pktsched_pkt_encap_chain(pktsched_pkt_t *pkt, classq_pkt_t *cpkt,
|
|
classq_pkt_t *tail, uint32_t cnt, uint32_t bytes)
|
|
{
|
|
pkt->pktsched_pkt = *cpkt;
|
|
pkt->pktsched_tail = *tail;
|
|
pkt->pktsched_pcnt = cnt;
|
|
pkt->pktsched_plen = bytes;
|
|
|
|
switch (cpkt->cp_ptype) {
|
|
case QP_MBUF:
|
|
break;
|
|
|
|
#if SKYWALK
|
|
case QP_PACKET:
|
|
break;
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
}
|
|
|
|
int
|
|
pktsched_clone_pkt(pktsched_pkt_t *pkt1, pktsched_pkt_t *pkt2)
|
|
{
|
|
struct mbuf *m1, *m2;
|
|
#if SKYWALK
|
|
struct __kern_packet *p1;
|
|
kern_packet_t ph2;
|
|
int err;
|
|
#endif /* SKYWALK */
|
|
|
|
ASSERT(pkt1 != NULL);
|
|
ASSERT(pkt1->pktsched_pkt_mbuf != NULL);
|
|
ASSERT(pkt1->pktsched_pcnt == 1);
|
|
|
|
/* allow in place clone, but make sure pkt2->pktsched_pkt won't leak */
|
|
ASSERT((pkt1 == pkt2 && pkt1->pktsched_pkt_mbuf ==
|
|
pkt2->pktsched_pkt_mbuf) || (pkt1 != pkt2 &&
|
|
pkt2->pktsched_pkt_mbuf == NULL));
|
|
|
|
switch (pkt1->pktsched_ptype) {
|
|
case QP_MBUF:
|
|
m1 = (struct mbuf *)pkt1->pktsched_pkt_mbuf;
|
|
m2 = m_dup(m1, M_NOWAIT);
|
|
if (__improbable(m2 == NULL)) {
|
|
return ENOBUFS;
|
|
}
|
|
pkt2->pktsched_pkt_mbuf = m2;
|
|
break;
|
|
|
|
#if SKYWALK
|
|
case QP_PACKET:
|
|
p1 = (struct __kern_packet *)pkt1->pktsched_pkt_kpkt;
|
|
err = kern_packet_clone_nosleep(SK_PTR_ENCODE(p1,
|
|
METADATA_TYPE(p1), METADATA_SUBTYPE(p1)), &ph2,
|
|
KPKT_COPY_HEAVY);
|
|
if (__improbable(err != 0)) {
|
|
return err;
|
|
}
|
|
ASSERT(ph2 != 0);
|
|
VERIFY(kern_packet_finalize(ph2) == 0);
|
|
pkt2->pktsched_pkt_kpkt = SK_PTR_ADDR_KPKT(ph2);
|
|
break;
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
pkt2->pktsched_plen = pkt1->pktsched_plen;
|
|
pkt2->pktsched_ptype = pkt1->pktsched_ptype;
|
|
pkt2->pktsched_tail = pkt2->pktsched_pkt;
|
|
pkt2->pktsched_pcnt = 1;
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
pktsched_corrupt_packet(pktsched_pkt_t *pkt)
|
|
{
|
|
struct mbuf *m = NULL;
|
|
uint8_t *data = NULL;
|
|
uint32_t data_len = 0;
|
|
uint32_t rand32, rand_off, rand_bit;
|
|
#if SKYWALK
|
|
struct __kern_packet *p = NULL;
|
|
#endif /* SKYWALK */
|
|
|
|
switch (pkt->pktsched_ptype) {
|
|
case QP_MBUF:
|
|
m = pkt->pktsched_pkt_mbuf;
|
|
data = mtod(m, uint8_t *);
|
|
data_len = m->m_pkthdr.len;
|
|
break;
|
|
#if SKYWALK
|
|
case QP_PACKET:
|
|
p = pkt->pktsched_pkt_kpkt;
|
|
if (p->pkt_pflags & PKT_F_MBUF_DATA) {
|
|
m = p->pkt_mbuf;
|
|
data = mtod(m, uint8_t *);
|
|
data_len = m->m_pkthdr.len;
|
|
} else {
|
|
MD_BUFLET_ADDR_DLEN(p, data, data_len);
|
|
}
|
|
break;
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
/* NOTREACHED */
|
|
VERIFY(0);
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
read_frandom(&rand32, sizeof(rand32));
|
|
rand_bit = rand32 & 0x8;
|
|
rand_off = (rand32 >> 3) % data_len;
|
|
data[rand_off] ^= 1 << rand_bit;
|
|
}
|
|
|
|
void
|
|
pktsched_free_pkt(pktsched_pkt_t *pkt)
|
|
{
|
|
uint32_t cnt = pkt->pktsched_pcnt;
|
|
ASSERT(cnt != 0);
|
|
|
|
switch (pkt->pktsched_ptype) {
|
|
case QP_MBUF: {
|
|
struct mbuf *m;
|
|
|
|
m = pkt->pktsched_pkt_mbuf;
|
|
if (cnt == 1) {
|
|
VERIFY(m->m_nextpkt == NULL);
|
|
} else {
|
|
VERIFY(m->m_nextpkt != NULL);
|
|
}
|
|
m_freem_list(m);
|
|
break;
|
|
}
|
|
#if SKYWALK
|
|
case QP_PACKET: {
|
|
struct __kern_packet *kpkt;
|
|
int pcnt = 0;
|
|
|
|
kpkt = pkt->pktsched_pkt_kpkt;
|
|
if (cnt == 1) {
|
|
VERIFY(kpkt->pkt_nextpkt == NULL);
|
|
} else {
|
|
VERIFY(kpkt->pkt_nextpkt != NULL);
|
|
}
|
|
pp_free_packet_chain(kpkt, &pcnt);
|
|
VERIFY(cnt == (uint32_t)pcnt);
|
|
break;
|
|
}
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
pkt->pktsched_pkt = CLASSQ_PKT_INITIALIZER(pkt->pktsched_pkt);
|
|
pkt->pktsched_tail = CLASSQ_PKT_INITIALIZER(pkt->pktsched_tail);
|
|
pkt->pktsched_plen = 0;
|
|
pkt->pktsched_pcnt = 0;
|
|
}
|
|
|
|
mbuf_svc_class_t
|
|
pktsched_get_pkt_svc(pktsched_pkt_t *pkt)
|
|
{
|
|
mbuf_svc_class_t svc = MBUF_SC_UNSPEC;
|
|
|
|
switch (pkt->pktsched_ptype) {
|
|
case QP_MBUF:
|
|
svc = m_get_service_class(pkt->pktsched_pkt_mbuf);
|
|
break;
|
|
|
|
#if SKYWALK
|
|
case QP_PACKET:
|
|
svc = pkt->pktsched_pkt_kpkt->pkt_svc_class;
|
|
break;
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
return svc;
|
|
}
|
|
|
|
void
|
|
pktsched_get_pkt_vars(pktsched_pkt_t *pkt, volatile uint32_t **flags,
|
|
uint64_t **timestamp, uint32_t *flowid, uint8_t *flowsrc, uint8_t *proto,
|
|
uint32_t *comp_gencnt, uint64_t *pkt_tx_time)
|
|
{
|
|
switch (pkt->pktsched_ptype) {
|
|
case QP_MBUF: {
|
|
struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
|
|
|
|
if (flags != NULL) {
|
|
*flags = &pkth->pkt_flags;
|
|
}
|
|
if (timestamp != NULL) {
|
|
*timestamp = &pkth->pkt_timestamp;
|
|
}
|
|
if (flowid != NULL) {
|
|
*flowid = pkth->pkt_flowid;
|
|
}
|
|
if (flowsrc != NULL) {
|
|
*flowsrc = pkth->pkt_flowsrc;
|
|
}
|
|
if (proto != NULL) {
|
|
/*
|
|
* rdar://100524205 - We want to use the pkt_ext_flags
|
|
* to denote QUIC packets, but AQM is already written in
|
|
* such a way where IPPROTO_QUIC is used to denote QUIC
|
|
* packets.
|
|
*/
|
|
if (pkth->pkt_ext_flags & PKTF_EXT_QUIC) {
|
|
*proto = IPPROTO_QUIC;
|
|
} else {
|
|
*proto = pkth->pkt_proto;
|
|
}
|
|
}
|
|
if (comp_gencnt != NULL) {
|
|
*comp_gencnt = pkth->comp_gencnt;
|
|
}
|
|
if (pkt_tx_time != NULL) {
|
|
struct m_tag *tag;
|
|
tag = m_tag_locate(pkt->pktsched_pkt_mbuf, KERNEL_MODULE_TAG_ID,
|
|
KERNEL_TAG_TYPE_AQM);
|
|
if (__improbable(tag != NULL)) {
|
|
*pkt_tx_time = *(uint64_t *)tag->m_tag_data;
|
|
} else {
|
|
*pkt_tx_time = 0;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
#if SKYWALK
|
|
case QP_PACKET: {
|
|
struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
|
|
|
|
if (flags != NULL) {
|
|
/* use lower-32 bit for common flags */
|
|
*flags = &kp->pkt_pflags32;
|
|
}
|
|
if (timestamp != NULL) {
|
|
*timestamp = &kp->pkt_timestamp;
|
|
}
|
|
if (flowid != NULL) {
|
|
*flowid = kp->pkt_flow_token;
|
|
}
|
|
if (flowsrc != NULL) {
|
|
*flowsrc = (uint8_t)kp->pkt_flowsrc_type;
|
|
}
|
|
if (proto != NULL) {
|
|
*proto = kp->pkt_transport_protocol;
|
|
}
|
|
if (comp_gencnt != NULL) {
|
|
*comp_gencnt = kp->pkt_comp_gencnt;
|
|
}
|
|
if (pkt_tx_time != NULL && (kp->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0) {
|
|
*pkt_tx_time = kp->pkt_com_opt->__po_pkt_tx_time;
|
|
}
|
|
|
|
break;
|
|
}
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
}
|
|
|
|
struct flowadv_fcentry *
|
|
pktsched_alloc_fcentry(pktsched_pkt_t *pkt, struct ifnet *ifp, int how)
|
|
{
|
|
#pragma unused(ifp)
|
|
struct flowadv_fcentry *fce = NULL;
|
|
|
|
switch (pkt->pktsched_ptype) {
|
|
case QP_MBUF: {
|
|
struct mbuf *m = pkt->pktsched_pkt_mbuf;
|
|
|
|
fce = flowadv_alloc_entry(how);
|
|
if (fce == NULL) {
|
|
break;
|
|
}
|
|
|
|
_CASSERT(sizeof(m->m_pkthdr.pkt_flowid) ==
|
|
sizeof(fce->fce_flowid));
|
|
|
|
fce->fce_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
|
|
fce->fce_flowid = m->m_pkthdr.pkt_flowid;
|
|
#if SKYWALK
|
|
_CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_srcid) ==
|
|
sizeof(fce->fce_flowsrc_token));
|
|
_CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_fidx) ==
|
|
sizeof(fce->fce_flowsrc_fidx));
|
|
|
|
if (fce->fce_flowsrc_type == FLOWSRC_CHANNEL) {
|
|
fce->fce_flowsrc_fidx = m->m_pkthdr.pkt_mpriv_fidx;
|
|
fce->fce_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
|
|
fce->fce_ifp = ifp;
|
|
}
|
|
#endif /* SKYWALK */
|
|
break;
|
|
}
|
|
|
|
#if SKYWALK
|
|
case QP_PACKET: {
|
|
struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
|
|
|
|
fce = flowadv_alloc_entry(how);
|
|
if (fce == NULL) {
|
|
break;
|
|
}
|
|
|
|
_CASSERT(sizeof(fce->fce_flowid) ==
|
|
sizeof(kp->pkt_flow_token));
|
|
_CASSERT(sizeof(fce->fce_flowsrc_fidx) ==
|
|
sizeof(kp->pkt_flowsrc_fidx));
|
|
_CASSERT(sizeof(fce->fce_flowsrc_token) ==
|
|
sizeof(kp->pkt_flowsrc_token));
|
|
|
|
ASSERT(kp->pkt_pflags & PKT_F_FLOW_ADV);
|
|
fce->fce_flowsrc_type = kp->pkt_flowsrc_type;
|
|
fce->fce_flowid = kp->pkt_flow_token;
|
|
fce->fce_flowsrc_fidx = kp->pkt_flowsrc_fidx;
|
|
fce->fce_flowsrc_token = kp->pkt_flowsrc_token;
|
|
fce->fce_ifp = ifp;
|
|
break;
|
|
}
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
return fce;
|
|
}
|
|
|
|
uint32_t *
|
|
pktsched_get_pkt_sfb_vars(pktsched_pkt_t *pkt, uint32_t **sfb_flags)
|
|
{
|
|
uint32_t *hashp = NULL;
|
|
|
|
switch (pkt->pktsched_ptype) {
|
|
case QP_MBUF: {
|
|
struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
|
|
|
|
_CASSERT(sizeof(pkth->pkt_mpriv_hash) == sizeof(uint32_t));
|
|
_CASSERT(sizeof(pkth->pkt_mpriv_flags) == sizeof(uint32_t));
|
|
*sfb_flags = &pkth->pkt_mpriv_flags;
|
|
hashp = &pkth->pkt_mpriv_hash;
|
|
break;
|
|
}
|
|
|
|
#if SKYWALK
|
|
case QP_PACKET: {
|
|
struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
|
|
|
|
_CASSERT(sizeof(kp->pkt_classq_hash) == sizeof(uint32_t));
|
|
_CASSERT(sizeof(kp->pkt_classq_flags) == sizeof(uint32_t));
|
|
*sfb_flags = &kp->pkt_classq_flags;
|
|
hashp = &kp->pkt_classq_hash;
|
|
break;
|
|
}
|
|
#endif /* SKYWALK */
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
return hashp;
|
|
}
|
|
|
|
static int
|
|
pktsched_mbuf_mark_ecn(struct mbuf* m)
|
|
{
|
|
struct mbuf *m0;
|
|
void *hdr;
|
|
int af;
|
|
uint8_t ipv;
|
|
|
|
hdr = m->m_pkthdr.pkt_hdr;
|
|
/* verify that hdr is within the mbuf data */
|
|
for (m0 = m; m0 != NULL; m0 = m0->m_next) {
|
|
if (((caddr_t)hdr >= m_mtod_current(m0)) &&
|
|
((caddr_t)hdr < m_mtod_current(m0) + m0->m_len)) {
|
|
break;
|
|
}
|
|
}
|
|
if (m0 == NULL) {
|
|
return EINVAL;
|
|
}
|
|
ipv = IP_VHL_V(*(uint8_t *)hdr);
|
|
if (ipv == 4) {
|
|
af = AF_INET;
|
|
} else if (ipv == 6) {
|
|
af = AF_INET6;
|
|
} else {
|
|
af = AF_UNSPEC;
|
|
}
|
|
|
|
switch (af) {
|
|
case AF_INET: {
|
|
struct ip *ip = hdr;
|
|
uint8_t otos;
|
|
int sum;
|
|
|
|
if (((uintptr_t)ip + sizeof(*ip)) >
|
|
((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0))) {
|
|
return EINVAL; /* out of bounds */
|
|
}
|
|
if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
|
|
return EINVAL; /* not-ECT */
|
|
}
|
|
if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
|
|
return 0; /* already marked */
|
|
}
|
|
/*
|
|
* ecn-capable but not marked,
|
|
* mark CE and update checksum
|
|
*/
|
|
otos = ip->ip_tos;
|
|
ip->ip_tos |= IPTOS_ECN_CE;
|
|
/*
|
|
* update checksum (from RFC1624) only if hw
|
|
* checksum is not supported.
|
|
* HC' = ~(~HC + ~m + m')
|
|
*/
|
|
if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IP) == 0) {
|
|
sum = ~ntohs(ip->ip_sum) & 0xffff;
|
|
sum += (~otos & 0xffff) + ip->ip_tos;
|
|
sum = (sum >> 16) + (sum & 0xffff);
|
|
sum += (sum >> 16); /* add carry */
|
|
ip->ip_sum = htons(~sum & 0xffff);
|
|
}
|
|
return 0;
|
|
}
|
|
case AF_INET6: {
|
|
struct ip6_hdr *ip6 = hdr;
|
|
u_int32_t flowlabel;
|
|
|
|
if (((uintptr_t)ip6 + sizeof(*ip6)) >
|
|
((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0))) {
|
|
return EINVAL; /* out of bounds */
|
|
}
|
|
flowlabel = ntohl(ip6->ip6_flow);
|
|
if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
|
|
(IPTOS_ECN_NOTECT << 20)) {
|
|
return EINVAL; /* not-ECT */
|
|
}
|
|
if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
|
|
(IPTOS_ECN_CE << 20)) {
|
|
return 0; /* already marked */
|
|
}
|
|
/*
|
|
* ecn-capable but not marked, mark CE
|
|
*/
|
|
flowlabel |= (IPTOS_ECN_CE << 20);
|
|
ip6->ip6_flow = htonl(flowlabel);
|
|
return 0;
|
|
}
|
|
default:
|
|
return EPROTONOSUPPORT;
|
|
}
|
|
}
|
|
|
|
static int
|
|
pktsched_kpkt_mark_ecn(struct __kern_packet *kpkt)
|
|
{
|
|
uint8_t ipv = 0, *l3_hdr;
|
|
|
|
if ((kpkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) != 0) {
|
|
ipv = kpkt->pkt_flow_ip_ver;
|
|
l3_hdr = (uint8_t *)kpkt->pkt_flow_ip_hdr;
|
|
} else {
|
|
uint8_t *pkt_buf;
|
|
uint32_t bdlen, bdlim, bdoff;
|
|
MD_BUFLET_ADDR_ABS_DLEN(kpkt, pkt_buf, bdlen, bdlim, bdoff);
|
|
|
|
/* takes care of both IPv4 and IPv6 */
|
|
l3_hdr = pkt_buf + kpkt->pkt_headroom + kpkt->pkt_l2_len;
|
|
ipv = IP_VHL_V(*(uint8_t *)l3_hdr);
|
|
if (ipv == 4) {
|
|
ipv = IPVERSION;
|
|
} else if (ipv == 6) {
|
|
ipv = IPV6_VERSION;
|
|
} else {
|
|
ipv = 0;
|
|
}
|
|
}
|
|
|
|
switch (ipv) {
|
|
case IPVERSION: {
|
|
uint8_t otos;
|
|
int sum;
|
|
|
|
struct ip *ip = (struct ip *)(void *)l3_hdr;
|
|
if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
|
|
return EINVAL; /* not-ECT */
|
|
}
|
|
if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
|
|
return 0; /* already marked */
|
|
}
|
|
/*
|
|
* ecn-capable but not marked,
|
|
* mark CE and update checksum
|
|
*/
|
|
otos = ip->ip_tos;
|
|
ip->ip_tos |= IPTOS_ECN_CE;
|
|
|
|
sum = ~ntohs(ip->ip_sum) & 0xffff;
|
|
sum += (~otos & 0xffff) + ip->ip_tos;
|
|
sum = (sum >> 16) + (sum & 0xffff);
|
|
sum += (sum >> 16); /* add carry */
|
|
ip->ip_sum = htons(~sum & 0xffff);
|
|
|
|
return 0;
|
|
}
|
|
case IPV6_VERSION: {
|
|
struct ip6_hdr *ip6 = (struct ip6_hdr *)l3_hdr;
|
|
u_int32_t flowlabel;
|
|
flowlabel = ntohl(ip6->ip6_flow);
|
|
if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
|
|
(IPTOS_ECN_NOTECT << 20)) {
|
|
return EINVAL; /* not-ECT */
|
|
}
|
|
if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
|
|
(IPTOS_ECN_CE << 20)) {
|
|
return 0; /* already marked */
|
|
}
|
|
/*
|
|
* ecn-capable but not marked, mark CE
|
|
*/
|
|
flowlabel |= (IPTOS_ECN_CE << 20);
|
|
ip6->ip6_flow = htonl(flowlabel);
|
|
|
|
return 0;
|
|
}
|
|
default:
|
|
return EPROTONOSUPPORT;
|
|
}
|
|
}
|
|
|
|
int
|
|
pktsched_mark_ecn(pktsched_pkt_t *pkt)
|
|
{
|
|
switch (pkt->pktsched_ptype) {
|
|
case QP_MBUF:
|
|
return pktsched_mbuf_mark_ecn(pkt->pktsched_pkt_mbuf);
|
|
case QP_PACKET:
|
|
return pktsched_kpkt_mark_ecn(pkt->pktsched_pkt_kpkt);
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
}
|
|
|
|
boolean_t
|
|
pktsched_is_pkt_l4s(pktsched_pkt_t *pkt)
|
|
{
|
|
switch (pkt->pktsched_ptype) {
|
|
case QP_MBUF: {
|
|
struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
|
|
return (pkth->pkt_ext_flags & PKTF_EXT_L4S) != 0;
|
|
}
|
|
case QP_PACKET: {
|
|
struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
|
|
return (kp->pkt_pflags & PKT_F_L4S) != 0;
|
|
}
|
|
|
|
default:
|
|
VERIFY(0);
|
|
/* NOTREACHED */
|
|
__builtin_unreachable();
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
struct aqm_tag_container {
|
|
struct m_tag aqm_m_tag;
|
|
uint64_t aqm_tag;
|
|
};
|
|
|
|
static struct m_tag *
|
|
m_tag_kalloc_aqm(u_int32_t id, u_int16_t type, uint16_t len, int wait)
|
|
{
|
|
struct aqm_tag_container *tag_container;
|
|
struct m_tag *tag = NULL;
|
|
|
|
assert3u(id, ==, KERNEL_MODULE_TAG_ID);
|
|
assert3u(type, ==, KERNEL_TAG_TYPE_AQM);
|
|
assert3u(len, ==, sizeof(uint64_t));
|
|
|
|
if (len != sizeof(uint64_t)) {
|
|
return NULL;
|
|
}
|
|
|
|
tag_container = kalloc_type(struct aqm_tag_container, wait | M_ZERO);
|
|
if (tag_container != NULL) {
|
|
tag = &tag_container->aqm_m_tag;
|
|
|
|
assert3p(tag, ==, tag_container);
|
|
|
|
M_TAG_INIT(tag, id, type, len, &tag_container->aqm_tag, NULL);
|
|
}
|
|
|
|
return tag;
|
|
}
|
|
|
|
static void
|
|
m_tag_kfree_aqm(struct m_tag *tag)
|
|
{
|
|
struct aqm_tag_container *tag_container = (struct aqm_tag_container *)tag;
|
|
|
|
assert3u(tag->m_tag_len, ==, sizeof(uint64_t));
|
|
|
|
kfree_type(struct aqm_tag_container, tag_container);
|
|
}
|
|
|
|
void
|
|
pktsched_register_m_tag(void)
|
|
{
|
|
int error;
|
|
|
|
error = m_register_internal_tag_type(KERNEL_TAG_TYPE_AQM, sizeof(uint64_t),
|
|
m_tag_kalloc_aqm, m_tag_kfree_aqm);
|
|
|
|
assert3u(error, ==, 0);
|
|
}
|