2590 lines
68 KiB
C
2590 lines
68 KiB
C
|
/*
|
||
|
* Copyright (c) 2016-2021 Apple Inc. All rights reserved.
|
||
|
*
|
||
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
|
||
|
*
|
||
|
* This file contains Original Code and/or Modifications of Original Code
|
||
|
* as defined in and that are subject to the Apple Public Source License
|
||
|
* Version 2.0 (the 'License'). You may not use this file except in
|
||
|
* compliance with the License. The rights granted to you under the License
|
||
|
* may not be used to create, or enable the creation or redistribution of,
|
||
|
* unlawful or unlicensed copies of an Apple operating system, or to
|
||
|
* circumvent, violate, or enable the circumvention or violation of, any
|
||
|
* terms of an Apple operating system software license agreement.
|
||
|
*
|
||
|
* Please obtain a copy of the License at
|
||
|
* http://www.opensource.apple.com/apsl/ and read it before using this file.
|
||
|
*
|
||
|
* The Original Code and all software distributed under the License are
|
||
|
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
||
|
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
|
||
|
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
|
||
|
* Please see the License for the specific language governing rights and
|
||
|
* limitations under the License.
|
||
|
*
|
||
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
|
||
|
*/
|
||
|
|
||
|
#include <sys/types.h>
|
||
|
#include <sys/param.h>
|
||
|
#include <kern/zalloc.h>
|
||
|
#include <net/ethernet.h>
|
||
|
#include <net/if_var.h>
|
||
|
#include <net/if.h>
|
||
|
#include <net/classq/classq.h>
|
||
|
#include <net/classq/classq_fq_codel.h>
|
||
|
#include <net/pktsched/pktsched_fq_codel.h>
|
||
|
#include <os/log.h>
|
||
|
#include <pexpert/pexpert.h> /* for PE_parse_boot_argn */
|
||
|
#include <mach/thread_act.h>
|
||
|
#include <kern/thread.h>
|
||
|
#include <kern/sched_prim.h>
|
||
|
|
||
|
/* Base DRR quantum; presumably bytes (one 1500-byte MTU) -- TODO confirm unit. */
#define FQ_CODEL_DEFAULT_QUANTUM 1500

/*
 * Per-service-class scaling of a base quantum (_q).
 *
 * Every use of the macro argument is parenthesized so that an expression
 * argument (for example "mtu + hdr") is scaled as a whole instead of having
 * only its last operand multiplied.
 */
#define FQ_CODEL_QUANTUM_BK_SYS(_q) (_q)
#define FQ_CODEL_QUANTUM_BK(_q) (_q)
#define FQ_CODEL_QUANTUM_BE(_q) (_q)
#define FQ_CODEL_QUANTUM_RD(_q) (_q)
#define FQ_CODEL_QUANTUM_OAM(_q) (_q)
#define FQ_CODEL_QUANTUM_AV(_q) ((_q) * 2)
#define FQ_CODEL_QUANTUM_RV(_q) ((_q) * 2)
#define FQ_CODEL_QUANTUM_VI(_q) ((_q) * 2)
#define FQ_CODEL_QUANTUM_VO(_q) (((_q) * 2) / 5)
#define FQ_CODEL_QUANTUM_CTL(_q) (((_q) * 2) / 5)
|
||
|
|
||
|
static KALLOC_TYPE_DEFINE(fq_if_zone, fq_if_t, NET_KT_DEFAULT);
|
||
|
static KALLOC_TYPE_DEFINE(fq_if_grp_zone, fq_if_group_t, NET_KT_DEFAULT);
|
||
|
|
||
|
SYSCTL_NODE(_net_classq, OID_AUTO, fq_codel, CTLFLAG_RW | CTLFLAG_LOCKED,
|
||
|
0, "FQ-CODEL parameters");
|
||
|
|
||
|
SYSCTL_INT(_net_classq_fq_codel, OID_AUTO, fq_enable_pacing, CTLFLAG_RW | CTLFLAG_LOCKED,
|
||
|
&ifclassq_enable_pacing, 0, "Enable pacing");
|
||
|
|
||
|
/*
 * Delay applied before an empty flow queue is purged; the sysctl description
 * below gives the unit as nanoseconds. The value starts at
 * FQ_EMPTY_PURGE_DELAY and is exposed as a read/write sysctl only on
 * DEVELOPMENT or DEBUG kernels.
 */
static uint64_t fq_empty_purge_delay = FQ_EMPTY_PURGE_DELAY;
|
||
|
#if (DEVELOPMENT || DEBUG)
|
||
|
SYSCTL_QUAD(_net_classq_fq_codel, OID_AUTO, fq_empty_purge_delay, CTLFLAG_RW |
|
||
|
CTLFLAG_LOCKED, &fq_empty_purge_delay, "Empty flow queue purge delay (ns)");
|
||
|
#endif /* DEVELOPMENT || DEBUG */
|
||
|
|
||
|
unsigned int ifclassq_enable_pacing = 1;
|
||
|
|
||
|
typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;
|
||
|
|
||
|
static fq_if_t *fq_if_alloc(struct ifclassq *, classq_pkt_type_t);
|
||
|
static void fq_if_destroy(fq_if_t *fqs);
|
||
|
static void fq_if_classq_init(fq_if_group_t *fqg, uint32_t priority,
|
||
|
uint32_t quantum, uint32_t drr_max, uint32_t svc_class);
|
||
|
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
|
||
|
int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
|
||
|
uint32_t *, flowq_dqlist_t *, bool, uint64_t, bool*, uint64_t*);
|
||
|
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
|
||
|
static void fq_if_purge(fq_if_t *);
|
||
|
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
|
||
|
static void fq_if_purge_flow(fq_if_t *, fq_t *, uint32_t *, uint32_t *,
|
||
|
uint64_t);
|
||
|
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl);
|
||
|
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
|
||
|
fq_t *fq, uint64_t now);
|
||
|
static void fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq);
|
||
|
static void fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now,
|
||
|
bool purge_all);
|
||
|
static inline void fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now);
|
||
|
static int fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq,
|
||
|
mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
|
||
|
classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
|
||
|
u_int32_t *retbytecnt, uint8_t grp_idx);
|
||
|
static void fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp,
|
||
|
cqrq_stat_sc_t *stat, uint64_t now);
|
||
|
static void fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp);
|
||
|
static inline boolean_t fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx);
|
||
|
static void fq_if_destroy_grps(fq_if_t *fqs);
|
||
|
|
||
|
uint32_t fq_codel_drr_max_values[FQ_IF_MAX_CLASSES] = {
|
||
|
[FQ_IF_CTL_INDEX] = 8,
|
||
|
[FQ_IF_VO_INDEX] = 8,
|
||
|
[FQ_IF_VI_INDEX] = 6,
|
||
|
[FQ_IF_RV_INDEX] = 6,
|
||
|
[FQ_IF_AV_INDEX] = 6,
|
||
|
[FQ_IF_OAM_INDEX] = 4,
|
||
|
[FQ_IF_RD_INDEX] = 4,
|
||
|
[FQ_IF_BE_INDEX] = 4,
|
||
|
[FQ_IF_BK_INDEX] = 2,
|
||
|
[FQ_IF_BK_SYS_INDEX] = 2,
|
||
|
};
|
||
|
|
||
|
#define FQ_CODEL_DRR_MAX(_s) fq_codel_drr_max_values[FQ_IF_##_s##_INDEX]
|
||
|
|
||
|
static boolean_t fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state state);
|
||
|
static void fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state dst_state, fq_if_state src_state);
|
||
|
static void fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state state);
|
||
|
static int fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state state, fq_if_group_t **selected_grp);
|
||
|
static void fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state dst_state, fq_if_state src_state);
|
||
|
|
||
|
static boolean_t fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state state);
|
||
|
static void fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state dst_state, fq_if_state src_state);
|
||
|
static void fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state state);
|
||
|
static int fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state state, fq_if_group_t **selected_grp);
|
||
|
static void fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
|
||
|
fq_if_state dst_state, fq_if_state src_state);
|
||
|
|
||
|
bitmap_ops_t fq_if_grps_bitmap_ops =
|
||
|
{
|
||
|
.ffs = fq_if_grps_bitmap_ffs,
|
||
|
.zeros = fq_if_grps_bitmap_zeros,
|
||
|
.cpy = fq_if_grps_bitmap_cpy,
|
||
|
.clr = fq_if_grps_bitmap_clr,
|
||
|
.move = fq_if_grps_bitmap_move,
|
||
|
};
|
||
|
|
||
|
bitmap_ops_t fq_if_grps_sc_bitmap_ops =
|
||
|
{
|
||
|
.ffs = fq_if_grps_sc_bitmap_ffs,
|
||
|
.zeros = fq_if_grps_sc_bitmap_zeros,
|
||
|
.cpy = fq_if_grps_sc_bitmap_cpy,
|
||
|
.clr = fq_if_grps_sc_bitmap_clr,
|
||
|
.move = fq_if_grps_sc_bitmap_move,
|
||
|
};
|
||
|
|
||
|
void
|
||
|
pktsched_fq_init(void)
|
||
|
{
|
||
|
PE_parse_boot_argn("ifclassq_enable_pacing", &ifclassq_enable_pacing,
|
||
|
sizeof(ifclassq_enable_pacing));
|
||
|
|
||
|
// format looks like ifcq_drr_max=8,8,6
|
||
|
char buf[(FQ_IF_MAX_CLASSES) * 3];
|
||
|
size_t i, len, pri_index = 0;
|
||
|
uint32_t drr = 0;
|
||
|
if (!PE_parse_boot_arg_str("ifcq_drr_max", buf, sizeof(buf))) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
len = strlen(buf);
|
||
|
for (i = 0; i < len + 1 && pri_index < FQ_IF_MAX_CLASSES; i++) {
|
||
|
if (buf[i] != ',' && buf[i] != '\0') {
|
||
|
VERIFY(buf[i] >= '0' && buf[i] <= '9');
|
||
|
drr = drr * 10 + buf[i] - '0';
|
||
|
continue;
|
||
|
}
|
||
|
fq_codel_drr_max_values[pri_index] = drr;
|
||
|
pri_index += 1;
|
||
|
drr = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#define FQ_IF_FLOW_HASH_ID(_flowid_) \
|
||
|
(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)
|
||
|
|
||
|
#define FQ_IF_CLASSQ_IDLE(_fcl_) \
|
||
|
(STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
|
||
|
STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
|
||
|
|
||
|
typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
|
||
|
typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
|
||
|
int64_t, uint32_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
|
||
|
uint32_t *, boolean_t *, uint64_t);
|
||
|
|
||
|
static void
|
||
|
fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
|
||
|
{
|
||
|
pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
|
||
|
}
|
||
|
|
||
|
static inline uint64_t
|
||
|
fq_codel_get_time(void)
|
||
|
{
|
||
|
struct timespec ts;
|
||
|
uint64_t now;
|
||
|
|
||
|
nanouptime(&ts);
|
||
|
now = ((uint64_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
|
||
|
return now;
|
||
|
}
|
||
|
|
||
|
#if SKYWALK
/*
 * Link two kernel-packet-backed descriptors: make next_pkt's kpkt the
 * pkt_nextpkt successor of pkt's kpkt.
 */
static void
fq_if_append_pkt(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	(pkt->cp_kpkt)->pkt_nextpkt = (next_pkt->cp_kpkt);
}
#endif /* SKYWALK */
|
||
|
|
||
|
#if SKYWALK
/*
 * Dequeue kernel packets from a single flow and append them to the
 * head/tail chain supplied by the caller.
 *
 * Packets are pulled while all of the following hold, checked in this order:
 * the flow's deficit is positive, no limit has been hit, the flow's kpkt
 * queue is non-empty, and fq_tx_time_ready() reports the flow as ready.
 *
 * For every packet: the deficit is reduced by the packet length, the first
 * packet of a fresh flow is tagged PKT_F_NEW_FLOW (and FQF_FRESH_FLOW is
 * cleared), class dequeue statistics and the caller's counters are bumped,
 * and ifclassq_set_packet_metadata() is invoked.
 *
 * NOTE(review): the previous comment in this function referred to an
 * assertion about pflags being part of PKT_F_COMMON_MASK; no such assertion
 * exists in this body.
 *
 * @param byte_limit/pkt_limit  stop once *byte_cnt / *pkt_cnt reach these
 * @param qempty                out: whether the flow's kpkt queue is empty
 * @return TRUE when the packet or byte limit was reached, FALSE otherwise
 */
static boolean_t
fq_getq_flow_kpkt(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;
	boolean_t hit_limit = FALSE;

	for (;;) {
		pktsched_pkt_t cur;
		uint32_t cur_len;

		/* Loop guards; evaluation order matches the original condition. */
		if (!(fq->fq_deficit > 0) || hit_limit != FALSE) {
			break;
		}
		if (KPKTQ_EMPTY(&fq->fq_kpktq)) {
			break;
		}
		if (!fq_tx_time_ready(fqs, fq, now, NULL)) {
			break;
		}

		_PKTSCHED_PKT_INIT(&cur);
		fq_getq_flow(fqs, fq, &cur, now);
		ASSERT(cur.pktsched_ptype == QP_PACKET);

		cur_len = pktsched_get_pkt_len(&cur);
		fq->fq_deficit -= cur_len;

		/* Tag the first packet dequeued from a fresh flow. */
		if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
			cur.pktsched_pkt_kpkt->pkt_pflags |= PKT_F_NEW_FLOW;
			fq->fq_flags &= ~FQF_FRESH_FLOW;
		}

		/* Append to the caller's chain, starting it if necessary. */
		if (head->cp_kpkt != NULL) {
			ASSERT(tail->cp_kpkt != NULL);
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = cur.pktsched_pkt_kpkt;
		} else {
			*head = cur.pktsched_pkt;
		}
		*tail = cur.pktsched_pkt;
		tail->cp_kpkt->pkt_nextpkt = NULL;

		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += cur_len;
		*pkt_cnt += 1;
		*byte_cnt += cur_len;

		ifclassq_set_packet_metadata(ifq, ifp, &cur.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			hit_limit = TRUE;
		}
	}

	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = KPKTQ_EMPTY(&fq->fq_kpktq);
	return hit_limit;
}
#endif /* SKYWALK */
|
||
|
|
||
|
static boolean_t
|
||
|
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
|
||
|
int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
|
||
|
classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
|
||
|
boolean_t *qempty, uint64_t now)
|
||
|
{
|
||
|
u_int32_t plen;
|
||
|
pktsched_pkt_t pkt;
|
||
|
boolean_t limit_reached = FALSE;
|
||
|
struct ifclassq *ifq = fqs->fqs_ifq;
|
||
|
struct ifnet *ifp = ifq->ifcq_ifp;
|
||
|
|
||
|
while (fq->fq_deficit > 0 && limit_reached == FALSE &&
|
||
|
!MBUFQ_EMPTY(&fq->fq_mbufq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
|
||
|
_PKTSCHED_PKT_INIT(&pkt);
|
||
|
fq_getq_flow(fqs, fq, &pkt, now);
|
||
|
ASSERT(pkt.pktsched_ptype == QP_MBUF);
|
||
|
|
||
|
plen = pktsched_get_pkt_len(&pkt);
|
||
|
fq->fq_deficit -= plen;
|
||
|
|
||
|
if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
|
||
|
pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= PKTF_NEW_FLOW;
|
||
|
fq->fq_flags &= ~FQF_FRESH_FLOW;
|
||
|
}
|
||
|
|
||
|
if (head->cp_mbuf == NULL) {
|
||
|
*head = pkt.pktsched_pkt;
|
||
|
} else {
|
||
|
ASSERT(tail->cp_mbuf != NULL);
|
||
|
ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
|
||
|
tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
|
||
|
}
|
||
|
*tail = pkt.pktsched_pkt;
|
||
|
tail->cp_mbuf->m_nextpkt = NULL;
|
||
|
fq_cl->fcl_stat.fcl_dequeue++;
|
||
|
fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
|
||
|
*pkt_cnt += 1;
|
||
|
*byte_cnt += plen;
|
||
|
|
||
|
ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);
|
||
|
|
||
|
/* Check if the limit is reached */
|
||
|
if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
|
||
|
limit_reached = TRUE;
|
||
|
}
|
||
|
}
|
||
|
KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
|
||
|
AQM_KTRACE_FQ_GRP_SC_IDX(fq),
|
||
|
fq->fq_bytes, fq->fq_min_qdelay);
|
||
|
|
||
|
*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
|
||
|
return limit_reached;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_pacemaker_tcall(thread_call_param_t arg0, thread_call_param_t arg1)
|
||
|
{
|
||
|
#pragma unused(arg1)
|
||
|
struct ifnet* ifp = (struct ifnet*)arg0;
|
||
|
ASSERT(ifp != NULL);
|
||
|
|
||
|
ifnet_start_ignore_delay(ifp);
|
||
|
}
|
||
|
|
||
|
fq_if_t *
|
||
|
fq_if_alloc(struct ifclassq *ifq, classq_pkt_type_t ptype)
|
||
|
{
|
||
|
fq_if_t *fqs;
|
||
|
|
||
|
ASSERT(ifq->ifcq_ifp != NULL);
|
||
|
fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
|
||
|
fqs->fqs_ifq = ifq;
|
||
|
fqs->fqs_ptype = ptype;
|
||
|
|
||
|
/* Configure packet drop limit across all queues */
|
||
|
fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(ifq);
|
||
|
STAILQ_INIT(&fqs->fqs_fclist);
|
||
|
TAILQ_INIT(&fqs->fqs_empty_list);
|
||
|
TAILQ_INIT(&fqs->fqs_combined_grp_list);
|
||
|
fqs->fqs_pacemaker_tcall = thread_call_allocate_with_options(fq_if_pacemaker_tcall,
|
||
|
(thread_call_param_t)(ifq->ifcq_ifp), THREAD_CALL_PRIORITY_KERNEL,
|
||
|
THREAD_CALL_OPTIONS_ONCE);
|
||
|
ASSERT(fqs->fqs_pacemaker_tcall != NULL);
|
||
|
|
||
|
return fqs;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_destroy(fq_if_t *fqs)
|
||
|
{
|
||
|
struct ifnet *ifp = fqs->fqs_ifq->ifcq_ifp;
|
||
|
thread_call_t tcall = fqs->fqs_pacemaker_tcall;
|
||
|
|
||
|
VERIFY(ifp != NULL);
|
||
|
ASSERT(tcall != NULL);
|
||
|
IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
|
||
|
LCK_MTX_ASSERT(&ifp->if_start_lock, LCK_MTX_ASSERT_NOTOWNED);
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
|
||
|
/*
|
||
|
* Since we are holding the IFCQ lock here, another thread cannot enter AQM
|
||
|
* and schedule a pacemaker call. So we do not need a sleep wait loop here
|
||
|
* cancel wait and free should succeed in one call.
|
||
|
*/
|
||
|
thread_call_cancel_wait(tcall);
|
||
|
ASSERT(thread_call_free(tcall));
|
||
|
|
||
|
fq_if_purge(fqs);
|
||
|
fq_if_destroy_grps(fqs);
|
||
|
|
||
|
fqs->fqs_ifq = NULL;
|
||
|
zfree(fq_if_zone, fqs);
|
||
|
}
|
||
|
|
||
|
static inline uint8_t
|
||
|
fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
|
||
|
{
|
||
|
uint8_t pri;
|
||
|
|
||
|
if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
|
||
|
switch (svc) {
|
||
|
case MBUF_SC_BK_SYS:
|
||
|
case MBUF_SC_BK:
|
||
|
pri = FQ_IF_BK_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_BE:
|
||
|
case MBUF_SC_RD:
|
||
|
case MBUF_SC_OAM:
|
||
|
pri = FQ_IF_BE_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_AV:
|
||
|
case MBUF_SC_RV:
|
||
|
case MBUF_SC_VI:
|
||
|
case MBUF_SC_SIG:
|
||
|
pri = FQ_IF_VI_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_VO:
|
||
|
case MBUF_SC_CTL:
|
||
|
pri = FQ_IF_VO_INDEX;
|
||
|
break;
|
||
|
default:
|
||
|
pri = FQ_IF_BE_INDEX; /* Use best effort by default */
|
||
|
break;
|
||
|
}
|
||
|
return pri;
|
||
|
}
|
||
|
|
||
|
/* scheduler is not managed by the driver */
|
||
|
switch (svc) {
|
||
|
case MBUF_SC_BK_SYS:
|
||
|
pri = FQ_IF_BK_SYS_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_BK:
|
||
|
pri = FQ_IF_BK_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_BE:
|
||
|
pri = FQ_IF_BE_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_RD:
|
||
|
pri = FQ_IF_RD_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_OAM:
|
||
|
pri = FQ_IF_OAM_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_AV:
|
||
|
pri = FQ_IF_AV_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_RV:
|
||
|
pri = FQ_IF_RV_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_VI:
|
||
|
pri = FQ_IF_VI_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_SIG:
|
||
|
pri = FQ_IF_SIG_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_VO:
|
||
|
pri = FQ_IF_VO_INDEX;
|
||
|
break;
|
||
|
case MBUF_SC_CTL:
|
||
|
pri = FQ_IF_CTL_INDEX;
|
||
|
break;
|
||
|
default:
|
||
|
pri = FQ_IF_BE_INDEX; /* Use best effort by default */
|
||
|
break;
|
||
|
}
|
||
|
return pri;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_classq_init(fq_if_group_t *fqg, uint32_t pri, uint32_t quantum,
|
||
|
uint32_t drr_max, uint32_t svc_class)
|
||
|
{
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
VERIFY(pri < FQ_IF_MAX_CLASSES);
|
||
|
fq_cl = &fqg->fqg_classq[pri];
|
||
|
|
||
|
VERIFY(fq_cl->fcl_quantum == 0);
|
||
|
VERIFY(quantum != 0);
|
||
|
fq_cl->fcl_quantum = quantum;
|
||
|
fq_cl->fcl_pri = pri;
|
||
|
fq_cl->fcl_drr_max = drr_max;
|
||
|
fq_cl->fcl_service_class = svc_class;
|
||
|
fq_cl->fcl_next_tx_time = 0;
|
||
|
fq_cl->fcl_flags = 0;
|
||
|
STAILQ_INIT(&fq_cl->fcl_new_flows);
|
||
|
STAILQ_INIT(&fq_cl->fcl_old_flows);
|
||
|
}
|
||
|
|
||
|
int
|
||
|
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
|
||
|
classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
|
||
|
{
|
||
|
uint8_t pri, grp_idx = 0;
|
||
|
fq_if_t *fqs;
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
fq_if_group_t *fq_group;
|
||
|
int ret;
|
||
|
mbuf_svc_class_t svc;
|
||
|
pktsched_pkt_t pkt;
|
||
|
|
||
|
pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);
|
||
|
|
||
|
fqs = (fq_if_t *)ifq->ifcq_disc;
|
||
|
svc = pktsched_get_pkt_svc(&pkt);
|
||
|
#if SKYWALK
|
||
|
if (head->cp_ptype == QP_PACKET) {
|
||
|
grp_idx = head->cp_kpkt->pkt_qset_idx;
|
||
|
}
|
||
|
#endif /* SKYWALK */
|
||
|
pri = fq_if_service_to_priority(fqs, svc);
|
||
|
VERIFY(pri < FQ_IF_MAX_CLASSES);
|
||
|
|
||
|
IFCQ_LOCK_SPIN(ifq);
|
||
|
fq_group = fq_if_find_grp(fqs, grp_idx);
|
||
|
fq_cl = &fq_group->fqg_classq[pri];
|
||
|
|
||
|
if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
|
||
|
IFCQ_UNLOCK(ifq);
|
||
|
/* BK_SYS is currently throttled */
|
||
|
os_atomic_inc(&fq_cl->fcl_stat.fcl_throttle_drops, relaxed);
|
||
|
pktsched_free_pkt(&pkt);
|
||
|
*pdrop = TRUE;
|
||
|
ret = EQSUSPENDED;
|
||
|
goto done;
|
||
|
}
|
||
|
|
||
|
ASSERT(pkt.pktsched_ptype == fqs->fqs_ptype);
|
||
|
ret = fq_addq(fqs, fq_group, &pkt, fq_cl);
|
||
|
if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
|
||
|
if (((fq_group->fqg_bitmaps[FQ_IF_ER] | fq_group->fqg_bitmaps[FQ_IF_EB]) &
|
||
|
(1 << pri)) == 0) {
|
||
|
/*
|
||
|
* this group is not in ER or EB groups,
|
||
|
* mark it as IB
|
||
|
*/
|
||
|
pktsched_bit_set(pri, &fq_group->fqg_bitmaps[FQ_IF_IB]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (__improbable(ret != 0)) {
|
||
|
if (ret == CLASSQEQ_SUCCESS_FC) {
|
||
|
/* packet enqueued, return advisory feedback */
|
||
|
ret = EQFULL;
|
||
|
*pdrop = FALSE;
|
||
|
} else if (ret == CLASSQEQ_COMPRESSED) {
|
||
|
ret = 0;
|
||
|
*pdrop = FALSE;
|
||
|
} else {
|
||
|
IFCQ_UNLOCK(ifq);
|
||
|
*pdrop = TRUE;
|
||
|
pktsched_free_pkt(&pkt);
|
||
|
switch (ret) {
|
||
|
case CLASSQEQ_DROP:
|
||
|
ret = ENOBUFS;
|
||
|
goto done;
|
||
|
case CLASSQEQ_DROP_FC:
|
||
|
ret = EQFULL;
|
||
|
goto done;
|
||
|
case CLASSQEQ_DROP_SP:
|
||
|
ret = EQSUSPENDED;
|
||
|
goto done;
|
||
|
default:
|
||
|
VERIFY(0);
|
||
|
/* NOTREACHED */
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
/* NOTREACHED */
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
} else {
|
||
|
*pdrop = FALSE;
|
||
|
}
|
||
|
IFCQ_ADD_LEN(ifq, cnt);
|
||
|
IFCQ_INC_BYTES(ifq, bytes);
|
||
|
|
||
|
|
||
|
FQS_GRP_ADD_LEN(fqs, grp_idx, cnt);
|
||
|
FQS_GRP_INC_BYTES(fqs, grp_idx, bytes);
|
||
|
|
||
|
IFCQ_UNLOCK(ifq);
|
||
|
done:
|
||
|
#if DEBUG || DEVELOPMENT
|
||
|
if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
|
||
|
ret = 0;
|
||
|
}
|
||
|
#endif /* DEBUG || DEVELOPMENT */
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt, uint8_t grp_idx)
|
||
|
{
|
||
|
(void) fq_if_dequeue_classq_multi(ifq, 1,
|
||
|
CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
|
||
|
classq_pkt_t *pkt, uint8_t grp_idx)
|
||
|
{
|
||
|
(void) fq_if_dequeue_sc_classq_multi(ifq, svc, 1,
|
||
|
CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
|
||
|
{
|
||
|
ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
|
||
|
ASSERT(!fq->fq_in_dqlist);
|
||
|
STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
|
||
|
fq->fq_in_dqlist = true;
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
|
||
|
classq_pkt_t *tail, classq_pkt_type_t ptype)
|
||
|
{
|
||
|
ASSERT(fq->fq_in_dqlist);
|
||
|
if (fq->fq_dq_head.cp_mbuf == NULL) {
|
||
|
goto done;
|
||
|
}
|
||
|
|
||
|
if (head->cp_mbuf == NULL) {
|
||
|
*head = fq->fq_dq_head;
|
||
|
} else {
|
||
|
ASSERT(tail->cp_mbuf != NULL);
|
||
|
|
||
|
switch (ptype) {
|
||
|
case QP_MBUF:
|
||
|
ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
|
||
|
tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
|
||
|
ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
|
||
|
break;
|
||
|
#if SKYWALK
|
||
|
case QP_PACKET:
|
||
|
ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
|
||
|
tail->cp_kpkt->pkt_nextpkt = fq->fq_dq_head.cp_kpkt;
|
||
|
ASSERT(fq->fq_dq_tail.cp_kpkt->pkt_nextpkt == NULL);
|
||
|
break;
|
||
|
#endif /* SKYWALK */
|
||
|
default:
|
||
|
VERIFY(0);
|
||
|
/* NOTREACHED */
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
}
|
||
|
*tail = fq->fq_dq_tail;
|
||
|
done:
|
||
|
STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
|
||
|
CLASSQ_PKT_INIT(&fq->fq_dq_head);
|
||
|
CLASSQ_PKT_INIT(&fq->fq_dq_tail);
|
||
|
fq->fq_in_dqlist = false;
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
|
||
|
classq_pkt_t *tail, classq_pkt_type_t ptype)
|
||
|
{
|
||
|
fq_t *fq, *tfq;
|
||
|
|
||
|
STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
|
||
|
fq_dqlist_remove(fq_dqlist_head, fq, head, tail, ptype);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
|
||
|
fq_if_group_t **selected_grp)
|
||
|
{
|
||
|
#pragma unused(pri)
|
||
|
|
||
|
fq_if_group_t *grp;
|
||
|
uint32_t highest_pri = FQ_IF_MAX_CLASSES;
|
||
|
int ret_pri = 0;
|
||
|
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
uint32_t cur_pri = pktsched_ffs(grp->fqg_bitmaps[state]);
|
||
|
/* bitmap is empty in this case */
|
||
|
if (cur_pri == 0) {
|
||
|
continue;
|
||
|
}
|
||
|
if (cur_pri <= highest_pri) {
|
||
|
highest_pri = cur_pri;
|
||
|
ret_pri = cur_pri;
|
||
|
*selected_grp = grp;
|
||
|
}
|
||
|
}
|
||
|
return ret_pri;
|
||
|
}
|
||
|
|
||
|
static boolean_t
|
||
|
fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
|
||
|
{
|
||
|
#pragma unused(pri)
|
||
|
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
if (grp->fqg_bitmaps[state] != 0) {
|
||
|
return FALSE;
|
||
|
}
|
||
|
}
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
|
||
|
fq_if_state src_state)
|
||
|
{
|
||
|
#pragma unused(pri)
|
||
|
|
||
|
fq_if_group_t *grp;
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
grp->fqg_bitmaps[dst_state] = grp->fqg_bitmaps[src_state];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
|
||
|
{
|
||
|
#pragma unused(pri)
|
||
|
|
||
|
fq_if_group_t *grp;
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
grp->fqg_bitmaps[state] = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
|
||
|
fq_if_state src_state)
|
||
|
{
|
||
|
#pragma unused(pri)
|
||
|
|
||
|
fq_if_group_t *grp;
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
grp->fqg_bitmaps[dst_state] =
|
||
|
grp->fqg_bitmaps[dst_state] | grp->fqg_bitmaps[src_state];
|
||
|
grp->fqg_bitmaps[src_state] = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
|
||
|
fq_if_group_t **selected_grp)
|
||
|
{
|
||
|
fq_if_group_t *grp;
|
||
|
int ret_pri = 0;
|
||
|
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
|
||
|
/* +1 to match the semantics of pktsched_ffs */
|
||
|
ret_pri = pri + 1;
|
||
|
*selected_grp = grp;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return ret_pri;
|
||
|
}
|
||
|
|
||
|
static boolean_t
|
||
|
fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
|
||
|
{
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
|
||
|
return FALSE;
|
||
|
}
|
||
|
}
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
|
||
|
fq_if_state src_state)
|
||
|
{
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
pktsched_bit_cpy(pri, &grp->fqg_bitmaps[dst_state],
|
||
|
&grp->fqg_bitmaps[src_state]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
|
||
|
{
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
pktsched_bit_clr(pri, &grp->fqg_bitmaps[state]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
|
||
|
fq_if_state src_state)
|
||
|
{
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
|
||
|
pktsched_bit_move(pri, &grp->fqg_bitmaps[dst_state],
|
||
|
&grp->fqg_bitmaps[src_state]);
|
||
|
pktsched_bit_clr(pri, &grp->fqg_bitmaps[src_state]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Pacemaker is only scheduled when no packet can be dequeued from AQM
|
||
|
* due to pacing. Pacemaker will doorbell the driver when current >= next_tx_time.
|
||
|
* This only applies to L4S traffic at this moment.
|
||
|
*/
|
||
|
static void
|
||
|
fq_if_schedule_pacemaker(fq_if_t *fqs, uint64_t now, uint64_t next_tx_time)
|
||
|
{
|
||
|
uint64_t deadline = 0;
|
||
|
if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
|
||
|
return;
|
||
|
}
|
||
|
ASSERT(next_tx_time != FQ_INVALID_TX_TS);
|
||
|
ASSERT(fqs->fqs_pacemaker_tcall != NULL);
|
||
|
ASSERT(now < next_tx_time);
|
||
|
|
||
|
DTRACE_SKYWALK2(pacemaker__schedule, struct ifnet*, fqs->fqs_ifq->ifcq_ifp,
|
||
|
uint64_t, next_tx_time - now);
|
||
|
KDBG(AQM_KTRACE_TX_PACEMAKER, fqs->fqs_ifq->ifcq_ifp->if_index, now,
|
||
|
next_tx_time, next_tx_time - now);
|
||
|
|
||
|
clock_interval_to_deadline((uint32_t)(next_tx_time - now), 1, &deadline);
|
||
|
thread_call_enter_delayed(fqs->fqs_pacemaker_tcall, deadline);
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
fq_if_dequeue_classq_multi_common(struct ifclassq *ifq, mbuf_svc_class_t svc,
|
||
|
u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
|
||
|
classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
|
||
|
uint8_t grp_idx)
|
||
|
{
|
||
|
uint32_t total_pktcnt = 0, total_bytecnt = 0;
|
||
|
classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
|
||
|
classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
|
||
|
classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
|
||
|
fq_if_append_pkt_t append_pkt;
|
||
|
flowq_dqlist_t fq_dqlist_head;
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
fq_grp_tailq_t *grp_list, tmp_grp_list;
|
||
|
fq_if_group_t *fq_grp = NULL;
|
||
|
fq_if_t *fqs;
|
||
|
uint64_t now, next_tx_time = FQ_INVALID_TX_TS;
|
||
|
int pri = 0, svc_pri = 0;
|
||
|
bool all_paced = true;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
||
|
|
||
|
fqs = (fq_if_t *)ifq->ifcq_disc;
|
||
|
STAILQ_INIT(&fq_dqlist_head);
|
||
|
|
||
|
switch (fqs->fqs_ptype) {
|
||
|
case QP_MBUF:
|
||
|
append_pkt = fq_if_append_mbuf;
|
||
|
break;
|
||
|
|
||
|
#if SKYWALK
|
||
|
case QP_PACKET:
|
||
|
append_pkt = fq_if_append_pkt;
|
||
|
break;
|
||
|
#endif /* SKYWALK */
|
||
|
|
||
|
default:
|
||
|
VERIFY(0);
|
||
|
/* NOTREACHED */
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
|
||
|
now = fq_codel_get_time();
|
||
|
if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
|
||
|
svc_pri = fq_if_service_to_priority(fqs, svc);
|
||
|
} else {
|
||
|
VERIFY(svc == MBUF_SC_UNSPEC);
|
||
|
}
|
||
|
|
||
|
if (fq_if_is_grp_combined(fqs, grp_idx)) {
|
||
|
grp_list = &fqs->fqs_combined_grp_list;
|
||
|
VERIFY(!TAILQ_EMPTY(grp_list));
|
||
|
} else {
|
||
|
grp_list = &tmp_grp_list;
|
||
|
fq_grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
TAILQ_INIT(grp_list);
|
||
|
TAILQ_INSERT_TAIL(grp_list, fq_grp, fqg_grp_link);
|
||
|
}
|
||
|
|
||
|
for (;;) {
|
||
|
uint32_t pktcnt = 0, bytecnt = 0;
|
||
|
classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
|
||
|
classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
|
||
|
bool fq_cl_all_paced = false;
|
||
|
uint64_t fq_cl_next_tx_time = FQ_INVALID_TX_TS;
|
||
|
|
||
|
if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_ER) &&
|
||
|
fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
|
||
|
fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_EB, FQ_IF_IB);
|
||
|
fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IB);
|
||
|
if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
|
||
|
if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
|
||
|
/*
|
||
|
* Move fq_cl in IR back to ER, so that they will be inspected with priority
|
||
|
* the next time the driver dequeues
|
||
|
*/
|
||
|
fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
|
||
|
fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IR);
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_ER, &fq_grp);
|
||
|
if (pri == 0) {
|
||
|
/*
|
||
|
* There are no ER flows, move the highest
|
||
|
* priority one from EB if there are any in that
|
||
|
* category
|
||
|
*/
|
||
|
pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_EB, &fq_grp);
|
||
|
VERIFY(pri > 0);
|
||
|
VERIFY(fq_grp != NULL);
|
||
|
pktsched_bit_clr((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_EB]);
|
||
|
pktsched_bit_set((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_ER]);
|
||
|
}
|
||
|
VERIFY(fq_grp != NULL);
|
||
|
pri--; /* index starts at 0 */
|
||
|
fq_cl = &fq_grp->fqg_classq[pri];
|
||
|
|
||
|
if (fq_cl->fcl_budget <= 0) {
|
||
|
/* Update the budget */
|
||
|
fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
|
||
|
fq_cl->fcl_stat.fcl_flows_cnt) *
|
||
|
fq_cl->fcl_quantum);
|
||
|
if (fq_cl->fcl_budget <= 0) {
|
||
|
goto state_change;
|
||
|
}
|
||
|
}
|
||
|
fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
|
||
|
(maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
|
||
|
&bytecnt, &fq_dqlist_head, true, now, &fq_cl_all_paced,
|
||
|
&fq_cl_next_tx_time);
|
||
|
if (head.cp_mbuf != NULL) {
|
||
|
ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
|
||
|
if (first.cp_mbuf == NULL) {
|
||
|
first = head;
|
||
|
} else {
|
||
|
ASSERT(last.cp_mbuf != NULL);
|
||
|
append_pkt(&last, &head);
|
||
|
}
|
||
|
last = tail;
|
||
|
append_pkt(&last, &tmp);
|
||
|
}
|
||
|
if (fq_cl_all_paced && fq_cl_next_tx_time < next_tx_time) {
|
||
|
fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
|
||
|
next_tx_time = fq_cl_next_tx_time;
|
||
|
}
|
||
|
fq_cl->fcl_budget -= bytecnt;
|
||
|
total_pktcnt += pktcnt;
|
||
|
total_bytecnt += bytecnt;
|
||
|
|
||
|
/*
|
||
|
* If the class has exceeded the budget but still has data
|
||
|
* to send, move it to IB
|
||
|
*/
|
||
|
state_change:
|
||
|
VERIFY(fq_grp != NULL);
|
||
|
all_paced &= fq_cl_all_paced;
|
||
|
if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
|
||
|
if (fq_cl->fcl_budget <= 0) {
|
||
|
pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
|
||
|
pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
|
||
|
} else if (fq_cl_all_paced) {
|
||
|
if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
|
||
|
/*
|
||
|
* If a fq_cl still has budget but only paced queues, park it
|
||
|
* to IR so that we will not keep looping over it
|
||
|
*/
|
||
|
pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IR]);
|
||
|
pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
|
||
|
VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
|
||
|
fq_grp->fqg_bitmaps[FQ_IF_EB] |
|
||
|
fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
|
||
|
fq_cl->fcl_budget = 0;
|
||
|
}
|
||
|
if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
|
||
|
if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
|
||
|
/*
|
||
|
* Move fq_cl in IR back to ER, so that they will be inspected with priority
|
||
|
* the next time the driver dequeues
|
||
|
*/
|
||
|
fqs->grp_bitmaps_move(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (!fq_if_is_grp_combined(fqs, grp_idx)) {
|
||
|
TAILQ_REMOVE(grp_list, fq_grp, fqg_grp_link);
|
||
|
VERIFY(TAILQ_EMPTY(grp_list));
|
||
|
}
|
||
|
|
||
|
fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last,
|
||
|
fqs->fqs_ptype);
|
||
|
|
||
|
if (__probable(first_packet != NULL)) {
|
||
|
*first_packet = first;
|
||
|
}
|
||
|
if (last_packet != NULL) {
|
||
|
*last_packet = last;
|
||
|
}
|
||
|
if (retpktcnt != NULL) {
|
||
|
*retpktcnt = total_pktcnt;
|
||
|
}
|
||
|
if (retbytecnt != NULL) {
|
||
|
*retbytecnt = total_bytecnt;
|
||
|
}
|
||
|
if (next_tx_time != FQ_INVALID_TX_TS) {
|
||
|
ASSERT(next_tx_time > now);
|
||
|
fq_if_schedule_pacemaker(fqs, now, next_tx_time);
|
||
|
}
|
||
|
|
||
|
IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
|
||
|
fq_if_purge_empty_flow_list(fqs, now, false);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
|
||
|
u_int32_t maxbytecnt, classq_pkt_t *first_packet,
|
||
|
classq_pkt_t *last_packet, u_int32_t *retpktcnt,
|
||
|
u_int32_t *retbytecnt, uint8_t grp_idx)
|
||
|
{
|
||
|
return fq_if_dequeue_classq_multi_common(ifq, MBUF_SC_UNSPEC, maxpktcnt, maxbytecnt,
|
||
|
first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
|
||
|
}
|
||
|
|
||
|
int
|
||
|
fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
|
||
|
u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
|
||
|
classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
|
||
|
uint8_t grp_idx)
|
||
|
{
|
||
|
fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
|
||
|
|
||
|
if (fq_if_is_grp_combined(fqs, grp_idx)) {
|
||
|
return fq_if_dequeue_classq_multi_common(ifq, svc, maxpktcnt, maxbytecnt,
|
||
|
first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
|
||
|
} else {
|
||
|
/*
|
||
|
* take a shortcut here since there is no need to schedule
|
||
|
* one single service class.
|
||
|
*/
|
||
|
return fq_if_dequeue_sc_classq_multi_separate(ifq, svc, maxpktcnt, maxbytecnt,
|
||
|
first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq, mbuf_svc_class_t svc,
|
||
|
u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
|
||
|
classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
|
||
|
uint8_t grp_idx)
|
||
|
{
|
||
|
fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
|
||
|
uint8_t pri;
|
||
|
u_int32_t total_pktcnt = 0, total_bytecnt = 0;
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
|
||
|
classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
|
||
|
fq_if_append_pkt_t append_pkt;
|
||
|
flowq_dqlist_t fq_dqlist_head;
|
||
|
fq_if_group_t *fq_grp;
|
||
|
uint64_t now;
|
||
|
|
||
|
switch (fqs->fqs_ptype) {
|
||
|
case QP_MBUF:
|
||
|
append_pkt = fq_if_append_mbuf;
|
||
|
break;
|
||
|
|
||
|
#if SKYWALK
|
||
|
case QP_PACKET:
|
||
|
append_pkt = fq_if_append_pkt;
|
||
|
break;
|
||
|
#endif /* SKYWALK */
|
||
|
|
||
|
default:
|
||
|
VERIFY(0);
|
||
|
/* NOTREACHED */
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
|
||
|
STAILQ_INIT(&fq_dqlist_head);
|
||
|
now = fq_codel_get_time();
|
||
|
|
||
|
pri = fq_if_service_to_priority(fqs, svc);
|
||
|
fq_grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
fq_cl = &fq_grp->fqg_classq[pri];
|
||
|
|
||
|
/*
|
||
|
* Now we have the queue for a particular service class. We need
|
||
|
* to dequeue as many packets as needed, first from the new flows
|
||
|
* and then from the old flows.
|
||
|
*/
|
||
|
while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
|
||
|
fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
|
||
|
classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
|
||
|
classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
|
||
|
u_int32_t pktcnt = 0, bytecnt = 0;
|
||
|
bool all_paced = false;
|
||
|
uint64_t next_tx_time = FQ_INVALID_TX_TS;
|
||
|
|
||
|
fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
|
||
|
(maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
|
||
|
&bytecnt, &fq_dqlist_head, false, now, &all_paced, &next_tx_time);
|
||
|
if (head.cp_mbuf != NULL) {
|
||
|
if (first.cp_mbuf == NULL) {
|
||
|
first = head;
|
||
|
} else {
|
||
|
ASSERT(last.cp_mbuf != NULL);
|
||
|
append_pkt(&last, &head);
|
||
|
}
|
||
|
last = tail;
|
||
|
}
|
||
|
total_pktcnt += pktcnt;
|
||
|
total_bytecnt += bytecnt;
|
||
|
|
||
|
if (next_tx_time != FQ_INVALID_TX_TS) {
|
||
|
ASSERT(next_tx_time > now);
|
||
|
fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
|
||
|
fq_if_schedule_pacemaker(fqs, now, next_tx_time);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Mark classq as IB if it's not idle, so that we can
|
||
|
* start without re-init the bitmaps when it's switched
|
||
|
* to combined mode.
|
||
|
*/
|
||
|
if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
|
||
|
pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
|
||
|
pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
|
||
|
pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_EB]);
|
||
|
} else {
|
||
|
pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
|
||
|
VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
|
||
|
fq_grp->fqg_bitmaps[FQ_IF_EB] |
|
||
|
fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
|
||
|
}
|
||
|
|
||
|
fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last, fqs->fqs_ptype);
|
||
|
|
||
|
if (__probable(first_packet != NULL)) {
|
||
|
*first_packet = first;
|
||
|
}
|
||
|
if (last_packet != NULL) {
|
||
|
*last_packet = last;
|
||
|
}
|
||
|
if (retpktcnt != NULL) {
|
||
|
*retpktcnt = total_pktcnt;
|
||
|
}
|
||
|
if (retbytecnt != NULL) {
|
||
|
*retbytecnt = total_bytecnt;
|
||
|
}
|
||
|
|
||
|
IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
|
||
|
fq_if_purge_empty_flow_list(fqs, now, false);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, uint32_t *pktsp,
|
||
|
uint32_t *bytesp, uint64_t now)
|
||
|
{
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
u_int32_t pkts, bytes;
|
||
|
pktsched_pkt_t pkt;
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
fq_cl = &FQ_CLASSQ(fq);
|
||
|
grp = FQ_GROUP(fq);
|
||
|
pkts = bytes = 0;
|
||
|
_PKTSCHED_PKT_INIT(&pkt);
|
||
|
for (;;) {
|
||
|
fq_getq_flow(fqs, fq, &pkt, now);
|
||
|
if (pkt.pktsched_pkt_mbuf == NULL) {
|
||
|
VERIFY(pkt.pktsched_ptype == QP_INVALID);
|
||
|
break;
|
||
|
}
|
||
|
pkts++;
|
||
|
bytes += pktsched_get_pkt_len(&pkt);
|
||
|
pktsched_free_pkt(&pkt);
|
||
|
}
|
||
|
KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
|
||
|
AQM_KTRACE_FQ_GRP_SC_IDX(fq), fq->fq_bytes, fq->fq_min_qdelay);
|
||
|
|
||
|
IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);
|
||
|
|
||
|
/* move through the flow queue states */
|
||
|
VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_EMPTY_FLOW)));
|
||
|
if (fq->fq_flags & FQF_NEW_FLOW) {
|
||
|
fq_if_empty_new_flow(fq, fq_cl);
|
||
|
}
|
||
|
if (fq->fq_flags & FQF_OLD_FLOW) {
|
||
|
fq_if_empty_old_flow(fqs, fq_cl, fq, now);
|
||
|
}
|
||
|
if (fq->fq_flags & FQF_EMPTY_FLOW) {
|
||
|
fq_if_purge_empty_flow(fqs, fq);
|
||
|
fq = NULL;
|
||
|
}
|
||
|
|
||
|
if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
|
||
|
int i;
|
||
|
for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
|
||
|
pktsched_bit_clr(fq_cl->fcl_pri, &grp->fqg_bitmaps[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (pktsp != NULL) {
|
||
|
*pktsp = pkts;
|
||
|
}
|
||
|
if (bytesp != NULL) {
|
||
|
*bytesp = bytes;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
|
||
|
{
|
||
|
fq_t *fq, *tfq;
|
||
|
uint64_t now;
|
||
|
|
||
|
now = fq_codel_get_time();
|
||
|
/*
|
||
|
* Take each flow from new/old flow list and flush mbufs
|
||
|
* in that flow
|
||
|
*/
|
||
|
STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
|
||
|
fq_if_purge_flow(fqs, fq, NULL, NULL, now);
|
||
|
}
|
||
|
STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
|
||
|
fq_if_purge_flow(fqs, fq, NULL, NULL, now);
|
||
|
}
|
||
|
VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
|
||
|
VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));
|
||
|
|
||
|
STAILQ_INIT(&fq_cl->fcl_new_flows);
|
||
|
STAILQ_INIT(&fq_cl->fcl_old_flows);
|
||
|
fq_cl->fcl_budget = 0;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_purge(fq_if_t *fqs)
|
||
|
{
|
||
|
uint64_t now;
|
||
|
fq_if_group_t *grp;
|
||
|
int i;
|
||
|
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
|
||
|
if (fqs->fqs_classq_groups[grp_idx] == NULL) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
fq_if_purge_grp(fqs, grp);
|
||
|
}
|
||
|
|
||
|
now = fq_codel_get_time();
|
||
|
fq_if_purge_empty_flow_list(fqs, now, true);
|
||
|
|
||
|
VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
|
||
|
VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
|
||
|
|
||
|
fqs->fqs_large_flow = NULL;
|
||
|
for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
|
||
|
VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
|
||
|
}
|
||
|
|
||
|
IFCQ_LEN(fqs->fqs_ifq) = 0;
|
||
|
IFCQ_BYTES(fqs->fqs_ifq) = 0;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
|
||
|
{
|
||
|
fq_t *fq;
|
||
|
uint64_t now;
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
|
||
|
req->packets = req->bytes = 0;
|
||
|
VERIFY(req->flow != 0);
|
||
|
|
||
|
now = fq_codel_get_time();
|
||
|
|
||
|
for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
|
||
|
if (fqs->fqs_classq_groups[grp_idx] == NULL) {
|
||
|
continue;
|
||
|
}
|
||
|
uint32_t bytes = 0, pkts = 0;
|
||
|
|
||
|
grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
/*
|
||
|
* Packet and traffic type are needed only if we want
|
||
|
* to create a flow queue.
|
||
|
*/
|
||
|
fq = fq_if_hash_pkt(fqs, grp, req->flow, req->sc, 0, false, FQ_TFC_C);
|
||
|
if (fq != NULL) {
|
||
|
fq_if_purge_flow(fqs, fq, &pkts, &bytes, now);
|
||
|
req->bytes += bytes;
|
||
|
req->packets += pkts;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static uint16_t
|
||
|
fq_if_calc_quantum(struct ifnet *ifp)
|
||
|
{
|
||
|
uint16_t quantum;
|
||
|
|
||
|
switch (ifp->if_family) {
|
||
|
case IFNET_FAMILY_ETHERNET:
|
||
|
VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
|
||
|
quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
|
||
|
break;
|
||
|
|
||
|
case IFNET_FAMILY_CELLULAR:
|
||
|
case IFNET_FAMILY_IPSEC:
|
||
|
case IFNET_FAMILY_UTUN:
|
||
|
VERIFY(ifp->if_mtu <= UINT16_MAX);
|
||
|
quantum = (uint16_t)ifp->if_mtu;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
quantum = FQ_CODEL_DEFAULT_QUANTUM;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if ((ifp->if_hwassist & IFNET_TSOF) != 0) {
|
||
|
VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
|
||
|
VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
|
||
|
quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
|
||
|
quantum = (quantum != 0) ? quantum : IF_MAXMTU;
|
||
|
}
|
||
|
|
||
|
quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
|
||
|
#if DEBUG || DEVELOPMENT
|
||
|
quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
|
||
|
#endif /* DEBUG || DEVELOPMENT */
|
||
|
VERIFY(quantum != 0);
|
||
|
return quantum;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_mtu_update(fq_if_t *fqs)
|
||
|
{
|
||
|
#define _FQ_CLASSQ_UPDATE_QUANTUM(_grp, _s, _q) \
|
||
|
(_grp)->fqg_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum = \
|
||
|
FQ_CODEL_QUANTUM_ ## _s(_q) \
|
||
|
|
||
|
uint32_t quantum;
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);
|
||
|
|
||
|
for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
|
||
|
if (fqs->fqs_classq_groups[grp_idx] == NULL) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
|
||
|
if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
|
||
|
} else {
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK_SYS, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, RD, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, OAM, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, AV, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, RV, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
|
||
|
_FQ_CLASSQ_UPDATE_QUANTUM(grp, CTL, quantum);
|
||
|
}
|
||
|
}
|
||
|
#undef _FQ_CLASSQ_UPDATE_QUANTUM
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_event(fq_if_t *fqs, cqev_t ev)
|
||
|
{
|
||
|
IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
|
||
|
|
||
|
switch (ev) {
|
||
|
case CLASSQ_EV_LINK_UP:
|
||
|
case CLASSQ_EV_LINK_DOWN:
|
||
|
fq_if_purge(fqs);
|
||
|
break;
|
||
|
case CLASSQ_EV_LINK_MTU:
|
||
|
fq_if_mtu_update(fqs);
|
||
|
break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
|
||
|
{
|
||
|
fq_if_purge_classq(fqs, fq_cl);
|
||
|
fqs->fqs_throttle = 1;
|
||
|
fq_cl->fcl_stat.fcl_throttle_on++;
|
||
|
KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_START,
|
||
|
fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
|
||
|
{
|
||
|
VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
|
||
|
fqs->fqs_throttle = 0;
|
||
|
fq_cl->fcl_stat.fcl_throttle_off++;
|
||
|
KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_END,
|
||
|
fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
|
||
|
{
|
||
|
struct ifclassq *ifq = fqs->fqs_ifq;
|
||
|
uint8_t index;
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
#if !MACH_ASSERT
|
||
|
#pragma unused(ifq)
|
||
|
#endif
|
||
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
||
|
|
||
|
if (!tr->set) {
|
||
|
tr->level = fqs->fqs_throttle;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
if (tr->level == fqs->fqs_throttle) {
|
||
|
return EALREADY;
|
||
|
}
|
||
|
|
||
|
/* Throttling is allowed on BK_SYS class only */
|
||
|
index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
|
||
|
|
||
|
for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
|
||
|
if (fqs->fqs_classq_groups[grp_idx] == NULL) {
|
||
|
continue;
|
||
|
}
|
||
|
grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
switch (tr->level) {
|
||
|
case IFNET_THROTTLE_OFF:
|
||
|
fq_if_classq_resume(fqs, &grp->fqg_classq[index]);
|
||
|
break;
|
||
|
case IFNET_THROTTLE_OPPORTUNISTIC:
|
||
|
fq_if_classq_suspend(fqs, &grp->fqg_classq[index]);
|
||
|
break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static inline boolean_t
|
||
|
fq_if_is_fq_cl_paced(fq_if_classq_t *fq_cl, uint64_t now)
|
||
|
{
|
||
|
if ((fq_cl->fcl_flags & FCL_PACED) != 0 && fq_cl->fcl_next_tx_time > now) {
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
fq_cl->fcl_flags &= ~FCL_PACED;
|
||
|
fq_cl->fcl_next_tx_time = 0;
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp, cqrq_stat_sc_t *stat, uint64_t now)
|
||
|
{
|
||
|
uint8_t pri;
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
|
||
|
ASSERT(stat != NULL);
|
||
|
pri = fq_if_service_to_priority(fqs, stat->sc);
|
||
|
|
||
|
fq_cl = &grp->fqg_classq[pri];
|
||
|
stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
|
||
|
stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;
|
||
|
|
||
|
if (ifclassq_enable_pacing && ifclassq_enable_l4s &&
|
||
|
fq_if_is_fq_cl_paced(fq_cl, now)) {
|
||
|
stat->packets = 0;
|
||
|
stat->bytes = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static boolean_t
|
||
|
fq_if_is_grp_all_paced(fq_if_group_t *grp)
|
||
|
{
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
uint64_t now;
|
||
|
|
||
|
if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
now = fq_codel_get_time();
|
||
|
for (uint8_t fq_cl_idx = 0; fq_cl_idx < FQ_IF_MAX_CLASSES; fq_cl_idx++) {
|
||
|
fq_cl = &grp->fqg_classq[fq_cl_idx];
|
||
|
if (fq_cl == NULL || FQ_IF_CLASSQ_IDLE(fq_cl)) {
|
||
|
continue;
|
||
|
}
|
||
|
if (!fq_if_is_fq_cl_paced(fq_cl, now)) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
boolean_t
|
||
|
fq_if_is_all_paced(struct ifclassq *ifq)
|
||
|
{
|
||
|
fq_if_group_t *grp;
|
||
|
fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
||
|
|
||
|
if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
|
||
|
grp = fqs->fqs_classq_groups[grp_idx];
|
||
|
if (grp == NULL || FQG_BYTES(grp) == 0) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (!fq_if_is_grp_all_paced(grp)) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
|
||
|
{
|
||
|
cqrq_stat_sc_t grp_sc_stat;
|
||
|
fq_if_group_t *grp;
|
||
|
uint64_t now = fq_codel_get_time();
|
||
|
|
||
|
if (stat == NULL) {
|
||
|
return;
|
||
|
}
|
||
|
grp_sc_stat.sc = stat->sc;
|
||
|
stat->packets = 0;
|
||
|
stat->bytes = 0;
|
||
|
|
||
|
if (stat->grp_idx == IF_CLASSQ_ALL_GRPS) {
|
||
|
if (stat->sc == MBUF_SC_UNSPEC) {
|
||
|
if (!fq_if_is_all_paced(fqs->fqs_ifq)) {
|
||
|
stat->packets = IFCQ_LEN(fqs->fqs_ifq);
|
||
|
stat->bytes = IFCQ_BYTES(fqs->fqs_ifq);
|
||
|
}
|
||
|
} else {
|
||
|
for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
|
||
|
grp = fqs->fqs_classq_groups[grp_idx];
|
||
|
if (grp == NULL) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
|
||
|
stat->packets += grp_sc_stat.packets;
|
||
|
stat->bytes += grp_sc_stat.bytes;
|
||
|
}
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (stat->sc == MBUF_SC_UNSPEC) {
|
||
|
if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
|
||
|
TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
|
||
|
if (fq_if_is_grp_all_paced(grp)) {
|
||
|
continue;
|
||
|
}
|
||
|
stat->packets += FQG_LEN(grp);
|
||
|
stat->bytes += FQG_BYTES(grp);
|
||
|
}
|
||
|
} else {
|
||
|
grp = fq_if_find_grp(fqs, stat->grp_idx);
|
||
|
if (!fq_if_is_grp_all_paced(grp)) {
|
||
|
stat->packets = FQG_LEN(grp);
|
||
|
stat->bytes = FQG_BYTES(grp);
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
|
||
|
TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
|
||
|
if (fq_if_is_grp_all_paced(grp)) {
|
||
|
continue;
|
||
|
}
|
||
|
fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
|
||
|
stat->packets += grp_sc_stat.packets;
|
||
|
stat->bytes += grp_sc_stat.bytes;
|
||
|
}
|
||
|
} else {
|
||
|
grp = fq_if_find_grp(fqs, stat->grp_idx);
|
||
|
fq_if_grp_stat_sc(fqs, grp, stat, now);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int
|
||
|
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
|
||
|
{
|
||
|
int err = 0;
|
||
|
fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
||
|
|
||
|
/*
|
||
|
* These are usually slow operations, convert the lock ahead of time
|
||
|
*/
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
switch (rq) {
|
||
|
case CLASSQRQ_PURGE:
|
||
|
fq_if_purge(fqs);
|
||
|
break;
|
||
|
case CLASSQRQ_PURGE_SC:
|
||
|
fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
|
||
|
break;
|
||
|
case CLASSQRQ_EVENT:
|
||
|
fq_if_event(fqs, (cqev_t)arg);
|
||
|
break;
|
||
|
case CLASSQRQ_THROTTLE:
|
||
|
fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
|
||
|
break;
|
||
|
case CLASSQRQ_STAT_SC:
|
||
|
fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
|
||
|
break;
|
||
|
}
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
|
||
|
classq_pkt_type_t ptype)
|
||
|
{
|
||
|
fq_if_t *fqs = NULL;
|
||
|
int err = 0;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
||
|
VERIFY(ifq->ifcq_disc == NULL);
|
||
|
VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
|
||
|
|
||
|
fqs = fq_if_alloc(ifq, ptype);
|
||
|
if (fqs == NULL) {
|
||
|
return ENOMEM;
|
||
|
}
|
||
|
if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
|
||
|
fqs->fqs_flags |= FQS_DRIVER_MANAGED;
|
||
|
fqs->fqs_bm_ops = &fq_if_grps_sc_bitmap_ops;
|
||
|
} else {
|
||
|
fqs->fqs_bm_ops = &fq_if_grps_bitmap_ops;
|
||
|
}
|
||
|
|
||
|
err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
|
||
|
if (err != 0) {
|
||
|
os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
|
||
|
"failed to attach fq_if: %d\n", __func__, err);
|
||
|
fq_if_destroy(fqs);
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Always create one group. If qset 0 is added later,
|
||
|
* this group will be updated.
|
||
|
*/
|
||
|
err = fq_if_create_grp(ifq, 0, IF_CLASSQ_DEF);
|
||
|
if (err != 0) {
|
||
|
os_log_error(OS_LOG_DEFAULT, "%s: error from fq_if_create_grp, "
|
||
|
"failed to create a fq group: %d\n", __func__, err);
|
||
|
fq_if_destroy(fqs);
|
||
|
}
|
||
|
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
fq_t *
|
||
|
fq_if_hash_pkt(fq_if_t *fqs, fq_if_group_t *fq_grp, u_int32_t flowid,
|
||
|
mbuf_svc_class_t svc_class, u_int64_t now, bool create,
|
||
|
fq_tfc_type_t tfc_type)
|
||
|
{
|
||
|
fq_t *fq = NULL;
|
||
|
flowq_list_t *fq_list;
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
u_int8_t fqs_hash_id;
|
||
|
u_int8_t scidx;
|
||
|
|
||
|
scidx = fq_if_service_to_priority(fqs, svc_class);
|
||
|
|
||
|
fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);
|
||
|
|
||
|
fq_list = &fqs->fqs_flows[fqs_hash_id];
|
||
|
|
||
|
SLIST_FOREACH(fq, fq_list, fq_hashlink) {
|
||
|
if (fq->fq_flowhash == flowid &&
|
||
|
fq->fq_sc_index == scidx &&
|
||
|
fq->fq_tfc_type == tfc_type &&
|
||
|
fq->fq_group == fq_grp) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (fq == NULL && create) {
|
||
|
/* If the flow is not already on the list, allocate it */
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
fq = fq_alloc(fqs->fqs_ptype);
|
||
|
if (fq != NULL) {
|
||
|
fq->fq_flowhash = flowid;
|
||
|
fq->fq_sc_index = scidx;
|
||
|
fq->fq_group = fq_grp;
|
||
|
fq->fq_tfc_type = tfc_type;
|
||
|
fq_cl = &FQ_CLASSQ(fq);
|
||
|
fq->fq_flags = (FQF_FLOWCTL_CAPABLE | FQF_FRESH_FLOW);
|
||
|
fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
|
||
|
fq->fq_next_tx_time = FQ_INVALID_TX_TS;
|
||
|
SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
|
||
|
fq_cl->fcl_stat.fcl_flows_cnt++;
|
||
|
}
|
||
|
KDBG(AQM_KTRACE_STATS_FLOW_ALLOC,
|
||
|
fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
|
||
|
AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
|
||
|
} else if ((fq != NULL) && (fq->fq_flags & FQF_EMPTY_FLOW)) {
|
||
|
fq_if_reuse_empty_flow(fqs, fq, now);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If getq time is not set because this is the first packet or after
|
||
|
* idle time, set it now so that we can detect a stall.
|
||
|
*/
|
||
|
if (fq != NULL && fq->fq_getqtime == 0) {
|
||
|
fq->fq_getqtime = now;
|
||
|
}
|
||
|
|
||
|
return fq;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
|
||
|
{
|
||
|
u_int8_t hash_id;
|
||
|
|
||
|
ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) == 0);
|
||
|
hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
|
||
|
SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
|
||
|
fq_hashlink);
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
|
||
|
fq_if_flow_feedback(fqs, fq, fq_cl);
|
||
|
}
|
||
|
KDBG(AQM_KTRACE_STATS_FLOW_DESTROY,
|
||
|
fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
|
||
|
AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
|
||
|
fq_destroy(fq, fqs->fqs_ptype);
|
||
|
}
|
||
|
|
||
|
inline boolean_t
|
||
|
fq_if_at_drop_limit(fq_if_t *fqs)
|
||
|
{
|
||
|
return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
|
||
|
TRUE : FALSE;
|
||
|
}
|
||
|
|
||
|
inline boolean_t
|
||
|
fq_if_almost_at_drop_limit(fq_if_t *fqs)
|
||
|
{
|
||
|
/*
|
||
|
* Whether we are above 90% of the queue limit. This is used to tell if we
|
||
|
* can stop flow controlling the largest flow.
|
||
|
*/
|
||
|
return IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit * 9 / 10;
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now)
|
||
|
{
|
||
|
ASSERT(fq->fq_flags & FQF_EMPTY_FLOW);
|
||
|
TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
|
||
|
STAILQ_NEXT(fq, fq_actlink) = NULL;
|
||
|
fq->fq_flags &= ~FQF_FLOW_STATE_MASK;
|
||
|
fq->fq_empty_purge_time = 0;
|
||
|
fq->fq_getqtime = 0;
|
||
|
fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
|
||
|
fqs->fqs_empty_list_cnt--;
|
||
|
fq_if_classq_t *fq_cl = &FQ_CLASSQ(fq);
|
||
|
fq_cl->fcl_stat.fcl_flows_cnt++;
|
||
|
}
|
||
|
|
||
|
inline void
|
||
|
fq_if_move_to_empty_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
|
||
|
uint64_t now)
|
||
|
{
|
||
|
ASSERT(fq->fq_flags & ~(FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_FLOWCTL_ON));
|
||
|
fq->fq_empty_purge_time = now + fq_empty_purge_delay;
|
||
|
TAILQ_INSERT_TAIL(&fqs->fqs_empty_list, fq, fq_empty_link);
|
||
|
fq->fq_flags |= FQF_EMPTY_FLOW;
|
||
|
FQ_CLEAR_OVERWHELMING(fq);
|
||
|
fqs->fqs_empty_list_cnt++;
|
||
|
/*
|
||
|
* fcl_flows_cnt is used in budget determination for the class.
|
||
|
* empty flow shouldn't contribute to the budget.
|
||
|
*/
|
||
|
fq_cl->fcl_stat.fcl_flows_cnt--;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq)
|
||
|
{
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
fq_cl = &FQ_CLASSQ(fq);
|
||
|
|
||
|
ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) != 0);
|
||
|
TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
|
||
|
fq->fq_flags &= ~FQF_EMPTY_FLOW;
|
||
|
fqs->fqs_empty_list_cnt--;
|
||
|
/* Remove from the hash list and free the flow queue */
|
||
|
fq_if_destroy_flow(fqs, fq_cl, fq);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now, bool purge_all)
|
||
|
{
|
||
|
fq_t *fq, *tmp;
|
||
|
int i = 0;
|
||
|
|
||
|
if (fqs->fqs_empty_list_cnt == 0) {
|
||
|
ASSERT(TAILQ_EMPTY(&fqs->fqs_empty_list));
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
TAILQ_FOREACH_SAFE(fq, &fqs->fqs_empty_list, fq_empty_link, tmp) {
|
||
|
if (!purge_all && ((now < fq->fq_empty_purge_time) ||
|
||
|
(i++ == FQ_EMPTY_PURGE_MAX))) {
|
||
|
break;
|
||
|
}
|
||
|
fq_if_purge_empty_flow(fqs, fq);
|
||
|
}
|
||
|
|
||
|
if (__improbable(purge_all)) {
|
||
|
VERIFY(fqs->fqs_empty_list_cnt == 0);
|
||
|
VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
|
||
|
uint64_t now)
|
||
|
{
|
||
|
/*
|
||
|
* Remove the flow queue from the old flows list.
|
||
|
*/
|
||
|
STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq, fq_actlink);
|
||
|
fq->fq_flags &= ~FQF_OLD_FLOW;
|
||
|
fq_cl->fcl_stat.fcl_oldflows_cnt--;
|
||
|
VERIFY(fq->fq_bytes == 0);
|
||
|
|
||
|
/* release any flow control */
|
||
|
if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
|
||
|
fq_if_flow_feedback(fqs, fq, fq_cl);
|
||
|
}
|
||
|
|
||
|
/* move the flow queue to empty flows list */
|
||
|
fq_if_move_to_empty_flow(fqs, fq_cl, fq, now);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl)
|
||
|
{
|
||
|
/* Move to the end of old queue list */
|
||
|
STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
|
||
|
flowq, fq_actlink);
|
||
|
fq->fq_flags &= ~FQF_NEW_FLOW;
|
||
|
fq_cl->fcl_stat.fcl_newflows_cnt--;
|
||
|
|
||
|
STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq, fq_actlink);
|
||
|
fq->fq_flags |= FQF_OLD_FLOW;
|
||
|
fq_cl->fcl_stat.fcl_oldflows_cnt++;
|
||
|
}
|
||
|
|
||
|
inline void
|
||
|
fq_if_drop_packet(fq_if_t *fqs, uint64_t now)
|
||
|
{
|
||
|
fq_t *fq = fqs->fqs_large_flow;
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
pktsched_pkt_t pkt;
|
||
|
volatile uint32_t *pkt_flags;
|
||
|
uint64_t *pkt_timestamp;
|
||
|
|
||
|
if (fq == NULL) {
|
||
|
return;
|
||
|
}
|
||
|
/* queue can not be empty on the largest flow */
|
||
|
VERIFY(!fq_empty(fq, fqs->fqs_ptype));
|
||
|
|
||
|
fq_cl = &FQ_CLASSQ(fq);
|
||
|
_PKTSCHED_PKT_INIT(&pkt);
|
||
|
fq_getq_flow_internal(fqs, fq, &pkt);
|
||
|
ASSERT(pkt.pktsched_ptype != QP_INVALID);
|
||
|
|
||
|
pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
|
||
|
NULL, NULL, NULL);
|
||
|
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
*pkt_timestamp = 0;
|
||
|
switch (pkt.pktsched_ptype) {
|
||
|
case QP_MBUF:
|
||
|
*pkt_flags &= ~PKTF_PRIV_GUARDED;
|
||
|
break;
|
||
|
#if SKYWALK
|
||
|
case QP_PACKET:
|
||
|
/* sanity check */
|
||
|
ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
|
||
|
break;
|
||
|
#endif /* SKYWALK */
|
||
|
default:
|
||
|
VERIFY(0);
|
||
|
/* NOTREACHED */
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
|
||
|
if (fq_empty(fq, fqs->fqs_ptype)) {
|
||
|
fqs->fqs_large_flow = NULL;
|
||
|
if (fq->fq_flags & FQF_OLD_FLOW) {
|
||
|
fq_if_empty_old_flow(fqs, fq_cl, fq, now);
|
||
|
} else {
|
||
|
VERIFY(fq->fq_flags & FQF_NEW_FLOW);
|
||
|
fq_if_empty_new_flow(fq, fq_cl);
|
||
|
}
|
||
|
}
|
||
|
IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));
|
||
|
|
||
|
pktsched_free_pkt(&pkt);
|
||
|
fq_cl->fcl_stat.fcl_drop_overflow++;
|
||
|
}
|
||
|
|
||
|
inline void
|
||
|
fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
|
||
|
{
|
||
|
fq_t *prev_fq;
|
||
|
|
||
|
if (fqs->fqs_large_flow != NULL &&
|
||
|
fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
|
||
|
fqs->fqs_large_flow = NULL;
|
||
|
}
|
||
|
|
||
|
if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
prev_fq = fqs->fqs_large_flow;
|
||
|
if (prev_fq == NULL) {
|
||
|
if (!fq_empty(fq, fqs->fqs_ptype)) {
|
||
|
fqs->fqs_large_flow = fq;
|
||
|
}
|
||
|
return;
|
||
|
} else if (fq->fq_bytes > prev_fq->fq_bytes) {
|
||
|
fqs->fqs_large_flow = fq;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
boolean_t
|
||
|
fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
|
||
|
fq_t *fq, fq_if_classq_t *fq_cl)
|
||
|
{
|
||
|
struct flowadv_fcentry *fce;
|
||
|
|
||
|
#if DEBUG || DEVELOPMENT
|
||
|
if (__improbable(ifclassq_flow_control_adv == 0)) {
|
||
|
os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
|
||
|
return TRUE;
|
||
|
}
|
||
|
#endif /* DEBUG || DEVELOPMENT */
|
||
|
|
||
|
STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
|
||
|
if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
|
||
|
fce->fce_flowid == fq->fq_flowhash) {
|
||
|
/* Already on flowcontrol list */
|
||
|
return TRUE;
|
||
|
}
|
||
|
}
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
|
||
|
if (fce != NULL) {
|
||
|
/* XXX Add number of bytes in the queue */
|
||
|
STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
|
||
|
fq_cl->fcl_stat.fcl_flow_control++;
|
||
|
os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
|
||
|
"flow: 0x%x, iface: %s, B:%u\n", __func__,
|
||
|
fq_cl->fcl_stat.fcl_flow_control,
|
||
|
fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
|
||
|
if_name(fqs->fqs_ifq->ifcq_ifp), fq->fq_bytes);
|
||
|
KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_START,
|
||
|
fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
|
||
|
fq->fq_bytes, fq->fq_min_qdelay);
|
||
|
}
|
||
|
return (fce != NULL) ? TRUE : FALSE;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_remove_fcentry(fq_if_t *fqs, struct flowadv_fcentry *fce)
|
||
|
{
|
||
|
STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, fce_link);
|
||
|
STAILQ_NEXT(fce, fce_link) = NULL;
|
||
|
flowadv_add_entry(fce);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
|
||
|
{
|
||
|
struct flowadv_fcentry *fce = NULL;
|
||
|
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
|
||
|
if (fce->fce_flowid == fq->fq_flowhash) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (fce != NULL) {
|
||
|
fq_cl->fcl_stat.fcl_flow_feedback++;
|
||
|
fce->fce_event_type = FCE_EVENT_TYPE_FLOW_CONTROL_FEEDBACK;
|
||
|
os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
|
||
|
"flow: 0x%x, iface: %s grp: %hhu, B:%u\n", __func__,
|
||
|
fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
|
||
|
fce->fce_flowsrc_type, fce->fce_flowid,
|
||
|
if_name(fqs->fqs_ifq->ifcq_ifp), FQ_GROUP(fq)->fqg_index,
|
||
|
fq->fq_bytes);
|
||
|
fq_if_remove_fcentry(fqs, fce);
|
||
|
KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_END,
|
||
|
fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
|
||
|
fq->fq_bytes, fq->fq_min_qdelay);
|
||
|
}
|
||
|
fq->fq_flags &= ~FQF_FLOWCTL_ON;
|
||
|
}
|
||
|
|
||
|
boolean_t
|
||
|
fq_if_report_ce(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t ce_cnt,
|
||
|
uint32_t pkt_cnt)
|
||
|
{
|
||
|
struct flowadv_fcentry *fce;
|
||
|
|
||
|
#if DEBUG || DEVELOPMENT
|
||
|
if (__improbable(ifclassq_flow_control_adv == 0)) {
|
||
|
os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
|
||
|
return TRUE;
|
||
|
}
|
||
|
#endif /* DEBUG || DEVELOPMENT */
|
||
|
|
||
|
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
|
||
|
fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
|
||
|
if (fce != NULL) {
|
||
|
fce->fce_event_type = FCE_EVENT_TYPE_CONGESTION_EXPERIENCED;
|
||
|
fce->fce_ce_cnt = ce_cnt;
|
||
|
fce->fce_pkts_since_last_report = pkt_cnt;
|
||
|
|
||
|
flowadv_add_entry(fce);
|
||
|
}
|
||
|
return (fce != NULL) ? TRUE : FALSE;
|
||
|
}
|
||
|
|
||
|
|
||
|
void
|
||
|
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
|
||
|
int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
|
||
|
uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
|
||
|
bool budget_restricted, uint64_t now, bool *fq_cl_paced,
|
||
|
uint64_t *next_tx_time)
|
||
|
{
|
||
|
fq_t *fq = NULL, *tfq = NULL;
|
||
|
flowq_stailq_t temp_stailq;
|
||
|
uint32_t pktcnt, bytecnt;
|
||
|
boolean_t qempty, limit_reached = FALSE;
|
||
|
bool all_paced = true;
|
||
|
classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
|
||
|
fq_getq_flow_t fq_getq_flow_fn;
|
||
|
classq_pkt_t *head, *tail;
|
||
|
uint64_t fq_cl_tx_time = FQ_INVALID_TX_TS;
|
||
|
|
||
|
switch (fqs->fqs_ptype) {
|
||
|
case QP_MBUF:
|
||
|
fq_getq_flow_fn = fq_getq_flow_mbuf;
|
||
|
break;
|
||
|
|
||
|
#if SKYWALK
|
||
|
case QP_PACKET:
|
||
|
fq_getq_flow_fn = fq_getq_flow_kpkt;
|
||
|
break;
|
||
|
#endif /* SKYWALK */
|
||
|
|
||
|
default:
|
||
|
VERIFY(0);
|
||
|
/* NOTREACHED */
|
||
|
__builtin_unreachable();
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* maximum byte limit should not be greater than the budget for
|
||
|
* this class
|
||
|
*/
|
||
|
if (bytelimit > fq_cl->fcl_budget && budget_restricted) {
|
||
|
bytelimit = fq_cl->fcl_budget;
|
||
|
}
|
||
|
|
||
|
VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
|
||
|
pktcnt = bytecnt = 0;
|
||
|
STAILQ_INIT(&temp_stailq);
|
||
|
|
||
|
STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
|
||
|
ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
|
||
|
FQF_NEW_FLOW);
|
||
|
uint64_t fq_tx_time;
|
||
|
if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
|
||
|
ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
|
||
|
if (fq_tx_time < fq_cl_tx_time) {
|
||
|
fq_cl_tx_time = fq_tx_time;
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
all_paced = false;
|
||
|
|
||
|
if (fq_dqlist != NULL) {
|
||
|
if (!fq->fq_in_dqlist) {
|
||
|
fq_dqlist_add(fq_dqlist, fq);
|
||
|
}
|
||
|
head = &fq->fq_dq_head;
|
||
|
tail = &fq->fq_dq_tail;
|
||
|
} else {
|
||
|
ASSERT(!fq->fq_in_dqlist);
|
||
|
head = top;
|
||
|
tail = &last;
|
||
|
}
|
||
|
|
||
|
limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
|
||
|
pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);
|
||
|
|
||
|
/*
|
||
|
* From RFC 8290:
|
||
|
* if that queue has a negative number of credits (i.e., it has already
|
||
|
* dequeued at least a quantum of bytes), it is given an additional
|
||
|
* quantum of credits, the queue is put onto _the end of_ the list of
|
||
|
* old queues, and the routine selects the next queue and starts again.
|
||
|
*/
|
||
|
if (fq->fq_deficit <= 0 || qempty) {
|
||
|
fq->fq_deficit += fq_cl->fcl_quantum;
|
||
|
fq_if_empty_new_flow(fq, fq_cl);
|
||
|
}
|
||
|
//TODO: add credit when it's now paced? so that the fq is trated the same as empty
|
||
|
|
||
|
if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
|
||
|
ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
|
||
|
if (fq_tx_time < fq_cl_tx_time) {
|
||
|
fq_cl_tx_time = fq_tx_time;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (limit_reached) {
|
||
|
goto done;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
|
||
|
VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
|
||
|
FQF_OLD_FLOW);
|
||
|
bool destroy = true;
|
||
|
uint64_t fq_tx_time;
|
||
|
|
||
|
if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
|
||
|
ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
|
||
|
if (fq_tx_time < fq_cl_tx_time) {
|
||
|
fq_cl_tx_time = fq_tx_time;
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
all_paced = false;
|
||
|
|
||
|
if (fq_dqlist != NULL) {
|
||
|
if (!fq->fq_in_dqlist) {
|
||
|
fq_dqlist_add(fq_dqlist, fq);
|
||
|
}
|
||
|
head = &fq->fq_dq_head;
|
||
|
tail = &fq->fq_dq_tail;
|
||
|
destroy = false;
|
||
|
} else {
|
||
|
ASSERT(!fq->fq_in_dqlist);
|
||
|
head = top;
|
||
|
tail = &last;
|
||
|
}
|
||
|
|
||
|
limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
|
||
|
pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);
|
||
|
|
||
|
if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
|
||
|
ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
|
||
|
if (fq_tx_time < fq_cl_tx_time) {
|
||
|
fq_cl_tx_time = fq_tx_time;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (qempty) {
|
||
|
fq_if_empty_old_flow(fqs, fq_cl, fq, now);
|
||
|
} else if (fq->fq_deficit <= 0) {
|
||
|
STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
|
||
|
flowq, fq_actlink);
|
||
|
/*
|
||
|
* Move to the end of the old queues list. We do not
|
||
|
* need to update the flow count since this flow
|
||
|
* will be added to the tail again
|
||
|
*/
|
||
|
STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
|
||
|
fq->fq_deficit += fq_cl->fcl_quantum;
|
||
|
}
|
||
|
if (limit_reached) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
done:
|
||
|
if (all_paced) {
|
||
|
fq_cl->fcl_flags |= FCL_PACED;
|
||
|
fq_cl->fcl_next_tx_time = fq_cl_tx_time;
|
||
|
}
|
||
|
if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
|
||
|
STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
|
||
|
} else if (!STAILQ_EMPTY(&temp_stailq)) {
|
||
|
fq_cl->fcl_old_flows = temp_stailq;
|
||
|
}
|
||
|
if (last.cp_mbuf != NULL) {
|
||
|
VERIFY(top->cp_mbuf != NULL);
|
||
|
if (bottom != NULL) {
|
||
|
*bottom = last;
|
||
|
}
|
||
|
}
|
||
|
if (retpktcnt != NULL) {
|
||
|
*retpktcnt = pktcnt;
|
||
|
}
|
||
|
if (retbytecnt != NULL) {
|
||
|
*retbytecnt = bytecnt;
|
||
|
}
|
||
|
if (fq_cl_paced != NULL) {
|
||
|
*fq_cl_paced = all_paced;
|
||
|
}
|
||
|
if (next_tx_time != NULL) {
|
||
|
*next_tx_time = fq_cl_tx_time;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_teardown_ifclassq(struct ifclassq *ifq)
|
||
|
{
|
||
|
fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(ifq);
|
||
|
VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
|
||
|
fq_if_destroy(fqs);
|
||
|
ifq->ifcq_disc = NULL;
|
||
|
ifclassq_detach(ifq);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
|
||
|
struct fq_codel_flowstats *flowstat)
|
||
|
{
|
||
|
bzero(flowstat, sizeof(*flowstat));
|
||
|
flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
|
||
|
flowstat->fqst_bytes = fq->fq_bytes;
|
||
|
flowstat->fqst_flowhash = fq->fq_flowhash;
|
||
|
if (fq->fq_flags & FQF_NEW_FLOW) {
|
||
|
flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
|
||
|
}
|
||
|
if (fq->fq_flags & FQF_OLD_FLOW) {
|
||
|
flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
|
||
|
}
|
||
|
if (fq->fq_flags & FQF_DELAY_HIGH) {
|
||
|
flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
|
||
|
}
|
||
|
if (fq->fq_flags & FQF_FLOWCTL_ON) {
|
||
|
flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
|
||
|
}
|
||
|
if (fqs->fqs_large_flow == fq) {
|
||
|
flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int
|
||
|
fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid, u_int32_t qid,
|
||
|
struct if_ifclassq_stats *ifqs)
|
||
|
{
|
||
|
struct fq_codel_classstats *fcls;
|
||
|
fq_if_classq_t *fq_cl;
|
||
|
fq_if_t *fqs;
|
||
|
fq_t *fq = NULL;
|
||
|
fq_if_group_t *grp;
|
||
|
u_int32_t i, flowstat_cnt;
|
||
|
|
||
|
if (qid >= FQ_IF_MAX_CLASSES || gid >= FQ_IF_MAX_GROUPS) {
|
||
|
return EINVAL;
|
||
|
}
|
||
|
|
||
|
fqs = (fq_if_t *)ifq->ifcq_disc;
|
||
|
if (fqs->fqs_classq_groups[gid] == NULL) {
|
||
|
return ENXIO;
|
||
|
}
|
||
|
|
||
|
fcls = &ifqs->ifqs_fq_codel_stats;
|
||
|
|
||
|
fq_cl = &FQS_CLASSQ(fqs, gid, qid);
|
||
|
grp = fq_if_find_grp(fqs, gid);
|
||
|
|
||
|
fcls->fcls_pri = fq_cl->fcl_pri;
|
||
|
fcls->fcls_service_class = fq_cl->fcl_service_class;
|
||
|
fcls->fcls_quantum = fq_cl->fcl_quantum;
|
||
|
fcls->fcls_drr_max = fq_cl->fcl_drr_max;
|
||
|
fcls->fcls_budget = fq_cl->fcl_budget;
|
||
|
fcls->fcls_l4s_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_L4S];
|
||
|
fcls->fcls_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_C];
|
||
|
fcls->fcls_update_interval = grp->fqg_update_intervals[FQ_TFC_C];
|
||
|
fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
|
||
|
fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
|
||
|
fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
|
||
|
fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
|
||
|
fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
|
||
|
fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
|
||
|
fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
|
||
|
fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
|
||
|
fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
|
||
|
fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
|
||
|
fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
|
||
|
fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
|
||
|
fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
|
||
|
fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
|
||
|
fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
|
||
|
fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
|
||
|
fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
|
||
|
fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
|
||
|
fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
|
||
|
fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
|
||
|
fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
|
||
|
fcls->fcls_min_qdelay = fq_cl->fcl_stat.fcl_min_qdelay;
|
||
|
fcls->fcls_max_qdelay = fq_cl->fcl_stat.fcl_max_qdelay;
|
||
|
fcls->fcls_avg_qdelay = fq_cl->fcl_stat.fcl_avg_qdelay;
|
||
|
fcls->fcls_overwhelming = fq_cl->fcl_stat.fcl_overwhelming;
|
||
|
fcls->fcls_ce_marked = fq_cl->fcl_stat.fcl_ce_marked;
|
||
|
fcls->fcls_ce_reported = fq_cl->fcl_stat.fcl_ce_reported;
|
||
|
fcls->fcls_ce_mark_failures = fq_cl->fcl_stat.fcl_ce_mark_failures;
|
||
|
fcls->fcls_l4s_pkts = fq_cl->fcl_stat.fcl_l4s_pkts;
|
||
|
fcls->fcls_ignore_tx_time = fq_cl->fcl_stat.fcl_ignore_tx_time;
|
||
|
fcls->fcls_paced_pkts = fq_cl->fcl_stat.fcl_paced_pkts;
|
||
|
fcls->fcls_fcl_pacing_needed = fq_cl->fcl_stat.fcl_fcl_pacemaker_needed;
|
||
|
|
||
|
/* Gather per flow stats */
|
||
|
flowstat_cnt = min((fcls->fcls_newflows_cnt +
|
||
|
fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
|
||
|
i = 0;
|
||
|
STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
|
||
|
if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/* leave space for a few old flows */
|
||
|
if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
|
||
|
i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
|
||
|
break;
|
||
|
}
|
||
|
fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
|
||
|
i++;
|
||
|
}
|
||
|
STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
|
||
|
if (i >= flowstat_cnt) {
|
||
|
break;
|
||
|
}
|
||
|
fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
|
||
|
i++;
|
||
|
}
|
||
|
VERIFY(i <= flowstat_cnt);
|
||
|
fcls->fcls_flowstats_cnt = i;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
fq_if_create_grp(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
|
||
|
{
|
||
|
#define _FQ_CLASSQ_INIT(_grp, _s, _q) \
|
||
|
fq_if_classq_init(_grp, FQ_IF_ ## _s ##_INDEX, \
|
||
|
FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX(_s), \
|
||
|
MBUF_SC_ ## _s );
|
||
|
|
||
|
fq_if_group_t *grp;
|
||
|
fq_if_t *fqs;
|
||
|
uint32_t quantum, calc_flags = IF_CLASSQ_DEF;
|
||
|
struct ifnet *ifp = ifcq->ifcq_ifp;
|
||
|
|
||
|
VERIFY(grp_idx < FQ_IF_MAX_GROUPS);
|
||
|
|
||
|
fqs = (fq_if_t *)ifcq->ifcq_disc;
|
||
|
|
||
|
if (grp_idx == 0 && fqs->fqs_classq_groups[grp_idx] != NULL) {
|
||
|
grp = fqs->fqs_classq_groups[grp_idx];
|
||
|
goto update;
|
||
|
}
|
||
|
|
||
|
if (fqs->fqs_classq_groups[grp_idx] != NULL) {
|
||
|
return EINVAL;
|
||
|
}
|
||
|
|
||
|
grp = zalloc_flags(fq_if_grp_zone, Z_WAITOK | Z_ZERO);
|
||
|
if (grp == NULL) {
|
||
|
return ENOMEM;
|
||
|
}
|
||
|
|
||
|
fqs->fqs_classq_groups[grp_idx] = grp;
|
||
|
grp->fqg_index = grp_idx;
|
||
|
|
||
|
quantum = fq_if_calc_quantum(ifp);
|
||
|
if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
|
||
|
_FQ_CLASSQ_INIT(grp, BK, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, BE, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, VI, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, VO, quantum);
|
||
|
} else {
|
||
|
/* SIG shares same INDEX with VI */
|
||
|
_CASSERT(SCIDX_SIG == SCIDX_VI);
|
||
|
_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);
|
||
|
|
||
|
_FQ_CLASSQ_INIT(grp, BK_SYS, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, BK, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, BE, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, RD, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, OAM, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, AV, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, RV, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, VI, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, VO, quantum);
|
||
|
_FQ_CLASSQ_INIT(grp, CTL, quantum);
|
||
|
}
|
||
|
|
||
|
update:
|
||
|
if (flags & IF_DEFAULT_GRP) {
|
||
|
fq_if_set_grp_combined(ifcq, grp_idx);
|
||
|
grp->fqg_flags |= FQ_IF_DEFAULT_GRP;
|
||
|
} else {
|
||
|
fq_if_set_grp_separated(ifcq, grp_idx);
|
||
|
grp->fqg_flags &= ~FQ_IF_DEFAULT_GRP;
|
||
|
}
|
||
|
|
||
|
calc_flags |= (flags & IF_CLASSQ_LOW_LATENCY);
|
||
|
ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_C],
|
||
|
calc_flags);
|
||
|
ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_L4S],
|
||
|
calc_flags | IF_CLASSQ_L4S);
|
||
|
|
||
|
ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_C],
|
||
|
calc_flags);
|
||
|
ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_L4S],
|
||
|
calc_flags | IF_CLASSQ_L4S);
|
||
|
|
||
|
return 0;
|
||
|
#undef _FQ_CLASSQ_INIT
|
||
|
}
|
||
|
|
||
|
fq_if_group_t *
|
||
|
fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx)
|
||
|
{
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
|
||
|
VERIFY(grp_idx < FQ_IF_MAX_GROUPS);
|
||
|
|
||
|
grp = fqs->fqs_classq_groups[grp_idx];
|
||
|
VERIFY(grp != NULL);
|
||
|
|
||
|
return grp;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp)
|
||
|
{
|
||
|
for (uint8_t i = 0; i < FQ_IF_MAX_CLASSES; i++) {
|
||
|
fq_if_purge_classq(fqs, &grp->fqg_classq[i]);
|
||
|
}
|
||
|
|
||
|
bzero(&grp->fqg_bitmaps, sizeof(grp->fqg_bitmaps));
|
||
|
grp->fqg_len = 0;
|
||
|
grp->fqg_bytes = 0;
|
||
|
fq_if_set_grp_separated(fqs->fqs_ifq, grp->fqg_index);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_destroy_grps(fq_if_t *fqs)
|
||
|
{
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
|
||
|
|
||
|
for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
|
||
|
if (fqs->fqs_classq_groups[grp_idx] == NULL) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
fq_if_purge_grp(fqs, grp);
|
||
|
zfree(fq_if_grp_zone, grp);
|
||
|
fqs->fqs_classq_groups[grp_idx] = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static inline boolean_t
|
||
|
fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx)
|
||
|
{
|
||
|
return pktsched_bit_tst(grp_idx, &fqs->fqs_combined_grp_bitmap);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
|
||
|
{
|
||
|
fq_if_t *fqs;
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(ifcq);
|
||
|
|
||
|
fqs = (fq_if_t *)ifcq->ifcq_disc;
|
||
|
grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
|
||
|
if (fq_if_is_grp_combined(fqs, grp_idx)) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* We keep the current fq_deficit and fcl_budget when combining a group.
|
||
|
* That might disrupt the AQM but only for a moment.
|
||
|
*/
|
||
|
pktsched_bit_set(grp_idx, &fqs->fqs_combined_grp_bitmap);
|
||
|
TAILQ_INSERT_TAIL(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
|
||
|
{
|
||
|
fq_if_t *fqs;
|
||
|
fq_if_group_t *grp;
|
||
|
|
||
|
IFCQ_LOCK_ASSERT_HELD(ifcq);
|
||
|
|
||
|
fqs = (fq_if_t *)ifcq->ifcq_disc;
|
||
|
grp = fq_if_find_grp(fqs, grp_idx);
|
||
|
|
||
|
if (!fq_if_is_grp_combined(fqs, grp_idx)) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
pktsched_bit_clr(grp_idx, &fqs->fqs_combined_grp_bitmap);
|
||
|
TAILQ_REMOVE(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
|
||
|
}
|