/*
 * Copyright (c) 2012-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * A note on the MPTCP/NECP-interactions:
 *
 * MPTCP uses NECP-callbacks to get notified of interface/policy events.
 * MPTCP registers for these events at the MPTCP-layer: for interface-events
 * we register through a call to necp_client_register_multipath_cb, and for
 * per-flow events (aka per TCP-subflow) we register with
 * necp_client_register_socket_flow. Both registrations happen by using the
 * necp-client-uuid that comes from the app.
 *
 * The locking is rather tricky. In general, we expect the lock-ordering to
 * happen from necp-fd -> necp-client -> mpp_lock.
 *
 * There are however some subtleties.
 *
 * 1. When registering the multipath_cb, we are holding the mpp_lock. This is
 * safe, because it is the very first time this MPTCP-connection goes into
 * NECP. As we go into NECP we take the NECP-locks and thus are guaranteed
 * that no NECP-locks will deadlock us, because those NECP-events will also
 * first take the NECP-locks. Either they win the race and thus won't find
 * our MPTCP-connection, or MPTCP wins the race and will safely install the
 * callbacks while holding the NECP lock.
 *
 * 2. When registering the subflow-callbacks we must unlock the mpp_lock.
 * This is because we have already registered callbacks and we might race
 * against an NECP-event that will match on our socket. So, we have to
 * unlock to be safe.
 *
 * 3. When removing the multipath_cb, we do it in mp_pcbdispose(). The
 * so_usecount has reached 0. We must be careful to not remove the mpp_socket
 * pointers before we unregistered the callback, because, again, we might be
 * racing against an NECP-event. Unregistering must happen with an unlocked
 * mpp_lock, because of the lock-ordering constraint. It could be that an
 * NECP-event triggers before we had a chance to unregister. That's why
 * we need to check the so_usecount in mptcp_session_necp_cb. If we get
 * there while the socket is being garbage-collected, the use-count will go
 * down to 0 and we exit. Removal of the multipath_cb again happens by taking
 * the NECP-locks so any running NECP-events will finish first and exit cleanly.
 *
 * 4. When removing the subflow-callback, we do it in in_pcbdispose(). Again,
 * the socket-lock must be unlocked for lock-ordering constraints. This gets a
 * bit tricky here, as in tcp_garbage_collect we hold the mp_so and so lock.
 * So, we drop the mp_so-lock as soon as the subflow is unlinked with
 * mptcp_subflow_del. Then, in in_pcbdispose we drop the subflow-lock.
 * If an NECP-event was waiting on the lock in mptcp_subflow_necp_cb, when it
 * gets it, it will realize that the subflow became non-MPTCP and retry (see
 * tcp_lock). Then it waits again on the subflow-lock. When we drop this lock
 * in in_pcbdispose, and enter necp_inpcb_dispose, this one will have to wait
 * for the NECP-lock (held by the other thread that is taking care of the NECP-
 * event). So, the event now finally gets the subflow-lock and then hits an
 * so_usecount that is 0 and exits. Eventually, we can remove the subflow from
 * the NECP callback.
 */
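
/*
 * Illustrative sketch (not compiled; the name example_necp_cb is
 * hypothetical) of the use-count guard described in point 3 above: an
 * NECP-event that fires while the socket is being garbage-collected must
 * back off before touching the session.
 */
#if 0
static void
example_necp_cb(void *handle)
{
    struct mppcb *mpp = handle;

    /* Socket is on its way to disposal; see mptcp_session_necp_cb() below. */
    if (mpp->mpp_socket->so_usecount == 0) {
        return;
    }

    /* ... otherwise it is safe to lock the socket and handle the event ... */
}
#endif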

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/zalloc.h>
#include <kern/locks.h>

#include <mach/sdt.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_timer.h>

int mptcp_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_enable, 0, "Enable Multipath TCP Support");

/*
 * Number of times to try negotiating MPTCP on SYN retransmissions.
 * We haven't seen any reports of a middlebox that is dropping all
 * SYN-segments that have an MPTCP-option. Thus, let's be generous and
 * retransmit it 4 times.
 */
int mptcp_mpcap_retries = 4;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");

/*
 * By default, DSS checksum is turned off, revisit if we ever do
 * MPTCP for non-SSL traffic.
 */
int mptcp_dss_csum = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dss_csum, 0, "Enable DSS checksum");

/*
 * When mptcp_fail_thresh number of retransmissions are sent, subflow
 * failover is attempted on a different path.
 */
int mptcp_fail_thresh = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_fail_thresh, 0, "Failover threshold");

/*
 * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime
 * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout.
 * Some carrier networks have a timeout of 10 or 15 minutes.
 */
int mptcp_subflow_keeptime = 60 * 14;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
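
/*
 * Usage sketch (assuming the standard sysctl(8) utility; the values are
 * examples, not recommendations) for the knobs registered above:
 *
 *	sysctl net.inet.mptcp.enable              # 1 if MPTCP is enabled
 *	sysctl -w net.inet.mptcp.fail=2           # raise the failover threshold
 *	sysctl -w net.inet.mptcp.keepalive=600    # 10-minute subflow keepalive
 */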

int mptcp_rtthist_rtthresh = 600;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_rtthist_rtthresh, 0, "Rtt threshold");

int mptcp_rtothresh = 1500;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_rtothresh, 0, "RTO threshold");

/*
 * Probe the preferred path when it is not in use.
 */
uint32_t mptcp_probeto = 1000;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_probeto, 0, "Disable probing by setting to 0");

uint32_t mptcp_probecnt = 5;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_probecnt, 0, "Number of probe writes");

uint32_t mptcp_enable_v1 = 1;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, enable_v1, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_enable_v1, 0, "Enable or disable v1");

static int
sysctl_mptcp_version_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int error;
    int new_value = *(int *)oidp->oid_arg1;
    int old_value = *(int *)oidp->oid_arg1;

    error = sysctl_handle_int(oidp, &new_value, 0, req);
    if (!error) {
        if (new_value != MPTCP_VERSION_0 &&
            new_value != MPTCP_VERSION_1) {
            return EINVAL;
        }

        *(int *)oidp->oid_arg1 = new_value;
    }

    os_log(OS_LOG_DEFAULT,
        "%s:%u sysctl net.inet.tcp.mptcp_preferred_version: %d -> %d)",
        proc_best_name(current_proc()), proc_selfpid(),
        old_value, *(int *)oidp->oid_arg1);

    return error;
}

int mptcp_preferred_version = MPTCP_VERSION_1;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, mptcp_preferred_version,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_preferred_version, 0, &sysctl_mptcp_version_check, "I", "");

int mptcp_reass_total_qlen = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, reass_qlen,
    CTLFLAG_RD | CTLFLAG_LOCKED, &mptcp_reass_total_qlen, 0,
    "Total number of MPTCP segments in reassembly queues");
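
/*
 * User-space sketch (assumption: built separately against libc's
 * sysctlbyname(3); it is not part of this kernel file) showing how the
 * preferred-version knob registered above can be toggled between the two
 * accepted values.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>

static int
example_set_preferred_version(int version) /* 0 (v0) or 1 (v1) */
{
    int old = 0;
    size_t len = sizeof(old);

    /* The handler above rejects anything but the two valid versions. */
    return sysctlbyname("net.inet.tcp.mptcp_preferred_version",
        &old, &len, &version, sizeof(version));
}
#endif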

static int
mptcp_reass_present(struct socket *mp_so)
{
    struct mptses *mpte = mpsotompte(mp_so);
    struct mptcb *mp_tp = mpte->mpte_mptcb;
    struct tseg_qent *q;
    int dowakeup = 0;
    int flags = 0;
    int count = 0;

    /*
     * Present data to user, advancing rcv_nxt through
     * completed sequence space.
     */
    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
        return flags;
    }
    q = LIST_FIRST(&mp_tp->mpt_segq);
    if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt) {
        return flags;
    }

    /*
     * If there is already another thread doing reassembly for this
     * connection, it is better to let it finish the job --
     * (radar 16316196)
     */
    if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG) {
        return flags;
    }

    mp_tp->mpt_flags |= MPTCPF_REASS_INPROG;

    do {
        mp_tp->mpt_rcvnxt += q->tqe_len;
        LIST_REMOVE(q, tqe_q);
        if (mp_so->so_state & SS_CANTRCVMORE) {
            m_freem(q->tqe_m);
        } else {
            flags = !!(q->tqe_m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
            if (sbappendstream_rcvdemux(mp_so, q->tqe_m)) {
                dowakeup = 1;
            }
        }
        zfree(tcp_reass_zone, q);
        mp_tp->mpt_reassqlen--;
        count++;
        q = LIST_FIRST(&mp_tp->mpt_segq);
    } while (q && q->tqe_m->m_pkthdr.mp_dsn == mp_tp->mpt_rcvnxt);
    mp_tp->mpt_flags &= ~MPTCPF_REASS_INPROG;

    if (count > 0) {
        OSAddAtomic(-count, &mptcp_reass_total_qlen);
    }
    if (dowakeup) {
        sorwakeup(mp_so); /* done with socket lock held */
    }
    return flags;
}

static int
mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *m)
{
    struct mptcb *mp_tp = mpsotomppcb(mp_so)->mpp_pcbe->mpte_mptcb;
    u_int64_t mb_dsn = phdr->mp_dsn;
    struct tseg_qent *q;
    struct tseg_qent *p = NULL;
    struct tseg_qent *nq;
    struct tseg_qent *te = NULL;
    uint32_t qlimit;

    /*
     * Limit the number of segments in the reassembly queue to prevent
     * holding on to too many segments (and thus running out of mbufs).
     * Make sure to let through the missing segment that caused this queue
     * to build up. Always keep one global queue entry spare to be able to
     * process that missing segment.
     */
    qlimit = MIN(MAX(100, mp_so->so_rcv.sb_hiwat >> 10),
        (tcp_autorcvbuf_max >> 10));
    if (mb_dsn != mp_tp->mpt_rcvnxt &&
        (mp_tp->mpt_reassqlen + 1) >= qlimit) {
        tcpstat.tcps_mptcp_rcvmemdrop++;
        m_freem(m);
        *tlenp = 0;
        return 0;
    }

    /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
    te = zalloc_flags(tcp_reass_zone, Z_WAITOK | Z_NOFAIL);
    mp_tp->mpt_reassqlen++;
    OSIncrementAtomic(&mptcp_reass_total_qlen);

    /*
     * Find a segment which begins after this one does.
     */
    LIST_FOREACH(q, &mp_tp->mpt_segq, tqe_q) {
        if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn)) {
            break;
        }
        p = q;
    }

    /*
     * If there is a preceding segment, it may provide some of
     * our data already. If so, drop the data from the incoming
     * segment. If it provides all of our data, drop us.
     */
    if (p != NULL) {
        int64_t i;

        /* conversion to int (in i) handles seq wraparound */
        i = p->tqe_m->m_pkthdr.mp_dsn + p->tqe_len - mb_dsn;
        if (i > 0) {
            if (i >= *tlenp) {
                tcpstat.tcps_mptcp_rcvduppack++;
                m_freem(m);
                zfree(tcp_reass_zone, te);
                te = NULL;
                mp_tp->mpt_reassqlen--;
                OSDecrementAtomic(&mptcp_reass_total_qlen);
                /*
                 * Try to present any queued data
                 * at the left window edge to the user.
                 * This is needed after the 3-WHS
                 * completes.
                 */
                goto out;
            }
            VERIFY(i <= INT_MAX);
            m_adj(m, (int)i);
            *tlenp -= i;
            phdr->mp_dsn += i;
        }
    }

    tcpstat.tcps_mp_oodata++;

    /*
     * While we overlap succeeding segments trim them or,
     * if they are completely covered, dequeue them.
     */
    while (q) {
        int64_t i = (mb_dsn + *tlenp) - q->tqe_m->m_pkthdr.mp_dsn;
        if (i <= 0) {
            break;
        }

        if (i < q->tqe_len) {
            q->tqe_m->m_pkthdr.mp_dsn += i;
            q->tqe_len -= i;
            VERIFY(i <= INT_MAX);
            m_adj(q->tqe_m, (int)i);
            break;
        }

        nq = LIST_NEXT(q, tqe_q);
        LIST_REMOVE(q, tqe_q);
        m_freem(q->tqe_m);
        zfree(tcp_reass_zone, q);
        mp_tp->mpt_reassqlen--;
        OSDecrementAtomic(&mptcp_reass_total_qlen);
        q = nq;
    }

    /* Insert the new segment queue entry into place. */
    te->tqe_m = m;
    te->tqe_th = NULL;
    te->tqe_len = *tlenp;

    if (p == NULL) {
        LIST_INSERT_HEAD(&mp_tp->mpt_segq, te, tqe_q);
    } else {
        LIST_INSERT_AFTER(p, te, tqe_q);
    }

out:
    return mptcp_reass_present(mp_so);
}
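
/*
 * Worked example for the qlimit computed in mptcp_reass() above
 * (illustrative numbers): with so_rcv.sb_hiwat = 256 KiB, sb_hiwat >> 10
 * is 256, so qlimit = MIN(MAX(100, 256), tcp_autorcvbuf_max >> 10). A tiny
 * receive buffer still gets the floor of 100 queue entries, while the
 * global autorcvbuf ceiling bounds the queue from above.
 */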

/*
 * MPTCP input, called when data has been read from a subflow socket.
 */
void
mptcp_input(struct mptses *mpte, struct mbuf *m)
{
    struct socket *mp_so;
    struct mptcb *mp_tp = NULL;
    int count = 0, wakeup = 0;
    struct mbuf *save = NULL, *prev = NULL;
    struct mbuf *freelist = NULL, *tail = NULL;

    if (__improbable((m->m_flags & M_PKTHDR) == 0)) {
        panic("mbuf invalid: %p", m);
    }

    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;

    socket_lock_assert_owned(mp_so);

    DTRACE_MPTCP(input);

    mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

    /*
     * Each mbuf contains an MPTCP Data Sequence Map.
     * Process the data for reassembly, delivery to the MPTCP socket
     * client, etc.
     */
    count = mp_so->so_rcv.sb_cc;

    /*
     * In the degraded fallback case, data is accepted without a DSS map.
     */
    if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
        struct mbuf *iter;
        int mb_dfin;
fallback:
        mb_dfin = 0;
        mptcp_sbrcv_grow(mp_tp);

        iter = m;
        while (iter) {
            if ((iter->m_flags & M_PKTHDR) &&
                (iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
                mb_dfin = 1;
            }

            if ((iter->m_flags & M_PKTHDR) && m_pktlen(iter) == 0) {
                /* Don't add zero-length packets, so skip it! */
                if (prev == NULL) {
                    m = iter->m_next;
                    m_free(iter);
                    iter = m;
                } else {
                    prev->m_next = iter->m_next;
                    m_free(iter);
                    iter = prev->m_next;
                }

                /* It was a zero-length packet, so the next one must be a pkthdr */
                VERIFY(iter == NULL || iter->m_flags & M_PKTHDR);
            } else {
                prev = iter;
                iter = iter->m_next;
            }
        }

        /*
         * Assume a degraded flow, as this may be the first packet
         * without DSS, and the subflow state is not updated yet.
         */
        if (sbappendstream_rcvdemux(mp_so, m)) {
            sorwakeup(mp_so);
        }

        DTRACE_MPTCP5(receive__degraded, struct mbuf *, m,
            struct socket *, mp_so,
            struct sockbuf *, &mp_so->so_rcv,
            struct sockbuf *, &mp_so->so_snd,
            struct mptses *, mpte);
        count = mp_so->so_rcv.sb_cc - count;

        mp_tp->mpt_rcvnxt += count;

        if (mb_dfin) {
            mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
            socantrcvmore(mp_so);
        }
        return;
    }

    do {
        u_int64_t mb_dsn;
        int32_t mb_datalen;
        int64_t todrop;
        int mb_dfin = 0;

        VERIFY(m->m_flags & M_PKTHDR);

        /* If fallback occurs, mbufs will not have PKTF_MPTCP set */
        if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
            goto fallback;
        }

        save = m->m_next;
        /*
         * A single TCP packet formed of multiple mbufs
         * holds the DSS mapping in the first mbuf of the chain.
         * Other mbufs in the chain may have M_PKTHDR set
         * even though they belong to the same TCP packet
         * and therefore use the DSS mapping stored in the
         * first mbuf of the mbuf chain. mptcp_input() can
         * get an mbuf chain with multiple TCP packets.
         */
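        /*
         * Illustrative chain layout for the splitting loop below
         * (DSN values are made up):
         *
         *   [pkthdr PKTF_MPTCP dsn=100]->[data]->[pkthdr PKTF_MPTCP dsn=160]->...
         *   \_____ first mapped TCP packet _____/ \__ next mapped packet ...
         *
         * The loop cuts the chain right before the next mbuf that carries
         * its own DSS mapping, so each iteration of the outer do-while
         * handles exactly one mapped TCP packet.
         */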
        while (save && (!(save->m_flags & M_PKTHDR) ||
            !(save->m_pkthdr.pkt_flags & PKTF_MPTCP))) {
            prev = save;
            save = save->m_next;
        }
        if (prev) {
            prev->m_next = NULL;
        } else {
            m->m_next = NULL;
        }

        mb_dsn = m->m_pkthdr.mp_dsn;
        mb_datalen = m->m_pkthdr.mp_rlen;

        todrop = (mb_dsn + mb_datalen) - (mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd);
        if (todrop > 0) {
            tcpstat.tcps_mptcp_rcvpackafterwin++;

            os_log_info(mptcp_log_handle, "%s - %lx: dropping dsn %u dlen %u rcvnxt %u rcvwnd %u todrop %lld\n",
                __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
                (uint32_t)mb_dsn, mb_datalen, (uint32_t)mp_tp->mpt_rcvnxt,
                mp_tp->mpt_rcvwnd, todrop);

            if (todrop >= mb_datalen) {
                if (freelist == NULL) {
                    freelist = m;
                } else {
                    tail->m_next = m;
                }

                if (prev != NULL) {
                    tail = prev;
                } else {
                    tail = m;
                }

                m = save;
                prev = save = NULL;
                continue;
            } else {
                VERIFY(todrop <= INT_MAX);
                m_adj(m, (int)-todrop);
                mb_datalen -= todrop;
                m->m_pkthdr.mp_rlen -= todrop;
            }

            /*
             * We drop from the right edge of the mbuf, thus the
             * DATA_FIN is dropped as well.
             */
            m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
        }

        if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvnxt)) {
            if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen),
                mp_tp->mpt_rcvnxt)) {
                if (freelist == NULL) {
                    freelist = m;
                } else {
                    tail->m_next = m;
                }

                if (prev != NULL) {
                    tail = prev;
                } else {
                    tail = m;
                }

                m = save;
                prev = save = NULL;
                continue;
            } else {
                VERIFY((mp_tp->mpt_rcvnxt - mb_dsn) <= INT_MAX);
                m_adj(m, (int)(mp_tp->mpt_rcvnxt - mb_dsn));
                mb_datalen -= (mp_tp->mpt_rcvnxt - mb_dsn);
                mb_dsn = mp_tp->mpt_rcvnxt;
                VERIFY(mb_datalen >= 0 && mb_datalen <= USHRT_MAX);
                m->m_pkthdr.mp_rlen = (uint16_t)mb_datalen;
                m->m_pkthdr.mp_dsn = mb_dsn;
            }
        }

        if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
            !LIST_EMPTY(&mp_tp->mpt_segq)) {
            mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);

            goto next;
        }
        mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);

        mptcp_sbrcv_grow(mp_tp);

        if (sbappendstream_rcvdemux(mp_so, m)) {
            wakeup = 1;
        }

        DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so,
            struct sockbuf *, &mp_so->so_rcv,
            struct sockbuf *, &mp_so->so_snd,
            struct mptses *, mpte,
            struct mptcb *, mp_tp);
        count = mp_so->so_rcv.sb_cc - count;
        tcpstat.tcps_mp_rcvtotal++;
        tcpstat.tcps_mp_rcvbytes += count;

        mp_tp->mpt_rcvnxt += count;

next:
        if (mb_dfin) {
            mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
            socantrcvmore(mp_so);
        }
        m = save;
        prev = save = NULL;
        count = mp_so->so_rcv.sb_cc;
    } while (m);

    if (freelist) {
        m_freem(freelist);
    }

    if (wakeup) {
        sorwakeup(mp_so);
    }
}

boolean_t
mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject)
{
    struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);

    /*
     * Always send if there is data in the reinject-queue.
     */
    if (!ignore_reinject && mp_tp->mpt_mpte->mpte_reinjectq) {
        return TRUE;
    }

    /*
     * Don't send, if:
     *
     * 1. snd_nxt >= snd_max: Basically everything has been sent,
     *    except when using TFO, where we might be doing a 0-byte write.
     * 2. snd_una + snd_wnd <= snd_nxt: No space in the receiver's window.
     * 3. snd_nxt + 1 == snd_max and we are closing: A DATA_FIN is scheduled.
     */

    if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) &&
        MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
        return FALSE;
    }

    if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt)) {
        return FALSE;
    }

    if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax &&
        mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
        return FALSE;
    }

    if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
        return FALSE;
    }

    return TRUE;
}
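
/*
 * Worked example for the window check in mptcp_can_send_more() above
 * (illustrative numbers): with mpt_snduna = 1000, mpt_sndwnd = 500 and
 * mpt_sndnxt = 1500, condition 2 holds (1000 + 500 <= 1500), so no more
 * data may be sent until DATA_ACKs open the window again.
 */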

/*
 * MPTCP output.
 */
int
mptcp_output(struct mptses *mpte)
{
    struct mptcb *mp_tp;
    struct mptsub *mpts;
    struct mptsub *mpts_tried = NULL;
    struct socket *mp_so;
    struct mptsub *preferred_mpts = NULL;
    uint64_t old_snd_nxt;
    int error = 0;

    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;

    socket_lock_assert_owned(mp_so);

    if (mp_so->so_flags & SOF_DEFUNCT) {
        return 0;
    }

    VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL));
    mpte->mpte_mppcb->mpp_flags |= MPP_WUPCALL;

    old_snd_nxt = mp_tp->mpt_sndnxt;
    while (mptcp_can_send_more(mp_tp, FALSE)) {
        /* get the "best" subflow to be used for transmission */
        mpts = mptcp_get_subflow(mpte, &preferred_mpts);
        if (mpts == NULL) {
            break;
        }

        /* In case there's just one flow, we reattempt later */
        if (mpts_tried != NULL &&
            (mpts == mpts_tried || (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
            mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
            mpts_tried->mpts_flags |= MPTSF_ACTIVE;
            mptcp_start_timer(mpte, MPTT_REXMT);
            break;
        }

        /*
         * Automatic sizing of the send socket buffer. Increase the send
         * socket buffer size if all of the following criteria are met:
         * 1. the receiver has enough buffer space for this data
         * 2. the send buffer is filled to 7/8th with data (so we actually
         *    have data to make use of it)
         */
        if ((mp_so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE) {
            if ((mp_tp->mpt_sndwnd / 4 * 5) >= mp_so->so_snd.sb_hiwat &&
                mp_so->so_snd.sb_cc >= (mp_so->so_snd.sb_hiwat / 8 * 7)) {
                if (sbreserve(&mp_so->so_snd,
                    min(mp_so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
                    tcp_autosndbuf_max)) == 1) {
                    mp_so->so_snd.sb_idealsize = mp_so->so_snd.sb_hiwat;
                }
            }
        }

        DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts,
            struct socket *, mp_so);
        error = mptcp_subflow_output(mpte, mpts, 0);
        if (error) {
            /* can be a temporary loss of source address or other error */
            mpts->mpts_flags |= MPTSF_FAILINGOVER;
            mpts->mpts_flags &= ~MPTSF_ACTIVE;
            mpts_tried = mpts;
            if (error != ECANCELED) {
                os_log_error(mptcp_log_handle, "%s - %lx: Error = %d mpts_flags %#x\n",
                    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
                    error, mpts->mpts_flags);
            }
            break;
        }
        /* The model is to have only one active flow at a time */
        mpts->mpts_flags |= MPTSF_ACTIVE;
        mpts->mpts_probesoon = mpts->mpts_probecnt = 0;

        /* Allows us to update the smoothed rtt */
        if (mptcp_probeto && mpts != preferred_mpts && preferred_mpts != NULL) {
            if (preferred_mpts->mpts_probesoon) {
                if ((tcp_now - preferred_mpts->mpts_probesoon) > mptcp_probeto) {
                    mptcp_subflow_output(mpte, preferred_mpts, MPTCP_SUBOUT_PROBING);
                    if (preferred_mpts->mpts_probecnt >= mptcp_probecnt) {
                        preferred_mpts->mpts_probesoon = 0;
                        preferred_mpts->mpts_probecnt = 0;
                    }
                }
            } else {
                preferred_mpts->mpts_probesoon = tcp_now;
                preferred_mpts->mpts_probecnt = 0;
            }
        }

        if (mpte->mpte_active_sub == NULL) {
            mpte->mpte_active_sub = mpts;
        } else if (mpte->mpte_active_sub != mpts) {
            mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
            mpte->mpte_active_sub = mpts;
            mptcpstats_inc_switch(mpte, mpts);
        }
    }

    if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
        if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax &&
            mp_tp->mpt_snduna == mp_tp->mpt_sndnxt) {
            mptcp_finish_usrclosed(mpte);
        }
    }

    mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_WUPCALL);

    /* subflow errors should not be percolated back up */
    return 0;
}
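
/*
 * Worked example for the send-buffer autosizing in mptcp_output() above
 * (illustrative numbers): with sb_hiwat = 64 KiB, the buffer only grows
 * once the peer's window covers at least 4/5 of it (mpt_sndwnd >= ~51 KiB)
 * and the buffer is at least 7/8 full (sb_cc >= 56 KiB); it is then
 * enlarged by tcp_autosndbuf_inc, capped at tcp_autosndbuf_max.
 */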

static struct mptsub *
mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt)
{
    struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

    /*
     * Lower RTT? Take it, if it's our first one, or
     * it doesn't have any loss, or the current one has
     * loss as well.
     */
    if (tp->t_srtt && *currtt > tp->t_srtt &&
        (curbest == NULL || tp->t_rxtshift == 0 ||
        sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
        *currtt = tp->t_srtt;
        return mpts;
    }

    /*
     * If we find a subflow without loss, take it always!
     */
    if (curbest && sototcpcb(curbest->mpts_socket)->t_rxtshift &&
        tp->t_rxtshift == 0) {
        *currtt = tp->t_srtt;
        return mpts;
    }

    return curbest != NULL ? curbest : mpts;
}

static struct mptsub *
mptcp_return_subflow(struct mptsub *mpts)
{
    if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0) {
        return NULL;
    }

    return mpts;
}

static boolean_t
mptcp_subflow_is_slow(struct mptses *mpte, struct mptsub *mpts)
{
    struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
    int fail_thresh = mptcp_fail_thresh;

    if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
        mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
        fail_thresh *= 2;
    }

    return tp->t_rxtshift >= fail_thresh &&
           (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq);
}

/*
 * Return the most eligible subflow to be used for sending data.
 */
struct mptsub *
mptcp_get_subflow(struct mptses *mpte, struct mptsub **preferred)
{
    struct tcpcb *besttp, *secondtp;
    struct inpcb *bestinp, *secondinp;
    struct mptsub *mpts;
    struct mptsub *best = NULL;
    struct mptsub *second_best = NULL;
    int exp_rtt = INT_MAX, cheap_rtt = INT_MAX;

    /*
     * First Step:
     * Choose the best subflow for cellular and non-cellular interfaces.
     */

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct socket *so = mpts->mpts_socket;
        struct tcpcb *tp = sototcpcb(so);
        struct inpcb *inp = sotoinpcb(so);

        /*
         * First, the hard conditions to reject subflows
         * (e.g., not connected, ...)
         */
        if (inp->inp_last_outifp == NULL) {
            continue;
        }

        if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
            continue;
        }

        /* There can only be one subflow in degraded state */
        if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
            best = mpts;
            break;
        }

        /*
         * If this subflow is waiting to finally send, do it!
         */
        if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
            return mptcp_return_subflow(mpts);
        }

        /*
         * Only send if the subflow is MP_CAPABLE. The exceptions to
         * this rule (degraded or TFO) have been taken care of above.
         */
        if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) {
            continue;
        }

        if ((so->so_state & SS_ISDISCONNECTED) ||
            !(so->so_state & SS_ISCONNECTED) ||
            !TCPS_HAVEESTABLISHED(tp->t_state) ||
            tp->t_state > TCPS_CLOSE_WAIT) {
            continue;
        }

        /*
         * Second, the soft conditions to find the subflow with best
         * conditions for each set (aka cellular vs non-cellular).
         */
        if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
            second_best = mptcp_choose_subflow(mpts, second_best, &exp_rtt);
        } else {
            best = mptcp_choose_subflow(mpts, best, &cheap_rtt);
        }
    }

    /*
     * If there is no preferred or backup subflow, and there is no active
     * subflow, use the last usable subflow.
     */
    if (best == NULL) {
        return mptcp_return_subflow(second_best);
    }

    if (second_best == NULL) {
        return mptcp_return_subflow(best);
    }

    besttp = sototcpcb(best->mpts_socket);
    bestinp = sotoinpcb(best->mpts_socket);
    secondtp = sototcpcb(second_best->mpts_socket);
    secondinp = sotoinpcb(second_best->mpts_socket);

    if (preferred != NULL) {
        *preferred = mptcp_return_subflow(best);
    }
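
    /*
     * Summary of the service-type policy below (for orientation):
     *
     *   PURE_HANDOVER: always stay on best, subject only to cwnd space.
     *   HANDOVER:      leave best only if Wi-Fi quality is not good and
     *                  best is detected as slow.
     *   INTERACTIVE:   leave best when its RTT or RTO crosses thresholds
     *                  that second_best stays under.
     *   AGGREGATE:     prefer the lower-srtt subflow, falling back when
     *                  its congestion window is full.
     */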
    /*
     * Second Step: Among best and second_best, choose the one that is
     * most appropriate for this particular service-type.
     */
    if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
        return mptcp_return_subflow(best);
    } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
        /*
         * Only handover if Symptoms tells us to do so.
         */
        if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
            mptcp_wifi_quality_for_session(mpte) != MPTCP_WIFI_QUALITY_GOOD &&
            mptcp_subflow_is_slow(mpte, best)) {
            return mptcp_return_subflow(second_best);
        }

        return mptcp_return_subflow(best);
    } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_INTERACTIVE) {
        int rtt_thresh = mptcp_rtthist_rtthresh << TCP_RTT_SHIFT;
        int rto_thresh = mptcp_rtothresh;

        /* Adjust with symptoms information */
        if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
            mptcp_wifi_quality_for_session(mpte) != MPTCP_WIFI_QUALITY_GOOD) {
            rtt_thresh /= 2;
            rto_thresh /= 2;
        }

        if (besttp->t_srtt && secondtp->t_srtt &&
            besttp->t_srtt >= rtt_thresh &&
            secondtp->t_srtt < rtt_thresh) {
            tcpstat.tcps_mp_sel_rtt++;
            return mptcp_return_subflow(second_best);
        }

        if (mptcp_subflow_is_slow(mpte, best) &&
            secondtp->t_rxtshift == 0) {
            return mptcp_return_subflow(second_best);
        }

        /* Compare RTOs, select second_best if best's rto exceeds rtothresh */
        if (besttp->t_rxtcur && secondtp->t_rxtcur &&
            besttp->t_rxtcur >= rto_thresh &&
            secondtp->t_rxtcur < rto_thresh) {
            tcpstat.tcps_mp_sel_rto++;

            return mptcp_return_subflow(second_best);
        }

        /*
         * None of the above conditions for sending on the secondary
         * were true. So, let's schedule on the best one, if it still
         * has some space in the congestion-window.
         */
        return mptcp_return_subflow(best);
    } else if (mpte->mpte_svctype >= MPTCP_SVCTYPE_AGGREGATE) {
        struct mptsub *tmp;

        /*
         * We only care about RTT when aggregating.
         */
        if (besttp->t_srtt > secondtp->t_srtt) {
            tmp = best;
            best = second_best;
            besttp = secondtp;
            bestinp = secondinp;

            second_best = tmp;
            secondtp = sototcpcb(second_best->mpts_socket);
            secondinp = sotoinpcb(second_best->mpts_socket);
        }

        /* Is there still space in the congestion window? */
        if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0) {
            return mptcp_return_subflow(second_best);
        }

        return mptcp_return_subflow(best);
    } else {
        panic("Unknown service-type configured for MPTCP");
    }

    return NULL;
}

void
mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
{
    struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);

    socket_lock_assert_owned(mp_so);

    DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, uint32_t, event);

    switch (mp_tp->mpt_state) {
    case MPTCPS_CLOSED:
    case MPTCPS_LISTEN:
        mp_tp->mpt_state = MPTCPS_TERMINATE;
        break;

    case MPTCPS_ESTABLISHED:
        if (event == MPCE_CLOSE) {
            mp_tp->mpt_state = MPTCPS_FIN_WAIT_1;
            mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */
        } else if (event == MPCE_RECV_DATA_FIN) {
            mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
            mp_tp->mpt_state = MPTCPS_CLOSE_WAIT;
        }
        break;

    case MPTCPS_CLOSE_WAIT:
        if (event == MPCE_CLOSE) {
            mp_tp->mpt_state = MPTCPS_LAST_ACK;
            mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */
        }
        break;

    case MPTCPS_FIN_WAIT_1:
        if (event == MPCE_RECV_DATA_ACK) {
            mp_tp->mpt_state = MPTCPS_FIN_WAIT_2;
        } else if (event == MPCE_RECV_DATA_FIN) {
            mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
            mp_tp->mpt_state = MPTCPS_CLOSING;
        }
        break;

    case MPTCPS_CLOSING:
        if (event == MPCE_RECV_DATA_ACK) {
            mp_tp->mpt_state = MPTCPS_TIME_WAIT;
        }
        break;

    case MPTCPS_LAST_ACK:
        if (event == MPCE_RECV_DATA_ACK) {
            mptcp_close(mp_tp->mpt_mpte, mp_tp);
        }
        break;

    case MPTCPS_FIN_WAIT_2:
        if (event == MPCE_RECV_DATA_FIN) {
            mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
            mp_tp->mpt_state = MPTCPS_TIME_WAIT;
        }
        break;

    case MPTCPS_TIME_WAIT:
    case MPTCPS_TERMINATE:
        break;

    default:
        VERIFY(0);
        /* NOTREACHED */
    }
    DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, uint32_t, event);
}
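
/*
 * Example walk-through of mptcp_close_fsm() above for an active close
 * (illustrative): ESTABLISHED --MPCE_CLOSE--> FIN_WAIT_1 (mpt_sndmax += 1
 * for our DATA_FIN) --MPCE_RECV_DATA_ACK--> FIN_WAIT_2
 * --MPCE_RECV_DATA_FIN--> TIME_WAIT (mpt_rcvnxt += 1 for the peer's
 * DATA_FIN).
 */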

/* If you change this function, match up mptcp_update_rcv_state_f */
void
mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp,
    uint16_t csum)
{
    struct mptcb *mp_tp = tptomptp(tp);
    u_int64_t full_dsn = 0;

    NTOHL(dss_info->mdss_dsn);
    NTOHL(dss_info->mdss_subflow_seqn);
    NTOHS(dss_info->mdss_data_len);

    /* XXX for autosndbuf grow sb here */
    MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn);
    mptcp_update_rcv_state_meat(mp_tp, tp,
        full_dsn, dss_info->mdss_subflow_seqn,
        dss_info->mdss_data_len, csum);
}

void
mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
    u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len,
    uint16_t csum)
{
    if (mdss_data_len == 0) {
        os_log_error(mptcp_log_handle, "%s - %lx: Infinite Mapping.\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));

        if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
            os_log_error(mptcp_log_handle, "%s - %lx: Bad checksum %x\n",
                __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), csum);
        }
        mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
        return;
    }

    mptcp_notify_mpready(tp->t_inpcb->inp_socket);

    tp->t_rcv_map.mpt_dsn = full_dsn;
    tp->t_rcv_map.mpt_sseq = seqn;
    tp->t_rcv_map.mpt_len = mdss_data_len;
    tp->t_rcv_map.mpt_csum = csum;
    tp->t_mpflags |= TMPF_EMBED_DSN;
}

static uint16_t
mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
    uint16_t dlen, uint16_t csum, int dfin)
{
    struct mptcb *mp_tp = tptomptp(tp);
    int real_len = dlen - dfin;
    uint32_t sum = 0;

    VERIFY(real_len >= 0);

    if (mp_tp == NULL) {
        return 0;
    }

    if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
        return 0;
    }

    if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
        return 0;
    }

    /*
     * The remote side may send a packet with fewer bytes than the
     * claimed DSS checksum length.
     */
    if ((int)m_length2(m, NULL) < real_len) {
        return 0xffff;
    }

    if (real_len != 0) {
        sum = m_sum16(m, 0, real_len);
    }

    sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum);
    ADDCARRY(sum);

    DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
        uint32_t, sum);

    return ~sum & 0xffff;
}

/*
 * MPTCP Checksum support
 * The checksum is calculated whenever the MPTCP DSS option is included
 * in the TCP packet. The checksum includes the sum of the MPTCP pseudo
 * header and the actual data indicated by the length specified in the
 * DSS option.
 */
int
mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
    uint32_t sseq, uint16_t dlen, uint16_t csum, int dfin)
{
    uint16_t mptcp_csum;

    mptcp_csum = mptcp_input_csum(tp, m, dsn, sseq, dlen, csum, dfin);
    if (mptcp_csum) {
        tp->t_mpflags |= TMPF_SND_MPFAIL;
        mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
        m_freem(m);
        tcpstat.tcps_mp_badcsum++;
        return -1;
    }
    return 0;
}

uint16_t
mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen)
{
    uint32_t sum = 0;

    if (dlen) {
        sum = m_sum16(m, 0, dlen);
    }

    dss_val = mptcp_hton64(dss_val);
    sseq = htonl(sseq);
    dlen = htons(dlen);
    sum += in_pseudo64(dss_val, sseq, dlen);

    ADDCARRY(sum);
    sum = ~sum & 0xffff;
    DTRACE_MPTCP2(checksum__result, struct mbuf *, m, uint32_t, sum);

    return (uint16_t)sum;
}
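
/*
 * Illustrative sketch (not compiled; example_dss_csum is hypothetical):
 * a plain-C model of what m_sum16()/in_pseudo64() compute above. The DSS
 * checksum is a ones'-complement sum over the payload plus a pseudo-header
 * of {DSN (8 bytes), subflow sequence number (4), data-level length (2)},
 * mirroring what mptcp_output_csum() feeds to in_pseudo64().
 */
#if 0
static uint16_t
example_dss_csum(const uint8_t *payload, size_t len,
    uint64_t dsn, uint32_t sseq, uint16_t dlen)
{
    uint32_t sum = 0;
    size_t i;

    /* Sum the payload as big-endian 16-bit words; pad a trailing odd byte. */
    for (i = 0; i + 1 < len; i += 2) {
        sum += (uint32_t)(payload[i] << 8 | payload[i + 1]);
    }
    if (len & 1) {
        sum += (uint32_t)(payload[len - 1] << 8);
    }

    /* Pseudo-header: DSN, subflow sequence number, data-level length. */
    sum += (uint32_t)(dsn >> 48) + (uint32_t)((dsn >> 32) & 0xffff) +
        (uint32_t)((dsn >> 16) & 0xffff) + (uint32_t)(dsn & 0xffff);
    sum += (sseq >> 16) + (sseq & 0xffff);
    sum += dlen;

    /* Fold the carries and return the ones'-complement. */
    while (sum >> 16) {
        sum = (sum & 0xffff) + (sum >> 16);
    }
    return (uint16_t)~sum;
}
#endif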

/*
 * When the WiFi signal starts fading, there is more loss and there are RTT
 * spikes. Check if there has been a large spike by comparing against
 * a tolerable RTT spike threshold.
 */
boolean_t
mptcp_no_rto_spike(struct socket *so)
{
    struct tcpcb *tp = intotcpcb(sotoinpcb(so));
    int32_t spike = 0;

    if (tp->t_rxtcur > mptcp_rtothresh) {
        spike = tp->t_rxtcur - mptcp_rtothresh;
    }

    if (spike > 0) {
        return FALSE;
    } else {
        return TRUE;
    }
}

void
mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
{
    VERIFY(mpp->mpp_flags & flag);
    mpp->mpp_flags &= ~flag;

    if (mptcp_should_defer_upcall(mpp)) {
        return;
    }

    if (mpp->mpp_flags & MPP_SHOULD_WORKLOOP) {
        mpp->mpp_flags &= ~MPP_SHOULD_WORKLOOP;

        mptcp_subflow_workloop(mpp->mpp_pcbe);
    }

    if (mpp->mpp_flags & MPP_SHOULD_RWAKEUP) {
        mpp->mpp_flags &= ~MPP_SHOULD_RWAKEUP;

        sorwakeup(mpp->mpp_socket);
    }

    if (mpp->mpp_flags & MPP_SHOULD_WWAKEUP) {
        mpp->mpp_flags &= ~MPP_SHOULD_WWAKEUP;

        sowwakeup(mpp->mpp_socket);
    }
}

static void
mptcp_reset_itfinfo(struct mpt_itf_info *info)
{
    memset(info, 0, sizeof(*info));
}
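
/*
 * Summary of the NECP actions handled below (for orientation): INITIAL
 * seeds the interface array on first registration (the socket lock is
 * already held, see the note at the top of this file), VIABLE adds or
 * updates an interface entry, and NONVIABLE clears it; both paths then
 * kick mptcp_sched_create_subflows(). A low-power interface is demoted
 * to NONVIABLE.
 */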
void
mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index,
    uint32_t necp_flags, __unused bool *viable)
{
    boolean_t has_v4 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
    boolean_t has_v6 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
    boolean_t has_nat64 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_NAT64);
    boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
    struct mppcb *mp = (struct mppcb *)handle;
    struct mptses *mpte = mptompte(mp);
    struct socket *mp_so;
    struct mptcb *mp_tp;
    uint32_t i, ifindex;
    struct ifnet *ifp;
    int locked = 0;

    ifindex = interface_index;
    VERIFY(ifindex != IFSCOPE_NONE);

    /* About to be garbage-collected (see note about MPTCP/NECP interactions) */
    if (mp->mpp_socket->so_usecount == 0) {
        return;
    }

    mp_so = mptetoso(mpte);

    if (action != NECP_CLIENT_CBACTION_INITIAL) {
        socket_lock(mp_so, 1);
        locked = 1;

        /* Check again, because it might have changed while waiting */
        if (mp->mpp_socket->so_usecount == 0) {
            goto out;
        }
    }

    socket_lock_assert_owned(mp_so);

    mp_tp = mpte->mpte_mptcb;

    ifnet_head_lock_shared();
    ifp = ifindex2ifnet[ifindex];
    ifnet_head_done();

    os_log(mptcp_log_handle, "%s - %lx: action: %u ifindex %u delegated to %u usecount %u mpt_flags %#x state %u v4 %u v6 %u nat64 %u power %u\n",
        __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), action, ifindex,
        ifp && ifp->if_delegated.ifp ? ifp->if_delegated.ifp->if_index : IFSCOPE_NONE,
        mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state,
        has_v4, has_v6, has_nat64, low_power);

    /* No need on fallen back sockets */
    if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
        goto out;
    }

    /*
     * When the interface goes into low-power mode we don't want to establish
     * new subflows on it. Thus, mark it internally as non-viable.
     */
    if (low_power) {
        action = NECP_CLIENT_CBACTION_NONVIABLE;
    }

    if (action == NECP_CLIENT_CBACTION_INITIAL) {
        mpte->mpte_flags |= MPTE_ITFINFO_INIT;
    }

    if (action == NECP_CLIENT_CBACTION_NONVIABLE) {
        for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
            if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
                continue;
            }

            if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
                mptcp_reset_itfinfo(&mpte->mpte_itfinfo[i]);
            }
        }

        mptcp_sched_create_subflows(mpte);
    } else if (action == NECP_CLIENT_CBACTION_VIABLE ||
        action == NECP_CLIENT_CBACTION_INITIAL) {
        int found_slot = 0, slot_index = -1;
        struct sockaddr *dst;

        if (ifp == NULL) {
            goto out;
        }

        if (IFNET_IS_COMPANION_LINK(ifp)) {
            goto out;
        }

        if (IFNET_IS_EXPENSIVE(ifp) &&
            (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
            goto out;
        }

        if (IFNET_IS_CONSTRAINED(ifp) &&
            (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
            goto out;
        }

        if (IFNET_IS_CELLULAR(ifp) &&
            (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
            goto out;
        }

        if (IS_INTF_CLAT46(ifp)) {
            has_v4 = FALSE;
        }

        /* Look for the slot on where to store/update the interface-info. */
        for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
            /* Found a potential empty slot where we can put it */
            if (mpte->mpte_itfinfo[i].ifindex == 0) {
                found_slot = 1;
                slot_index = i;
            }

            /*
             * The interface is already in our array. Check if we
             * need to update it.
             */
            if (mpte->mpte_itfinfo[i].ifindex == ifindex &&
                (mpte->mpte_itfinfo[i].has_v4_conn != has_v4 ||
                mpte->mpte_itfinfo[i].has_v6_conn != has_v6 ||
                mpte->mpte_itfinfo[i].has_nat64_conn != has_nat64)) {
                found_slot = 1;
                slot_index = i;
                break;
            }

            if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
                /*
                 * Ok, it's already there and we don't need
                 * to update it.
                 */
                goto out;
            }
        }

        dst = mptcp_get_session_dst(mpte, has_v6, has_v4);
        if (dst && dst->sa_family == AF_INET &&
            has_v6 && !has_nat64 && !has_v4) {
            if (found_slot) {
                mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
                mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
                mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
                mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
            }
            goto out;
        }

        if (found_slot == 0) {
            int new_size = mpte->mpte_itfinfo_size * 2;
            struct mpt_itf_info *info = kalloc_data(sizeof(*info) * new_size, Z_ZERO);

            if (info == NULL) {
                os_log_error(mptcp_log_handle, "%s - %lx: malloc failed for %u\n",
                    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), new_size);
                goto out;
            }

            memcpy(info, mpte->mpte_itfinfo, mpte->mpte_itfinfo_size * sizeof(*info));

            if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
                kfree_data(mpte->mpte_itfinfo, sizeof(*info) * mpte->mpte_itfinfo_size);
            }

            /* We allocated a new one, thus the first must be empty */
            slot_index = mpte->mpte_itfinfo_size;

            mpte->mpte_itfinfo = info;
            mpte->mpte_itfinfo_size = new_size;
        }

        VERIFY(slot_index >= 0 && slot_index < (int)mpte->mpte_itfinfo_size);
        mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
        mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
        mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
        mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;

        mptcp_sched_create_subflows(mpte);
    }

out:
    if (locked) {
        socket_unlock(mp_so, 1);
    }
}
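
/*
 * Example (illustrative): an application that sets
 * SO_RESTRICT_DENY_CELLULAR on the MPTCP socket causes
 * mptcp_set_restrictions() below to clear any cellular entries that NECP
 * reported earlier, so no subflow will be established over them.
 */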
void
mptcp_set_restrictions(struct socket *mp_so)
{
    struct mptses *mpte = mpsotompte(mp_so);
    uint32_t i;

    socket_lock_assert_owned(mp_so);

    ifnet_head_lock_shared();

    for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
        struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];
        uint32_t ifindex = info->ifindex;
        struct ifnet *ifp;

        if (ifindex == IFSCOPE_NONE) {
            continue;
        }

        ifp = ifindex2ifnet[ifindex];
        if (ifp == NULL) {
            continue;
        }

        if (IFNET_IS_EXPENSIVE(ifp) &&
            (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
            info->ifindex = IFSCOPE_NONE;
        }

        if (IFNET_IS_CONSTRAINED(ifp) &&
            (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
            info->ifindex = IFSCOPE_NONE;
        }

        if (IFNET_IS_CELLULAR(ifp) &&
            (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
            info->ifindex = IFSCOPE_NONE;
        }
    }

    ifnet_head_done();
}

#define DUMP_BUF_CHK() {        \
    clen -= k;                  \
    if (clen < 1)               \
            goto done;          \
    c += k;                     \
}

int
dump_mptcp_reass_qlen(char *str, int str_len)
{
    char *c = str;
    int k, clen = str_len;

    if (mptcp_reass_total_qlen != 0) {
        k = scnprintf(c, clen, "\nmptcp reass qlen %d\n",
            mptcp_reass_total_qlen);
        DUMP_BUF_CHK();
    }

done:
    return str_len - clen;
}
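
/*
 * Usage sketch (hypothetical caller; not compiled) for
 * dump_mptcp_reass_qlen() above, which returns the number of characters
 * written into the caller's buffer.
 */
#if 0
static void
example_dump(void)
{
    char buf[128];
    int written = dump_mptcp_reass_qlen(buf, sizeof(buf));

    if (written > 0) {
        /* buf now holds "\nmptcp reass qlen <n>\n". */
    }
}
#endif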