535 lines
15 KiB
C
535 lines
15 KiB
C
/*
|
|
* Copyright (c) 2012-2021 Apple Inc. All rights reserved.
|
|
*
|
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
|
|
*
|
|
* This file contains Original Code and/or Modifications of Original Code
|
|
* as defined in and that are subject to the Apple Public Source License
|
|
* Version 2.0 (the 'License'). You may not use this file except in
|
|
* compliance with the License. The rights granted to you under the License
|
|
* may not be used to create, or enable the creation or redistribution of,
|
|
* unlawful or unlicensed copies of an Apple operating system, or to
|
|
* circumvent, violate, or enable the circumvention or violation of, any
|
|
* terms of an Apple operating system software license agreement.
|
|
*
|
|
* Please obtain a copy of the License at
|
|
* http://www.opensource.apple.com/apsl/ and read it before using this file.
|
|
*
|
|
* The Original Code and all software distributed under the License are
|
|
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
|
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
|
|
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
|
|
* Please see the License for the specific language governing rights and
|
|
* limitations under the License.
|
|
*
|
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
|
|
*/
|
|
|
|
/*-
|
|
* Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#ifdef KERNEL
|
|
#include <sys/param.h>
|
|
#include <machine/endian.h>
|
|
#include <sys/mcache.h>
|
|
#include <sys/mbuf.h>
|
|
#include <kern/debug.h>
|
|
#include <libkern/libkern.h>
|
|
#include <mach/boolean.h>
|
|
#include <pexpert/pexpert.h>
|
|
#define CKSUM_ERR(fmt, args...) kprintf(fmt, ## args)
|
|
#else /* !KERNEL */
|
|
#ifndef LIBSYSCALL_INTERFACE
|
|
#error "LIBSYSCALL_INTERFACE not defined"
|
|
#endif /* !LIBSYSCALL_INTERFACE */
|
|
#include <stdlib.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <unistd.h>
|
|
#include <strings.h>
|
|
#include <mach/boolean.h>
|
|
#include <skywalk/os_skywalk_private.h>
|
|
#define CKSUM_ERR(fmt, args...) fprintf_stderr(fmt, ## args)
|
|
#endif /* !KERNEL */
|
|
|
|
/* compile time assert */
|
|
#ifndef _CASSERT
|
|
#define _CASSERT(x) _Static_assert(x, "compile-time assertion failed")
|
|
#endif /* !_CASSERT */
|
|
|
|
#ifndef VERIFY
|
|
#define VERIFY(EX) ((void)0)
|
|
#endif /* !VERIFY */
|
|
|
|
#ifndef CKSUM_ERR
|
|
#define CKSUM_ERR(fmt, args...) ((void)0)
|
|
#endif /* !CKSUM_ERR */
|
|
|
|
#define PREDICT_TRUE(x) __builtin_expect(!!((long)(x)), 1L)
|
|
#define PREDICT_FALSE(x) __builtin_expect(!!((long)(x)), 0L)
|
|
|
|
/* fake mbuf struct used only for calling os_cpu_in_cksum_mbuf() */
|
|
struct _mbuf {
|
|
struct _mbuf *_m_next;
|
|
void *_m_pad;
|
|
uint8_t *_m_data;
|
|
int32_t _m_len;
|
|
};
|
|
|
|
extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);
|
|
extern uint32_t os_cpu_in_cksum_mbuf(struct _mbuf *, int, int, uint32_t);
|
|
|
|
uint32_t
|
|
os_cpu_in_cksum(const void *data, uint32_t len, uint32_t initial_sum)
|
|
{
|
|
/*
|
|
* If data is 4-bytes aligned (conditional), length is multiple
|
|
* of 4-bytes (required), and the amount to checksum is small,
|
|
* this would be quicker; this is suitable for IPv4/TCP header.
|
|
*/
|
|
if (
|
|
#if !defined(__arm64__) && !defined(__x86_64__)
|
|
IS_P2ALIGNED(data, sizeof(uint32_t)) &&
|
|
#endif /* !__arm64__ && !__x86_64__ */
|
|
len <= 64 && (len & 3) == 0) {
|
|
uint8_t *p = __DECONST(uint8_t *, data);
|
|
uint64_t sum = initial_sum;
|
|
|
|
switch (len) {
|
|
case 20: /* simple IPv4 or TCP header */
|
|
sum += *(uint32_t *)(void *)p;
|
|
sum += *(uint32_t *)(void *)(p + 4);
|
|
sum += *(uint32_t *)(void *)(p + 8);
|
|
sum += *(uint32_t *)(void *)(p + 12);
|
|
sum += *(uint32_t *)(void *)(p + 16);
|
|
break;
|
|
|
|
case 32: /* TCP header + timestamp option */
|
|
sum += *(uint32_t *)(void *)p;
|
|
sum += *(uint32_t *)(void *)(p + 4);
|
|
sum += *(uint32_t *)(void *)(p + 8);
|
|
sum += *(uint32_t *)(void *)(p + 12);
|
|
sum += *(uint32_t *)(void *)(p + 16);
|
|
sum += *(uint32_t *)(void *)(p + 20);
|
|
sum += *(uint32_t *)(void *)(p + 24);
|
|
sum += *(uint32_t *)(void *)(p + 28);
|
|
break;
|
|
|
|
default:
|
|
while (len) {
|
|
sum += *(uint32_t *)(void *)p;
|
|
p += 4;
|
|
len -= 4;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/* fold 64-bit to 16-bit (deferred carries) */
|
|
sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
|
|
sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
|
|
sum = (sum >> 16) + (sum & 0xffff); /* 16-bit + carry */
|
|
sum = (sum >> 16) + (sum & 0xffff); /* final carry */
|
|
|
|
return sum & 0xffff;
|
|
}
|
|
|
|
/*
|
|
* Otherwise, let os_cpu_in_cksum_mbuf() handle it; it only looks
|
|
* at 3 fields: {next,data,len}, and since it doesn't care about
|
|
* the authenticity of the mbuf, we use a fake one here. Make
|
|
* sure the offsets are as expected.
|
|
*/
|
|
#if defined(__LP64__)
|
|
_CASSERT(offsetof(struct _mbuf, _m_next) == 0);
|
|
_CASSERT(offsetof(struct _mbuf, _m_data) == 16);
|
|
_CASSERT(offsetof(struct _mbuf, _m_len) == 24);
|
|
#else /* !__LP64__ */
|
|
_CASSERT(offsetof(struct _mbuf, _m_next) == 0);
|
|
_CASSERT(offsetof(struct _mbuf, _m_data) == 8);
|
|
_CASSERT(offsetof(struct _mbuf, _m_len) == 12);
|
|
#endif /* !__LP64__ */
|
|
#ifdef KERNEL
|
|
_CASSERT(offsetof(struct _mbuf, _m_next) ==
|
|
offsetof(struct mbuf, m_next));
|
|
_CASSERT(offsetof(struct _mbuf, _m_data) ==
|
|
offsetof(struct mbuf, m_data));
|
|
_CASSERT(offsetof(struct _mbuf, _m_len) ==
|
|
offsetof(struct mbuf, m_len));
|
|
#endif /* KERNEL */
|
|
struct _mbuf m = {
|
|
._m_next = NULL,
|
|
._m_data = __DECONST(uint8_t *, data),
|
|
._m_len = len,
|
|
};
|
|
|
|
return os_cpu_in_cksum_mbuf(&m, len, 0, initial_sum);
|
|
}
|
|
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
|
|
/*
|
|
* Checksum routine for Internet Protocol family headers (Portable Version).
|
|
*
|
|
* This routine is very heavily used in the network
|
|
* code and should be modified for each CPU to be as fast as possible.
|
|
*
|
|
* A discussion of different implementation techniques can be found in
|
|
* RFC 1071.
|
|
*
|
|
* The default implementation for 32-bit architectures is using
|
|
* a 32-bit accumulator and operating on 16-bit operands.
|
|
*
|
|
* The default implementation for 64-bit architectures is using
|
|
* a 64-bit accumulator and operating on 32-bit operands.
|
|
*
|
|
* Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core
|
|
* of the inner loop. After each iteration of the inner loop, a partial
|
|
* reduction is done to avoid carry in long packets.
|
|
*/
|
|
|
|
#if !defined(__LP64__)
|
|
/* 32-bit version */
|
|
uint32_t
|
|
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
|
|
{
|
|
int mlen;
|
|
uint32_t sum, partial;
|
|
unsigned int final_acc;
|
|
uint8_t *data;
|
|
boolean_t needs_swap, started_on_odd;
|
|
|
|
VERIFY(len >= 0);
|
|
VERIFY(off >= 0);
|
|
|
|
needs_swap = FALSE;
|
|
started_on_odd = FALSE;
|
|
sum = (initial_sum >> 16) + (initial_sum & 0xffff);
|
|
|
|
for (;;) {
|
|
if (PREDICT_FALSE(m == NULL)) {
|
|
CKSUM_ERR("%s: out of data\n", __func__);
|
|
return (uint32_t)-1;
|
|
}
|
|
mlen = m->_m_len;
|
|
if (mlen > off) {
|
|
mlen -= off;
|
|
data = m->_m_data + off;
|
|
goto post_initial_offset;
|
|
}
|
|
off -= mlen;
|
|
if (len == 0) {
|
|
break;
|
|
}
|
|
m = m->_m_next;
|
|
}
|
|
|
|
for (; len > 0; m = m->_m_next) {
|
|
if (PREDICT_FALSE(m == NULL)) {
|
|
CKSUM_ERR("%s: out of data\n", __func__);
|
|
return (uint32_t)-1;
|
|
}
|
|
mlen = m->_m_len;
|
|
data = m->_m_data;
|
|
post_initial_offset:
|
|
if (mlen == 0) {
|
|
continue;
|
|
}
|
|
if (mlen > len) {
|
|
mlen = len;
|
|
}
|
|
len -= mlen;
|
|
|
|
partial = 0;
|
|
if ((uintptr_t)data & 1) {
|
|
/* Align on word boundary */
|
|
started_on_odd = !started_on_odd;
|
|
#if BYTE_ORDER == LITTLE_ENDIAN
|
|
partial = *data << 8;
|
|
#else
|
|
partial = *data;
|
|
#endif
|
|
++data;
|
|
--mlen;
|
|
}
|
|
needs_swap = started_on_odd;
|
|
while (mlen >= 32) {
|
|
__builtin_prefetch(data + 32);
|
|
partial += *(uint16_t *)(void *)data;
|
|
partial += *(uint16_t *)(void *)(data + 2);
|
|
partial += *(uint16_t *)(void *)(data + 4);
|
|
partial += *(uint16_t *)(void *)(data + 6);
|
|
partial += *(uint16_t *)(void *)(data + 8);
|
|
partial += *(uint16_t *)(void *)(data + 10);
|
|
partial += *(uint16_t *)(void *)(data + 12);
|
|
partial += *(uint16_t *)(void *)(data + 14);
|
|
partial += *(uint16_t *)(void *)(data + 16);
|
|
partial += *(uint16_t *)(void *)(data + 18);
|
|
partial += *(uint16_t *)(void *)(data + 20);
|
|
partial += *(uint16_t *)(void *)(data + 22);
|
|
partial += *(uint16_t *)(void *)(data + 24);
|
|
partial += *(uint16_t *)(void *)(data + 26);
|
|
partial += *(uint16_t *)(void *)(data + 28);
|
|
partial += *(uint16_t *)(void *)(data + 30);
|
|
data += 32;
|
|
mlen -= 32;
|
|
if (PREDICT_FALSE(partial & 0xc0000000)) {
|
|
if (needs_swap) {
|
|
partial = (partial << 8) +
|
|
(partial >> 24);
|
|
}
|
|
sum += (partial >> 16);
|
|
sum += (partial & 0xffff);
|
|
partial = 0;
|
|
}
|
|
}
|
|
if (mlen & 16) {
|
|
partial += *(uint16_t *)(void *)data;
|
|
partial += *(uint16_t *)(void *)(data + 2);
|
|
partial += *(uint16_t *)(void *)(data + 4);
|
|
partial += *(uint16_t *)(void *)(data + 6);
|
|
partial += *(uint16_t *)(void *)(data + 8);
|
|
partial += *(uint16_t *)(void *)(data + 10);
|
|
partial += *(uint16_t *)(void *)(data + 12);
|
|
partial += *(uint16_t *)(void *)(data + 14);
|
|
data += 16;
|
|
mlen -= 16;
|
|
}
|
|
/*
|
|
* mlen is not updated below as the remaining tests
|
|
* are using bit masks, which are not affected.
|
|
*/
|
|
if (mlen & 8) {
|
|
partial += *(uint16_t *)(void *)data;
|
|
partial += *(uint16_t *)(void *)(data + 2);
|
|
partial += *(uint16_t *)(void *)(data + 4);
|
|
partial += *(uint16_t *)(void *)(data + 6);
|
|
data += 8;
|
|
}
|
|
if (mlen & 4) {
|
|
partial += *(uint16_t *)(void *)data;
|
|
partial += *(uint16_t *)(void *)(data + 2);
|
|
data += 4;
|
|
}
|
|
if (mlen & 2) {
|
|
partial += *(uint16_t *)(void *)data;
|
|
data += 2;
|
|
}
|
|
if (mlen & 1) {
|
|
#if BYTE_ORDER == LITTLE_ENDIAN
|
|
partial += *data;
|
|
#else
|
|
partial += *data << 8;
|
|
#endif
|
|
started_on_odd = !started_on_odd;
|
|
}
|
|
|
|
if (needs_swap) {
|
|
partial = (partial << 8) + (partial >> 24);
|
|
}
|
|
sum += (partial >> 16) + (partial & 0xffff);
|
|
/*
|
|
* Reduce sum to allow potential byte swap
|
|
* in the next iteration without carry.
|
|
*/
|
|
sum = (sum >> 16) + (sum & 0xffff);
|
|
}
|
|
final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
|
|
final_acc = (final_acc >> 16) + (final_acc & 0xffff);
|
|
return final_acc & 0xffff;
|
|
}
|
|
|
|
#else /* __LP64__ */
|
|
/* 64-bit version */
|
|
uint32_t
|
|
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
|
|
{
|
|
int mlen;
|
|
uint64_t sum, partial;
|
|
unsigned int final_acc;
|
|
uint8_t *data;
|
|
boolean_t needs_swap, started_on_odd;
|
|
|
|
VERIFY(len >= 0);
|
|
VERIFY(off >= 0);
|
|
|
|
needs_swap = FALSE;
|
|
started_on_odd = FALSE;
|
|
sum = initial_sum;
|
|
|
|
for (;;) {
|
|
if (PREDICT_FALSE(m == NULL)) {
|
|
CKSUM_ERR("%s: out of data\n", __func__);
|
|
return (uint32_t)-1;
|
|
}
|
|
mlen = m->_m_len;
|
|
if (mlen > off) {
|
|
mlen -= off;
|
|
data = m->_m_data + off;
|
|
goto post_initial_offset;
|
|
}
|
|
off -= mlen;
|
|
if (len == 0) {
|
|
break;
|
|
}
|
|
m = m->_m_next;
|
|
}
|
|
|
|
for (; len > 0; m = m->_m_next) {
|
|
if (PREDICT_FALSE(m == NULL)) {
|
|
CKSUM_ERR("%s: out of data\n", __func__);
|
|
return (uint32_t)-1;
|
|
}
|
|
mlen = m->_m_len;
|
|
data = m->_m_data;
|
|
post_initial_offset:
|
|
if (mlen == 0) {
|
|
continue;
|
|
}
|
|
if (mlen > len) {
|
|
mlen = len;
|
|
}
|
|
len -= mlen;
|
|
|
|
partial = 0;
|
|
if ((uintptr_t)data & 1) {
|
|
/* Align on word boundary */
|
|
started_on_odd = !started_on_odd;
|
|
#if BYTE_ORDER == LITTLE_ENDIAN
|
|
partial = *data << 8;
|
|
#else
|
|
partial = *data;
|
|
#endif
|
|
++data;
|
|
--mlen;
|
|
}
|
|
needs_swap = started_on_odd;
|
|
if ((uintptr_t)data & 2) {
|
|
if (mlen < 2) {
|
|
goto trailing_bytes;
|
|
}
|
|
partial += *(uint16_t *)(void *)data;
|
|
data += 2;
|
|
mlen -= 2;
|
|
}
|
|
while (mlen >= 64) {
|
|
__builtin_prefetch(data + 32);
|
|
__builtin_prefetch(data + 64);
|
|
partial += *(uint32_t *)(void *)data;
|
|
partial += *(uint32_t *)(void *)(data + 4);
|
|
partial += *(uint32_t *)(void *)(data + 8);
|
|
partial += *(uint32_t *)(void *)(data + 12);
|
|
partial += *(uint32_t *)(void *)(data + 16);
|
|
partial += *(uint32_t *)(void *)(data + 20);
|
|
partial += *(uint32_t *)(void *)(data + 24);
|
|
partial += *(uint32_t *)(void *)(data + 28);
|
|
partial += *(uint32_t *)(void *)(data + 32);
|
|
partial += *(uint32_t *)(void *)(data + 36);
|
|
partial += *(uint32_t *)(void *)(data + 40);
|
|
partial += *(uint32_t *)(void *)(data + 44);
|
|
partial += *(uint32_t *)(void *)(data + 48);
|
|
partial += *(uint32_t *)(void *)(data + 52);
|
|
partial += *(uint32_t *)(void *)(data + 56);
|
|
partial += *(uint32_t *)(void *)(data + 60);
|
|
data += 64;
|
|
mlen -= 64;
|
|
if (PREDICT_FALSE(partial & (3ULL << 62))) {
|
|
if (needs_swap) {
|
|
partial = (partial << 8) +
|
|
(partial >> 56);
|
|
}
|
|
sum += (partial >> 32);
|
|
sum += (partial & 0xffffffff);
|
|
partial = 0;
|
|
}
|
|
}
|
|
/*
|
|
* mlen is not updated below as the remaining tests
|
|
* are using bit masks, which are not affected.
|
|
*/
|
|
if (mlen & 32) {
|
|
partial += *(uint32_t *)(void *)data;
|
|
partial += *(uint32_t *)(void *)(data + 4);
|
|
partial += *(uint32_t *)(void *)(data + 8);
|
|
partial += *(uint32_t *)(void *)(data + 12);
|
|
partial += *(uint32_t *)(void *)(data + 16);
|
|
partial += *(uint32_t *)(void *)(data + 20);
|
|
partial += *(uint32_t *)(void *)(data + 24);
|
|
partial += *(uint32_t *)(void *)(data + 28);
|
|
data += 32;
|
|
}
|
|
if (mlen & 16) {
|
|
partial += *(uint32_t *)(void *)data;
|
|
partial += *(uint32_t *)(void *)(data + 4);
|
|
partial += *(uint32_t *)(void *)(data + 8);
|
|
partial += *(uint32_t *)(void *)(data + 12);
|
|
data += 16;
|
|
}
|
|
if (mlen & 8) {
|
|
partial += *(uint32_t *)(void *)data;
|
|
partial += *(uint32_t *)(void *)(data + 4);
|
|
data += 8;
|
|
}
|
|
if (mlen & 4) {
|
|
partial += *(uint32_t *)(void *)data;
|
|
data += 4;
|
|
}
|
|
if (mlen & 2) {
|
|
partial += *(uint16_t *)(void *)data;
|
|
data += 2;
|
|
}
|
|
trailing_bytes:
|
|
if (mlen & 1) {
|
|
#if BYTE_ORDER == LITTLE_ENDIAN
|
|
partial += *data;
|
|
#else
|
|
partial += *data << 8;
|
|
#endif
|
|
started_on_odd = !started_on_odd;
|
|
}
|
|
|
|
if (needs_swap) {
|
|
partial = (partial << 8) + (partial >> 56);
|
|
}
|
|
sum += (partial >> 32) + (partial & 0xffffffff);
|
|
/*
|
|
* Reduce sum to allow potential byte swap
|
|
* in the next iteration without carry.
|
|
*/
|
|
sum = (sum >> 32) + (sum & 0xffffffff);
|
|
}
|
|
final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
|
|
((sum >> 16) & 0xffff) + (sum & 0xffff);
|
|
final_acc = (final_acc >> 16) + (final_acc & 0xffff);
|
|
final_acc = (final_acc >> 16) + (final_acc & 0xffff);
|
|
return final_acc & 0xffff;
|
|
}
|
|
#endif /* __LP64 */
|
|
|
|
#endif /* __i386__ || __x86_64__ */
|