/*
 * Copyright (c) 2012-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*-
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef KERNEL
#include <sys/param.h>
#include <machine/endian.h>
#include <sys/mcache.h>
#include <sys/mbuf.h>
#include <kern/debug.h>
#include <libkern/libkern.h>
#include <mach/boolean.h>
#include <pexpert/pexpert.h>
#define CKSUM_ERR(fmt, args...) kprintf(fmt, ## args)
#else /* !KERNEL */
#ifndef LIBSYSCALL_INTERFACE
#error "LIBSYSCALL_INTERFACE not defined"
#endif /* !LIBSYSCALL_INTERFACE */
#include <stdlib.h>
#include <stddef.h>
#include <stdint.h>
#include <strings.h>
#include <mach/boolean.h>
#include <skywalk/os_skywalk_private.h>
#define CKSUM_ERR(fmt, args...) fprintf_stderr(fmt, ## args)
#endif /* !KERNEL */

/* compile time assert */
#ifndef _CASSERT
#define _CASSERT(x) _Static_assert(x, "compile-time assertion failed")
#endif /* !_CASSERT */

#ifndef VERIFY
#define VERIFY(EX) ((void)0)
#endif /* !VERIFY */

#ifndef CKSUM_ERR
#define CKSUM_ERR(fmt, args...) ((void)0)
#endif /* !CKSUM_ERR */

#define PREDICT_TRUE(x)  __builtin_expect(!!((long)(x)), 1L)
#define PREDICT_FALSE(x) __builtin_expect(!!((long)(x)), 0L)

/* fake mbuf struct used only for calling os_cpu_in_cksum_mbuf() */
struct _mbuf {
	struct _mbuf	*_m_next;
	void		*_m_pad;
	uint8_t		*_m_data;
	int32_t		_m_len;
};

extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);
extern uint32_t os_cpu_in_cksum_mbuf(struct _mbuf *, int, int, uint32_t);
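/*
 * Illustrative sketch, not part of the original source: because
 * os_cpu_in_cksum_mbuf() only walks {_m_next, _m_data, _m_len}, two
 * scattered buffers can be checksummed as one logical byte stream by
 * chaining fake mbufs.  The function name and parameters below are
 * hypothetical.
 */
#if 0
static uint32_t
example_cksum_two_buffers(uint8_t *buf1, int32_t len1,
    uint8_t *buf2, int32_t len2)
{
	/* second segment terminates the chain */
	struct _mbuf m2 = {
		._m_next = NULL,
		._m_data = buf2,
		._m_len = len2,
	};
	/* first segment points at the second */
	struct _mbuf m1 = {
		._m_next = &m2,
		._m_data = buf1,
		._m_len = len1,
	};

	/* sum len1 + len2 bytes starting at offset 0, with initial sum 0 */
	return os_cpu_in_cksum_mbuf(&m1, len1 + len2, 0, 0);
}
#endif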
uint32_t
os_cpu_in_cksum(const void *data, uint32_t len, uint32_t initial_sum)
{
	/*
	 * If data is 4-byte aligned (checked only on architectures that
	 * require it), the length is a multiple of 4 bytes (required),
	 * and the amount to checksum is small, this path is quicker;
	 * it is suitable for IPv4/TCP headers.
	 */
	if (
#if !defined(__arm64__) && !defined(__x86_64__)
		IS_P2ALIGNED(data, sizeof(uint32_t)) &&
#endif /* !__arm64__ && !__x86_64__ */
		len <= 64 && (len & 3) == 0) {
		uint8_t *p = __DECONST(uint8_t *, data);
		uint64_t sum = initial_sum;

		switch (len) {
		case 20:        /* simple IPv4 or TCP header */
			sum += *(uint32_t *)(void *)p;
			sum += *(uint32_t *)(void *)(p + 4);
			sum += *(uint32_t *)(void *)(p + 8);
			sum += *(uint32_t *)(void *)(p + 12);
			sum += *(uint32_t *)(void *)(p + 16);
			break;

		case 32:        /* TCP header + timestamp option */
			sum += *(uint32_t *)(void *)p;
			sum += *(uint32_t *)(void *)(p + 4);
			sum += *(uint32_t *)(void *)(p + 8);
			sum += *(uint32_t *)(void *)(p + 12);
			sum += *(uint32_t *)(void *)(p + 16);
			sum += *(uint32_t *)(void *)(p + 20);
			sum += *(uint32_t *)(void *)(p + 24);
			sum += *(uint32_t *)(void *)(p + 28);
			break;

		default:
			while (len) {
				sum += *(uint32_t *)(void *)p;
				p += 4;
				len -= 4;
			}
			break;
		}

		/* fold 64-bit to 16-bit (deferred carries) */
		sum = (sum >> 32) + (sum & 0xffffffff);  /* 33-bit */
		sum = (sum >> 16) + (sum & 0xffff);      /* 17-bit + carry */
		sum = (sum >> 16) + (sum & 0xffff);      /* 16-bit + carry */
		sum = (sum >> 16) + (sum & 0xffff);      /* final carry */

		return sum & 0xffff;
	}

	/*
	 * Otherwise, let os_cpu_in_cksum_mbuf() handle it; it only looks
	 * at 3 fields: {next,data,len}, and since it doesn't care about
	 * the authenticity of the mbuf, we use a fake one here.  Make
	 * sure the offsets are as expected.
	 */
#if defined(__LP64__)
	_CASSERT(offsetof(struct _mbuf, _m_next) == 0);
	_CASSERT(offsetof(struct _mbuf, _m_data) == 16);
	_CASSERT(offsetof(struct _mbuf, _m_len) == 24);
#else /* !__LP64__ */
	_CASSERT(offsetof(struct _mbuf, _m_next) == 0);
	_CASSERT(offsetof(struct _mbuf, _m_data) == 8);
	_CASSERT(offsetof(struct _mbuf, _m_len) == 12);
#endif /* !__LP64__ */
#ifdef KERNEL
	_CASSERT(offsetof(struct _mbuf, _m_next) == offsetof(struct mbuf, m_next));
	_CASSERT(offsetof(struct _mbuf, _m_data) == offsetof(struct mbuf, m_data));
	_CASSERT(offsetof(struct _mbuf, _m_len) == offsetof(struct mbuf, m_len));
#endif /* KERNEL */

	struct _mbuf m = {
		._m_next = NULL,
		._m_data = __DECONST(uint8_t *, data),
		._m_len = len,
	};

	return os_cpu_in_cksum_mbuf(&m, len, 0, initial_sum);
}
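/*
 * Illustrative sketch, not part of the original source: filling in and
 * verifying an IPv4 header checksum with os_cpu_in_cksum().  Since the
 * routine loads native-endian words, storing the complement back as a
 * native 16-bit word is self-consistent on either byte order.  The
 * function name is hypothetical; memcpy() is assumed available.
 */
#if 0
static void
example_ipv4_cksum(uint8_t ip_hdr[20])
{
	uint32_t sum;
	uint16_t cksum;

	/* zero the checksum field (header bytes 10-11) before summing */
	ip_hdr[10] = 0;
	ip_hdr[11] = 0;

	/* 20 bytes, a multiple of 4: takes the small-buffer fast path */
	sum = os_cpu_in_cksum(ip_hdr, 20, 0);

	/* the header checksum is the one's complement of the folded sum */
	cksum = (uint16_t)~sum;
	memcpy(&ip_hdr[10], &cksum, sizeof(cksum));

	/* on receive, the full header must fold to 0xffff to verify */
	VERIFY(os_cpu_in_cksum(ip_hdr, 20, 0) == 0xffff);
}
#endif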
#if defined(__i386__) || defined(__x86_64__)

/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 *
 * A discussion of different implementation techniques can be found in
 * RFC 1071.
 *
 * The default implementation for 32-bit architectures uses a 32-bit
 * accumulator and operates on 16-bit operands.
 *
 * The default implementation for 64-bit architectures uses a 64-bit
 * accumulator and operates on 32-bit operands.
 *
 * Both versions are unrolled to handle 32-byte / 64-byte fragments as the
 * core of the inner loop.  After each iteration of the inner loop, a
 * partial reduction is done to avoid carry in long packets.
 */

#if !defined(__LP64__)
/* 32-bit version */
uint32_t
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		if (mlen > off) {
			mlen -= off;
			data = m->_m_data + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->_m_next;
	}

	for (; len > 0; m = m->_m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		data = m->_m_data;
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		needs_swap = started_on_odd;
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			if (PREDICT_FALSE(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}

	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
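/*
 * Reference sketch, not part of the original source: the straightforward
 * RFC 1071 algorithm that the unrolled version above must agree with for
 * a contiguous, evenly started buffer: a 32-bit accumulator over 16-bit
 * words, carries folded at the end.  The function name is hypothetical;
 * memcpy() is assumed available.
 */
#if 0
static uint16_t
example_in_cksum_ref(const uint8_t *data, int len, uint32_t initial_sum)
{
	uint32_t sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	while (len > 1) {
		uint16_t word;
		memcpy(&word, data, sizeof(word));  /* native-endian load */
		sum += word;
		data += 2;
		len -= 2;
	}
	if (len == 1) {
		/* pad the odd trailing byte with a zero byte */
#if BYTE_ORDER == LITTLE_ENDIAN
		sum += *data;
#else
		sum += *data << 8;
#endif
	}

	/* fold the 32-bit sum into 16 bits, absorbing both carries */
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return (uint16_t)sum;
}
#endif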
#else /* __LP64__ */
/* 64-bit version */
uint32_t
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = initial_sum;

	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		if (mlen > off) {
			mlen -= off;
			data = m->_m_data + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->_m_next;
	}

	for (; len > 0; m = m->_m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		data = m->_m_data;
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		needs_swap = started_on_odd;
		if ((uintptr_t)data & 2) {
			if (mlen < 2) {
				goto trailing_bytes;
			}
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			if (PREDICT_FALSE(partial & (3ULL << 62))) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 56);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}

	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
#endif /* __LP64__ */
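/*
 * Illustrative sketch, not part of the original source: why needs_swap
 * rotates the partial sum.  A segment that starts on an odd byte of the
 * overall stream sums its 16-bit words byte-swapped relative to the
 * stream's word boundaries; the one's complement sum is endian-agnostic
 * up to a byte swap (RFC 1071), so rotating the folded partial by 8 bits
 * re-aligns it.  The split point below is hypothetical; len >= 2 assumed.
 */
#if 0
static uint32_t
example_cksum_odd_split(uint8_t *buf, int32_t len)
{
	/* bytes [1, len) form a segment that starts on an odd offset */
	struct _mbuf tail = {
		._m_next = NULL,
		._m_data = buf + 1,
		._m_len = len - 1,
	};
	struct _mbuf head = {
		._m_next = &tail,
		._m_data = buf,
		._m_len = 1,
	};

	/* must equal os_cpu_in_cksum(buf, len, 0) despite the odd split */
	return os_cpu_in_cksum_mbuf(&head, len, 0, 0);
}
#endif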
#endif /* __i386__ || __x86_64__ */
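/*
 * Illustrative sketch, not part of the original source: the initial_sum
 * argument chains partial checksums, e.g. a TCP/UDP pseudo-header
 * followed by the segment.  This is only valid when the first chunk has
 * even length (12 bytes for an IPv4 pseudo-header), so the second call's
 * words stay aligned with the wire's 16-bit boundaries.  The function
 * name and parameters are hypothetical.
 */
#if 0
static uint16_t
example_tcp_cksum(const uint8_t *pseudo_hdr, uint32_t pseudo_len,
    const uint8_t *tcp_segment, uint32_t tcp_len)
{
	uint32_t sum;

	/* sum the pseudo-header first (pseudo_len must be even)... */
	sum = os_cpu_in_cksum(pseudo_hdr, pseudo_len, 0);
	/* ...then continue across the TCP header and payload */
	sum = os_cpu_in_cksum(tcp_segment, tcp_len, sum);

	/* one's complement of the folded sum is the wire checksum */
	return (uint16_t)~sum;
}
#endif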