/* * Copyright (c) 2020-2021 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * extern int os_memcmp_mask_{16,32,48,64,80}B(const uint8_t *src1, * const uint8_t *src2, const uint8_t *mask); * * This module implements fixed-length memory compare with mask routines, * used mainly by the Skywalk networking subsystem. Each routine is called * on every packet and therefore needs to be as efficient as possible. * * When used in the kernel, these routines save and restore XMM registers. */ #ifndef KERNEL #ifndef LIBSYSCALL_INTERFACE #error "LIBSYSCALL_INTERFACE not defined" #endif /* !LIBSYSCALL_INTERFACE */ #endif /* !KERNEL */ #define src1 %rdi /* 1st arg */ #define src2 %rsi /* 2nd arg */ #define mask %rdx /* 3rd arg */ /* * @abstract Compare 16-byte buffers src1 against src2, applying the byte * masks to input data before comparison. * * @discussion * Returns zero if the two buffers are identical after applying the byte * masks, otherwise non-zero. * * @param src1 first 16-byte input buffer * @param src2 second 16-byte input buffer * @param byte_mask 16-byte byte mask applied before comparision */ .globl _os_memcmp_mask_16B .text .align 4 _os_memcmp_mask_16B: /* push callee-saved registers and set up base pointer */ push %rbp movq %rsp, %rbp #ifdef KERNEL /* allocate stack space and save xmm regs */ sub $2*16, %rsp movdqa %xmm0, 0*16(%rsp) movdqa %xmm1, 1*16(%rsp) #endif /* KERNEL */ movdqu (src1), %xmm0 movdqu (src2), %xmm1 pxor %xmm0, %xmm1 movdqu (mask), %xmm0 pand %xmm1, %xmm0 xorq %rax, %rax ptest %xmm0, %xmm0 setne %al #ifdef KERNEL /* restore xmm regs and deallocate stack space */ movdqa 0*16(%rsp), %xmm0 movdqa 1*16(%rsp), %xmm1 add $2*16, %rsp #endif /* KERNEL */ /* restore callee-saved registers */ pop %rbp ret /* * @abstract Compare 32-byte buffers src1 against src2, applying the byte * masks to input data before comparison. * * @discussion * Returns zero if the two buffers are identical after applying the byte * masks, otherwise non-zero. * * @param src1 first 32-byte input buffer * @param src2 second 32-byte input buffer * @param byte_mask 32-byte byte mask applied before comparision */ .globl _os_memcmp_mask_32B .text .align 4 _os_memcmp_mask_32B: /* push callee-saved registers and set up base pointer */ push %rbp movq %rsp, %rbp #ifdef KERNEL /* allocate stack space and save xmm regs */ sub $3*16, %rsp movdqa %xmm0, 0*16(%rsp) movdqa %xmm1, 1*16(%rsp) movdqa %xmm2, 2*16(%rsp) #endif /* KERNEL */ movdqu (src1), %xmm0 movdqu 0x10(src1), %xmm1 movdqu (src2), %xmm2 pxor %xmm0, %xmm2 movdqu 0x10(src2), %xmm0 pxor %xmm1, %xmm0 movdqu (mask), %xmm1 pand %xmm2, %xmm1 movdqu 0x10(mask), %xmm2 pand %xmm0, %xmm2 por %xmm1, %xmm2 xorq %rax, %rax ptest %xmm2, %xmm2 setne %al #ifdef KERNEL /* restore xmm regs and deallocate stack space */ movdqa 0*16(%rsp), %xmm0 movdqa 1*16(%rsp), %xmm1 movdqa 2*16(%rsp), %xmm2 add $3*16, %rsp #endif /* KERNEL */ /* restore callee-saved registers */ pop %rbp ret /* * @abstract Compare 48-byte buffers src1 against src2, applying the byte * masks to input data before comparison. * * @discussion * Returns zero if the two buffers are identical after applying the byte * masks, otherwise non-zero. * * @param src1 first 48-byte input buffer * @param src2 second 48-byte input buffer * @param byte_mask 48-byte byte mask applied before comparision */ .globl _os_memcmp_mask_48B .text .align 4 _os_memcmp_mask_48B: /* push callee-saved registers and set up base pointer */ push %rbp movq %rsp, %rbp #ifdef KERNEL /* allocate stack space and save xmm regs */ sub $4*16, %rsp movdqa %xmm0, 0*16(%rsp) movdqa %xmm1, 1*16(%rsp) movdqa %xmm2, 2*16(%rsp) movdqa %xmm3, 3*16(%rsp) #endif /* KERNEL */ movdqu (src1), %xmm0 movdqu 0x10(src1), %xmm1 movdqu 0x20(src1), %xmm2 movdqu (src2), %xmm3 pxor %xmm0, %xmm3 movdqu 0x10(src2), %xmm0 pxor %xmm1, %xmm0 movdqu 0x20(src2), %xmm1 pxor %xmm2, %xmm1 movdqu (mask), %xmm2 pand %xmm3, %xmm2 movdqu 0x10(mask), %xmm3 pand %xmm0, %xmm3 por %xmm2, %xmm3 movdqu 0x20(mask), %xmm0 pand %xmm1, %xmm0 por %xmm3, %xmm0 xorq %rax, %rax ptest %xmm0, %xmm0 setne %al #ifdef KERNEL /* restore xmm regs and deallocate stack space */ movdqa 0*16(%rsp), %xmm0 movdqa 1*16(%rsp), %xmm1 movdqa 2*16(%rsp), %xmm2 movdqa 3*16(%rsp), %xmm3 add $4*16, %rsp #endif /* KERNEL */ /* restore callee-saved registers */ pop %rbp ret /* * @abstract Compare 64-byte buffers src1 against src2, applying the byte * masks to input data before comparison. * * @discussion * Returns zero if the two buffers are identical after applying the byte * masks, otherwise non-zero. * * @param src1 first 64-byte input buffer * @param src2 second 64-byte input buffer * @param byte_mask 64-byte byte mask applied before comparision */ .globl _os_memcmp_mask_64B .text .align 4 _os_memcmp_mask_64B: /* push callee-saved registers and set up base pointer */ push %rbp movq %rsp, %rbp #ifdef KERNEL /* allocate stack space and save xmm regs */ sub $5*16, %rsp movdqa %xmm0, 0*16(%rsp) movdqa %xmm1, 1*16(%rsp) movdqa %xmm2, 2*16(%rsp) movdqa %xmm3, 3*16(%rsp) movdqa %xmm4, 4*16(%rsp) #endif /* KERNEL */ movdqu (src1), %xmm0 movdqu 0x10(src1), %xmm1 movdqu 0x20(src1), %xmm2 movdqu 0x30(src1), %xmm3 movdqu (src2), %xmm4 pxor %xmm0, %xmm4 movdqu 0x10(src2), %xmm0 pxor %xmm1, %xmm0 movdqu 0x20(src2), %xmm1 pxor %xmm2, %xmm1 movdqu 0x30(src2), %xmm2 pxor %xmm3, %xmm2 movdqu (mask), %xmm3 pand %xmm4, %xmm3 movdqu 0x10(mask), %xmm4 pand %xmm0, %xmm4 por %xmm3, %xmm4 movdqu 0x20(mask), %xmm0 pand %xmm1, %xmm0 movdqu 0x30(mask), %xmm1 pand %xmm2, %xmm1 por %xmm0, %xmm1 por %xmm4, %xmm1 xorq %rax, %rax ptest %xmm1, %xmm1 setne %al #ifdef KERNEL /* restore xmm regs and deallocate stack space */ movdqa 0*16(%rsp), %xmm0 movdqa 1*16(%rsp), %xmm1 movdqa 2*16(%rsp), %xmm2 movdqa 3*16(%rsp), %xmm3 movdqa 4*16(%rsp), %xmm4 add $5*16, %rsp #endif /* KERNEL */ /* restore callee-saved registers */ pop %rbp ret /* * @abstract Compare 80-byte buffers src1 against src2, applying the byte * masks to input data before comparison. * * @discussion * Returns zero if the two buffers are identical after applying the byte * masks, otherwise non-zero. * * @param src1 first 80-byte input buffer * @param src2 second 80-byte input buffer * @param byte_mask 80-byte byte mask applied before comparision */ .globl _os_memcmp_mask_80B .text .align 4 _os_memcmp_mask_80B: /* push callee-saved registers and set up base pointer */ push %rbp movq %rsp, %rbp #ifdef KERNEL /* allocate stack space and save xmm regs */ sub $6*16, %rsp movdqa %xmm0, 0*16(%rsp) movdqa %xmm1, 1*16(%rsp) movdqa %xmm2, 2*16(%rsp) movdqa %xmm3, 3*16(%rsp) movdqa %xmm4, 4*16(%rsp) movdqa %xmm5, 5*16(%rsp) #endif /* KERNEL */ movdqu (src1), %xmm0 movdqu 0x10(src1), %xmm1 movdqu 0x20(src1), %xmm2 movdqu 0x30(src1), %xmm3 movdqu 0x40(src1), %xmm4 movdqu (src2), %xmm5 pxor %xmm0, %xmm5 movdqu 0x10(src2), %xmm0 pxor %xmm1, %xmm0 movdqu 0x20(src2), %xmm1 pxor %xmm2, %xmm1 movdqu 0x30(src2), %xmm2 pxor %xmm3, %xmm2 movdqu 0x40(src2), %xmm3 pxor %xmm4, %xmm3 movdqu (mask), %xmm4 pand %xmm5, %xmm4 movdqu 0x10(mask), %xmm5 pand %xmm0, %xmm5 por %xmm4, %xmm5 movdqu 0x20(mask), %xmm0 pand %xmm1, %xmm0 movdqu 0x30(mask), %xmm4 pand %xmm2, %xmm4 por %xmm0, %xmm4 movdqu 0x40(mask), %xmm1 pand %xmm3, %xmm1 por %xmm5, %xmm4 por %xmm1, %xmm4 xorq %rax, %rax ptest %xmm4, %xmm4 setne %al #ifdef KERNEL /* restore xmm regs and deallocate stack space */ movdqa 0*16(%rsp), %xmm0 movdqa 1*16(%rsp), %xmm1 movdqa 2*16(%rsp), %xmm2 movdqa 3*16(%rsp), %xmm3 movdqa 4*16(%rsp), %xmm4 movdqa 5*16(%rsp), %xmm5 add $6*16, %rsp #endif /* KERNEL */ /* restore callee-saved registers */ pop %rbp ret