368 lines
9.3 KiB
ArmAsm
368 lines
9.3 KiB
ArmAsm
/*
|
|
* Copyright (c) 2020-2021 Apple Inc. All rights reserved.
|
|
*
|
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
|
|
*
|
|
* This file contains Original Code and/or Modifications of Original Code
|
|
* as defined in and that are subject to the Apple Public Source License
|
|
* Version 2.0 (the 'License'). You may not use this file except in
|
|
* compliance with the License. The rights granted to you under the License
|
|
* may not be used to create, or enable the creation or redistribution of,
|
|
* unlawful or unlicensed copies of an Apple operating system, or to
|
|
* circumvent, violate, or enable the circumvention or violation of, any
|
|
* terms of an Apple operating system software license agreement.
|
|
*
|
|
* Please obtain a copy of the License at
|
|
* http://www.opensource.apple.com/apsl/ and read it before using this file.
|
|
*
|
|
* The Original Code and all software distributed under the License are
|
|
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
|
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
|
|
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
|
|
* Please see the License for the specific language governing rights and
|
|
* limitations under the License.
|
|
*
|
|
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
|
|
*/
|
|
|
|
/*
 * extern int os_memcmp_mask_{16,32,48,64,80}B(const uint8_t *src1,
 *     const uint8_t *src2, const uint8_t *mask);
 *
 * This module implements fixed-length, masked memory-compare routines,
 * used mainly by the Skywalk networking subsystem.  Each routine is called
 * on every packet and therefore needs to be as efficient as possible.
 *
 * When built for the kernel (KERNEL defined), each routine saves and
 * restores the XMM registers it uses.
 */
|
|
|
|
/*
 * A non-kernel build must define LIBSYSCALL_INTERFACE; stop the build
 * with a diagnostic when neither KERNEL nor LIBSYSCALL_INTERFACE is set.
 */
#if !defined(KERNEL) && !defined(LIBSYSCALL_INTERFACE)
#error "LIBSYSCALL_INTERFACE not defined"
#endif /* !KERNEL && !LIBSYSCALL_INTERFACE */
|
|
|
|
/*
 * Symbolic names for the three pointer arguments, mapped onto the first
 * three integer argument registers.
 */
#define src1	%rdi	/* arg 1: first input buffer */
#define src2	%rsi	/* arg 2: second input buffer */
#define mask	%rdx	/* arg 3: byte mask */
|
|
|
|
/*
 * int os_memcmp_mask_16B(const uint8_t *src1, const uint8_t *src2,
 *     const uint8_t *mask);
 *
 * @abstract Masked equality test over two 16-byte buffers.
 *
 * @discussion
 * Evaluates (src1 ^ src2) & mask over all 16 bytes.  Returns zero when
 * the result is all zeroes, i.e. the buffers agree in every bit selected
 * by the mask; otherwise returns non-zero (this implementation yields 1).
 * All three buffers are read with unaligned loads.  In KERNEL builds
 * %xmm0-%xmm1 are spilled to the stack on entry and reloaded on exit.
 *
 * @param src1 first 16-byte input buffer
 * @param src2 second 16-byte input buffer
 * @param mask 16-byte byte mask applied before comparison
 */
	.text
	.globl	_os_memcmp_mask_16B
	.align	4
_os_memcmp_mask_16B:
	/* standard frame: save the caller's %rbp, then anchor our own */
	pushq	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Reserve 2 x 16 bytes and spill the XMM registers used below.
	 * movdqa needs a 16-byte aligned slot; with the usual SysV entry
	 * alignment the pushq above leaves %rsp suitably aligned.
	 */
	subq	$32, %rsp
	movdqa	%xmm0, (%rsp)
	movdqa	%xmm1, 0x10(%rsp)
#endif /* KERNEL */

	movdqu	(src1), %xmm0
	movdqu	(src2), %xmm1
	pxor	%xmm0, %xmm1		/* xmm1 = src1 ^ src2 */
	movdqu	(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = mask & (src1 ^ src2) */

	xorl	%eax, %eax		/* zero all of %rax before setne */
	ptest	%xmm0, %xmm0		/* ZF set iff xmm0 is all zeroes */
	setne	%al			/* 1 on mismatch, 0 on match */

#ifdef KERNEL
	/* reload the spilled XMM registers and release the stack slots */
	movdqa	(%rsp), %xmm0
	movdqa	0x10(%rsp), %xmm1
	addq	$32, %rsp
#endif /* KERNEL */

	/* tear down the frame */
	popq	%rbp
	ret
|
|
|
|
/*
 * int os_memcmp_mask_32B(const uint8_t *src1, const uint8_t *src2,
 *     const uint8_t *mask);
 *
 * @abstract Masked equality test over two 32-byte buffers.
 *
 * @discussion
 * Evaluates (src1 ^ src2) & mask in two 16-byte chunks and ORs the
 * chunks together.  Returns zero when the combined result is all zeroes,
 * i.e. the buffers agree in every bit selected by the mask; otherwise
 * returns non-zero (this implementation yields 1).  All buffers are read
 * with unaligned loads.  In KERNEL builds %xmm0-%xmm2 are spilled to the
 * stack on entry and reloaded on exit.
 *
 * @param src1 first 32-byte input buffer
 * @param src2 second 32-byte input buffer
 * @param mask 32-byte byte mask applied before comparison
 */
	.text
	.globl	_os_memcmp_mask_32B
	.align	4
_os_memcmp_mask_32B:
	/* standard frame: save the caller's %rbp, then anchor our own */
	pushq	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Reserve 3 x 16 bytes and spill the XMM registers used below.
	 * movdqa needs a 16-byte aligned slot; with the usual SysV entry
	 * alignment the pushq above leaves %rsp suitably aligned.
	 */
	subq	$48, %rsp
	movdqa	%xmm0, (%rsp)
	movdqa	%xmm1, 0x10(%rsp)
	movdqa	%xmm2, 0x20(%rsp)
#endif /* KERNEL */

	movdqu	(src1), %xmm0
	movdqu	16(src1), %xmm1
	movdqu	(src2), %xmm2
	pxor	%xmm0, %xmm2		/* xmm2 = diff, bytes 0-15 */
	movdqu	16(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = diff, bytes 16-31 */
	movdqu	(mask), %xmm1
	pand	%xmm2, %xmm1		/* xmm1 = masked diff, bytes 0-15 */
	movdqu	16(mask), %xmm2
	pand	%xmm0, %xmm2		/* xmm2 = masked diff, bytes 16-31 */
	por	%xmm1, %xmm2		/* xmm2 = OR of both chunks */

	xorl	%eax, %eax		/* zero all of %rax before setne */
	ptest	%xmm2, %xmm2		/* ZF set iff xmm2 is all zeroes */
	setne	%al			/* 1 on mismatch, 0 on match */

#ifdef KERNEL
	/* reload the spilled XMM registers and release the stack slots */
	movdqa	(%rsp), %xmm0
	movdqa	0x10(%rsp), %xmm1
	movdqa	0x20(%rsp), %xmm2
	addq	$48, %rsp
#endif /* KERNEL */

	/* tear down the frame */
	popq	%rbp
	ret
|
|
|
|
/*
 * int os_memcmp_mask_48B(const uint8_t *src1, const uint8_t *src2,
 *     const uint8_t *mask);
 *
 * @abstract Masked equality test over two 48-byte buffers.
 *
 * @discussion
 * Evaluates (src1 ^ src2) & mask in three 16-byte chunks and ORs the
 * chunks together.  Returns zero when the combined result is all zeroes,
 * i.e. the buffers agree in every bit selected by the mask; otherwise
 * returns non-zero (this implementation yields 1).  All buffers are read
 * with unaligned loads.  In KERNEL builds %xmm0-%xmm3 are spilled to the
 * stack on entry and reloaded on exit.
 *
 * @param src1 first 48-byte input buffer
 * @param src2 second 48-byte input buffer
 * @param mask 48-byte byte mask applied before comparison
 */
	.text
	.globl	_os_memcmp_mask_48B
	.align	4
_os_memcmp_mask_48B:
	/* standard frame: save the caller's %rbp, then anchor our own */
	pushq	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Reserve 4 x 16 bytes and spill the XMM registers used below.
	 * movdqa needs a 16-byte aligned slot; with the usual SysV entry
	 * alignment the pushq above leaves %rsp suitably aligned.
	 */
	subq	$64, %rsp
	movdqa	%xmm0, (%rsp)
	movdqa	%xmm1, 0x10(%rsp)
	movdqa	%xmm2, 0x20(%rsp)
	movdqa	%xmm3, 0x30(%rsp)
#endif /* KERNEL */

	movdqu	(src1), %xmm0
	movdqu	16(src1), %xmm1
	movdqu	32(src1), %xmm2
	movdqu	(src2), %xmm3
	pxor	%xmm0, %xmm3		/* xmm3 = diff, bytes 0-15 */
	movdqu	16(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = diff, bytes 16-31 */
	movdqu	32(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = diff, bytes 32-47 */
	movdqu	(mask), %xmm2
	pand	%xmm3, %xmm2		/* xmm2 = masked diff, bytes 0-15 */
	movdqu	16(mask), %xmm3
	pand	%xmm0, %xmm3		/* xmm3 = masked diff, bytes 16-31 */
	por	%xmm2, %xmm3		/* xmm3 = OR of chunks 0 and 1 */
	movdqu	32(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked diff, bytes 32-47 */
	por	%xmm3, %xmm0		/* xmm0 = OR of all three chunks */

	xorl	%eax, %eax		/* zero all of %rax before setne */
	ptest	%xmm0, %xmm0		/* ZF set iff xmm0 is all zeroes */
	setne	%al			/* 1 on mismatch, 0 on match */

#ifdef KERNEL
	/* reload the spilled XMM registers and release the stack slots */
	movdqa	(%rsp), %xmm0
	movdqa	0x10(%rsp), %xmm1
	movdqa	0x20(%rsp), %xmm2
	movdqa	0x30(%rsp), %xmm3
	addq	$64, %rsp
#endif /* KERNEL */

	/* tear down the frame */
	popq	%rbp
	ret
|
|
|
|
/*
 * int os_memcmp_mask_64B(const uint8_t *src1, const uint8_t *src2,
 *     const uint8_t *mask);
 *
 * @abstract Masked equality test over two 64-byte buffers.
 *
 * @discussion
 * Evaluates (src1 ^ src2) & mask in four 16-byte chunks and ORs the
 * chunks together.  Returns zero when the combined result is all zeroes,
 * i.e. the buffers agree in every bit selected by the mask; otherwise
 * returns non-zero (this implementation yields 1).  All buffers are read
 * with unaligned loads.  In KERNEL builds %xmm0-%xmm4 are spilled to the
 * stack on entry and reloaded on exit.
 *
 * @param src1 first 64-byte input buffer
 * @param src2 second 64-byte input buffer
 * @param mask 64-byte byte mask applied before comparison
 */
	.text
	.globl	_os_memcmp_mask_64B
	.align	4
_os_memcmp_mask_64B:
	/* standard frame: save the caller's %rbp, then anchor our own */
	pushq	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Reserve 5 x 16 bytes and spill the XMM registers used below.
	 * movdqa needs a 16-byte aligned slot; with the usual SysV entry
	 * alignment the pushq above leaves %rsp suitably aligned.
	 */
	subq	$80, %rsp
	movdqa	%xmm0, (%rsp)
	movdqa	%xmm1, 0x10(%rsp)
	movdqa	%xmm2, 0x20(%rsp)
	movdqa	%xmm3, 0x30(%rsp)
	movdqa	%xmm4, 0x40(%rsp)
#endif /* KERNEL */

	movdqu	(src1), %xmm0
	movdqu	16(src1), %xmm1
	movdqu	32(src1), %xmm2
	movdqu	48(src1), %xmm3
	movdqu	(src2), %xmm4
	pxor	%xmm0, %xmm4		/* xmm4 = diff, bytes 0-15 */
	movdqu	16(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = diff, bytes 16-31 */
	movdqu	32(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = diff, bytes 32-47 */
	movdqu	48(src2), %xmm2
	pxor	%xmm3, %xmm2		/* xmm2 = diff, bytes 48-63 */
	movdqu	(mask), %xmm3
	pand	%xmm4, %xmm3		/* xmm3 = masked diff, bytes 0-15 */
	movdqu	16(mask), %xmm4
	pand	%xmm0, %xmm4		/* xmm4 = masked diff, bytes 16-31 */
	por	%xmm3, %xmm4		/* xmm4 = OR of chunks 0 and 1 */
	movdqu	32(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked diff, bytes 32-47 */
	movdqu	48(mask), %xmm1
	pand	%xmm2, %xmm1		/* xmm1 = masked diff, bytes 48-63 */
	por	%xmm0, %xmm1		/* xmm1 = OR of chunks 2 and 3 */
	por	%xmm4, %xmm1		/* xmm1 = OR of all four chunks */

	xorl	%eax, %eax		/* zero all of %rax before setne */
	ptest	%xmm1, %xmm1		/* ZF set iff xmm1 is all zeroes */
	setne	%al			/* 1 on mismatch, 0 on match */

#ifdef KERNEL
	/* reload the spilled XMM registers and release the stack slots */
	movdqa	(%rsp), %xmm0
	movdqa	0x10(%rsp), %xmm1
	movdqa	0x20(%rsp), %xmm2
	movdqa	0x30(%rsp), %xmm3
	movdqa	0x40(%rsp), %xmm4
	addq	$80, %rsp
#endif /* KERNEL */

	/* tear down the frame */
	popq	%rbp
	ret
|
|
|
|
/*
 * int os_memcmp_mask_80B(const uint8_t *src1, const uint8_t *src2,
 *     const uint8_t *mask);
 *
 * @abstract Masked equality test over two 80-byte buffers.
 *
 * @discussion
 * Evaluates (src1 ^ src2) & mask in five 16-byte chunks and ORs the
 * chunks together.  Returns zero when the combined result is all zeroes,
 * i.e. the buffers agree in every bit selected by the mask; otherwise
 * returns non-zero (this implementation yields 1).  All buffers are read
 * with unaligned loads.  In KERNEL builds %xmm0-%xmm5 are spilled to the
 * stack on entry and reloaded on exit.
 *
 * @param src1 first 80-byte input buffer
 * @param src2 second 80-byte input buffer
 * @param mask 80-byte byte mask applied before comparison
 */
	.text
	.globl	_os_memcmp_mask_80B
	.align	4
_os_memcmp_mask_80B:
	/* standard frame: save the caller's %rbp, then anchor our own */
	pushq	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Reserve 6 x 16 bytes and spill the XMM registers used below.
	 * movdqa needs a 16-byte aligned slot; with the usual SysV entry
	 * alignment the pushq above leaves %rsp suitably aligned.
	 */
	subq	$96, %rsp
	movdqa	%xmm0, (%rsp)
	movdqa	%xmm1, 0x10(%rsp)
	movdqa	%xmm2, 0x20(%rsp)
	movdqa	%xmm3, 0x30(%rsp)
	movdqa	%xmm4, 0x40(%rsp)
	movdqa	%xmm5, 0x50(%rsp)
#endif /* KERNEL */

	movdqu	(src1), %xmm0
	movdqu	16(src1), %xmm1
	movdqu	32(src1), %xmm2
	movdqu	48(src1), %xmm3
	movdqu	64(src1), %xmm4
	movdqu	(src2), %xmm5
	pxor	%xmm0, %xmm5		/* xmm5 = diff, bytes 0-15 */
	movdqu	16(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = diff, bytes 16-31 */
	movdqu	32(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = diff, bytes 32-47 */
	movdqu	48(src2), %xmm2
	pxor	%xmm3, %xmm2		/* xmm2 = diff, bytes 48-63 */
	movdqu	64(src2), %xmm3
	pxor	%xmm4, %xmm3		/* xmm3 = diff, bytes 64-79 */
	movdqu	(mask), %xmm4
	pand	%xmm5, %xmm4		/* xmm4 = masked diff, bytes 0-15 */
	movdqu	16(mask), %xmm5
	pand	%xmm0, %xmm5		/* xmm5 = masked diff, bytes 16-31 */
	por	%xmm4, %xmm5		/* xmm5 = OR of chunks 0 and 1 */
	movdqu	32(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked diff, bytes 32-47 */
	movdqu	48(mask), %xmm4
	pand	%xmm2, %xmm4		/* xmm4 = masked diff, bytes 48-63 */
	por	%xmm0, %xmm4		/* xmm4 = OR of chunks 2 and 3 */
	movdqu	64(mask), %xmm1
	pand	%xmm3, %xmm1		/* xmm1 = masked diff, bytes 64-79 */
	por	%xmm5, %xmm4		/* xmm4 = OR of chunks 0 through 3 */
	por	%xmm1, %xmm4		/* xmm4 = OR of all five chunks */

	xorl	%eax, %eax		/* zero all of %rax before setne */
	ptest	%xmm4, %xmm4		/* ZF set iff xmm4 is all zeroes */
	setne	%al			/* 1 on mismatch, 0 on match */

#ifdef KERNEL
	/* reload the spilled XMM registers and release the stack slots */
	movdqa	(%rsp), %xmm0
	movdqa	0x10(%rsp), %xmm1
	movdqa	0x20(%rsp), %xmm2
	movdqa	0x30(%rsp), %xmm3
	movdqa	0x40(%rsp), %xmm4
	movdqa	0x50(%rsp), %xmm5
	addq	$96, %rsp
#endif /* KERNEL */

	/* tear down the frame */
	popq	%rbp
	ret
|