/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define XNU_TEST_BITMAP
#include
#include
#include

/*
 * How ulock promotion works:
 *
 * There's a requested policy field on every thread called 'promotions', which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner. In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost. It does NOT transfer
 * ownership/priority boost to the new thread. Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
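/*
 * Schematic of the userspace side of the UL_UNFAIR_LOCK protocol described
 * above. This is an illustrative sketch only, not the libplatform
 * os_unfair_lock implementation (which packs additional flag bits into the
 * lock word); __ulock_wait() and __ulock_wake() are the private userspace
 * syscall stubs.
 *
 *	uint32_t lock;    // 0 == unlocked, otherwise the owner's thread port name
 *
 *	void lock_slow(uint32_t seen_value)
 *	{
 *		// the kernel resolves seen_value to the owning thread, applies a
 *		// turnstile promotion to it, and blocks us until a wake or until
 *		// the lock word no longer matches seen_value
 *		__ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock, seen_value, 0);
 *	}
 *
 *	void unlock_slow(void)
 *	{
 *		// we were the owner: the kernel drops our boost and wakes the
 *		// highest-priority waiter, recording it as the new ull_owner
 *		__ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock, 0);
 *	}
 */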
static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");

#if ULL_TICKET_LOCK
typedef lck_ticket_t ull_lock_t;
#define ull_lock_init(ull)      lck_ticket_init(&ull->ull_lock, &ull_lck_grp)
#define ull_lock_destroy(ull)   lck_ticket_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull)           lck_ticket_lock(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull)         lck_ticket_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)   lck_ticket_assert_owned(&ull->ull_lock)
#define ull_assert_notwned(ull) lck_ticket_assert_not_owned(&ull->ull_lock)
#else
typedef lck_spin_t ull_lock_t;
#define ull_lock_init(ull)      lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
#define ull_lock_destroy(ull)   lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull)           lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull)         lck_spin_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)   LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
#define ull_assert_notwned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)
#endif /* ULL_TICKET_LOCK */

#define ULOCK_TO_EVENT(ull)   ((event_t)ull)
#define EVENT_TO_ULOCK(event) ((ull_t *)event)

typedef enum {
	ULK_INVALID = 0,
	ULK_UADDR,
	ULK_XPROC,
} ulk_type;

typedef struct {
	union {
		struct __attribute__((packed)) {
			user_addr_t     ulk_addr;
			/*
			 * We use the task address as a hashing key,
			 * so that ulock wakes across exec can't
			 * be confused.
			 */
			task_t          ulk_task __kernel_data_semantics;
		};
		struct __attribute__((packed)) {
			uint64_t        ulk_object;
			uint64_t        ulk_offset;
		};
	};
	ulk_type        ulk_key_type;
} ulk_t;

#define ULK_UADDR_LEN   (sizeof(user_addr_t) + sizeof(task_t))
#define ULK_XPROC_LEN   (sizeof(uint64_t) + sizeof(uint64_t))

inline static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
	if (a->ulk_key_type != b->ulk_key_type) {
		return false;
	}

	if (a->ulk_key_type == ULK_UADDR) {
		return (a->ulk_task == b->ulk_task) &&
		       (a->ulk_addr == b->ulk_addr);
	}

	assert(a->ulk_key_type == ULK_XPROC);
	return (a->ulk_object == b->ulk_object) &&
	       (a->ulk_offset == b->ulk_offset);
}
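/*
 * Illustrative sketch of the two key flavors (hypothetical values, not
 * compiled). A per-task key is (task, user address); a cross-process key is
 * (VM object id, offset) as produced by uaddr_findobj() further down, so two
 * processes mapping the same page agree on the key.
 *
 *	ulk_t local = {
 *		.ulk_addr = uaddr,              // lock word address in this task
 *		.ulk_task = proc_task(p),
 *		.ulk_key_type = ULK_UADDR,
 *	};
 *	ulk_t shared = {
 *		.ulk_object = object,           // from uaddr_findobj()
 *		.ulk_offset = offset,
 *		.ulk_key_type = ULK_XPROC,
 *	};
 *	assert(!ull_key_match(&local, &shared)); // different key types never match
 *
 * ULL_INDEX() (below) hashes only the packed prefix that is valid for the
 * key's type: ULK_UADDR_LEN or ULK_XPROC_LEN bytes.
 */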
typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock
	 * i.e. it may be out of date WRT the real value in userspace.
	 */
	thread_t        ull_owner;      /* holds +1 thread reference */
	ulk_t           ull_key;
	ull_lock_t      ull_lock;
	uint            ull_bucket_index;
	int32_t         ull_nwaiters;
	int32_t         ull_refcount;
	uint8_t         ull_opcode;
	struct turnstile *ull_turnstile;
	queue_chain_t   ull_hash_link;
} ull_t;

#define ULL_MUST_EXIST  0x0001
static void ull_put(ull_t *);

static uint32_t ulock_adaptive_spin_usecs = 20;

SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");

#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;

static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	switch (ull->ull_key.ulk_key_type) {
	case ULK_UADDR:
		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
		kprintf("ull_key.ulk_task\t%p\n", ull->ull_key.ulk_task);
		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
		break;
	case ULK_XPROC:
		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
		break;
	default:
		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
		break;
	}
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
#endif

typedef struct ull_bucket {
	queue_head_t ulb_head;
#if ULL_TICKET_LOCK
	lck_ticket_t ulb_lock;
#else
	lck_spin_t   ulb_lock;
#endif /* ULL_TICKET_LOCK */
} ull_bucket_t;

static SECURITY_READ_ONLY_LATE(int) ull_hash_buckets;
static SECURITY_READ_ONLY_LATE(ull_bucket_t *) ull_bucket;
static uint32_t ull_nzalloc = 0;

static KALLOC_TYPE_DEFINE(ull_zone, ull_t, KT_DEFAULT);

#if ULL_TICKET_LOCK
#define ull_bucket_lock(i)   lck_ticket_lock(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i) lck_ticket_unlock(&ull_bucket[i].ulb_lock)
#else
#define ull_bucket_lock(i)   lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)
#endif /* ULL_TICKET_LOCK */

static __inline__ uint32_t
ull_hash_index(const void *key, size_t length)
{
	uint32_t hash = os_hash_jenkins(key, length);

	hash &= (ull_hash_buckets - 1);

	return hash;
}

#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)

static void
ulock_initialize(void)
{
	assert(thread_max > 16);
	/*
	 * Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4
	 */
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max / 4);

	ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
	    ZALIGN_PTR);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i].ulb_head);
#if ULL_TICKET_LOCK
		lck_ticket_init(&ull_bucket[i].ulb_lock, &ull_lck_grp);
#else
		lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
#endif /* ULL_TICKET_LOCK */
	}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, ulock_initialize);
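/*
 * Worked example of the sizing above (illustrative only, assuming
 * bit_ceiling() returns ceil(log2(x)), which is what the comment and the
 * assert imply):
 *
 *	thread_max = 5000
 *	round up to a power of 2  -> 8192 (2^13, so bit_ceiling() == 13)
 *	divide by 4               -> 1 << (13 - 2) = 2048 buckets
 *	assert: 2048 >= 5000 / 4 = 1250
 *
 * Keeping the bucket count a power of two is what lets ull_hash_index()
 * reduce the Jenkins hash with a simple mask instead of a modulo.
 */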
#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given task address.
 * if task==0, dump the whole table.
 */
static int
ull_hash_dump(task_t task)
{
	int count = 0;
	if (task == TASK_NULL) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (task == TASK_NULL) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				if ((task == TASK_NULL) || ((elem->ull_key.ulk_key_type == ULK_UADDR)
				    && (task == elem->ull_key.ulk_task))) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (task == TASK_NULL) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	return count;
}
#endif

static ull_t *
ull_alloc(ulk_t *key)
{
	ull_t *ull = (ull_t *)zalloc_flags(ull_zone, Z_SET_NOTSHARED);
	assert(ull != NULL);

	ull->ull_refcount = 1;
	ull->ull_key = *key;
	ull->ull_bucket_index = ULL_INDEX(key);
	ull->ull_nwaiters = 0;
	ull->ull_opcode = 0;

	ull->ull_owner = THREAD_NULL;
	ull->ull_turnstile = TURNSTILE_NULL;

	ull_lock_init(ull);

	ull_nzalloc++;
	return ull;
}

static void
ull_free(ull_t *ull)
{
	assert(ull->ull_owner == THREAD_NULL);
	assert(ull->ull_turnstile == TURNSTILE_NULL);

	ull_assert_notwned(ull);

	ull_lock_destroy(ull);

	zfree(ull_zone, ull);
}

/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
	ull_t *elem;

	ull_bucket_lock(i);
	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_bucket_unlock(i);
			assert(new_ull == NULL);
			assert(unused_ull == NULL);
			return NULL;
		}

		if (new_ull == NULL) {
			/* Alloc above failed */
			ull_bucket_unlock(i);
			return NULL;
		}

		ull = new_ull;
		ull_lock(ull);
		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
	} else if (!(flags & ULL_MUST_EXIST)) {
		assert(new_ull);
		assert(unused_ull);
		assert(*unused_ull == NULL);
		*unused_ull = new_ull;
	}

	ull->ull_refcount++;

	ull_bucket_unlock(i);

	return ull; /* still locked */
}
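/*
 * Sketch of the ull_get()/ull_put() discipline used by the wait/wake paths
 * below (illustrative only; error handling elided):
 *
 *	ull_t *unused_ull = NULL;
 *	ull_t *ull = ull_get(&key, 0, &unused_ull);  // returns locked, +1 ref
 *	if (ull != NULL) {
 *		// ... inspect/modify ull under ull_lock ...
 *		ull_put(ull);                        // drops the ref and unlocks,
 *		                                     // frees when refcount hits 0
 *	}
 *	if (unused_ull != NULL) {
 *		// ull_get() pre-allocated an entry but found an existing one
 *		ull_free(unused_ull);
 *	}
 *
 * Wake-side callers pass ULL_MUST_EXIST and a NULL unused_ull, so they never
 * allocate and simply get NULL when no waiter has created the entry.
 */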
/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	ull_bucket_lock(ull->ull_bucket_index);
	remqueue(&ull->ull_hash_link);
	ull_bucket_unlock(ull->ull_bucket_index);

	ull_free(ull);
}

extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset,
    vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
extern vm_map_t current_map(void);
extern boolean_t machine_thread_on_core(thread_t thread);

static int
uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
	kern_return_t ret;
	vm_page_info_basic_data_t info;
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
	if (ret != KERN_SUCCESS) {
		return EINVAL;
	}

	if (objectp != NULL) {
		*objectp = (uint64_t)info.object_id;
	}
	if (offsetp != NULL) {
		*offsetp = (uint64_t)info.offset;
	}

	return 0;
}

static void ulock_wait_continue(void *, wait_result_t);
static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);

inline static int
wait_result_to_return_code(wait_result_t wr)
{
	int ret = 0;

	switch (wr) {
	case THREAD_AWAKENED:
		break;
	case THREAD_TIMED_OUT:
		ret = ETIMEDOUT;
		break;
	case THREAD_INTERRUPTED:
	case THREAD_RESTART:
	default:
		ret = EINTR;
		break;
	}

	return ret;
}

static int
ulock_resolve_owner(uint32_t value, thread_t *owner)
{
	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);

	*owner = port_name_to_thread(owner_name,
	    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
	    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
	if (*owner == THREAD_NULL) {
		/*
		 * Translation failed - even though the lock value is up to date,
		 * whatever was stored in the lock wasn't actually a thread port.
		 */
		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
	}
	return 0;
}
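/*
 * Sketch of the cross-process case that the ULK_XPROC key supports
 * (illustrative only; __ulock_wait()/__ulock_wake() are the private userspace
 * syscall stubs, and "shared" is assumed to be an _Atomic uint32_t * mapped
 * into both processes, e.g. via a shared file or shm mapping):
 *
 *	// process A: sleep until *shared changes from 0 (timeout 0 == forever)
 *	__ulock_wait(UL_COMPARE_AND_WAIT_SHARED | ULF_NO_ERRNO, shared, 0, 0);
 *
 *	// process B: publish a new value, then wake any waiters
 *	atomic_store_explicit(shared, 1, memory_order_release);
 *	__ulock_wake(UL_COMPARE_AND_WAIT_SHARED | ULF_NO_ERRNO | ULF_WAKE_ALL,
 *	    shared, 0);
 *
 * Because both sides use a _SHARED opcode, uaddr_findobj() keys the wait
 * queue on (VM object, offset) rather than (task, address), so the two
 * mappings rendezvous on the same ull_t even at different virtual addresses.
 */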
int
sys_ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
	struct ulock_wait2_args args2;

	args2.operation = args->operation;
	args2.addr      = args->addr;
	args2.value     = args->value;
	args2.timeout   = (uint64_t)(args->timeout) * NSEC_PER_USEC;
	args2.value2    = 0;

	return sys_ulock_wait2(p, &args2, retval);
}
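/*
 * Worked example of the conversion above (illustrative): a legacy
 * ulock_wait() caller passing timeout = 2500 (microseconds) reaches
 * sys_ulock_wait2() with args2.timeout = 2500 * NSEC_PER_USEC = 2,500,000
 * nanoseconds; timeout = 0 still means "wait forever". ulock_wait2() callers
 * pass nanoseconds directly, or, when ULF_DEADLINE is set, a value that is
 * used as the wait deadline without conversion (see the deadline setup below).
 */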
int
sys_ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;

	if (flags & ULF_WAIT_CANCEL_POINT) {
		__pthread_testcancel(1);
	}

	int ret = 0;
	thread_t self = current_thread();
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;

	ull_t *unused_ull = NULL;

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	bool set_owner = false;
	bool xproc = false;
	size_t lock_size = sizeof(uint32_t);
	int copy_ret;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	case UL_COMPARE_AND_WAIT64:
		lock_size = sizeof(uint64_t);
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
		xproc = true;
		break;
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		lock_size = sizeof(uint64_t);
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	uint64_t value = 0;

	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = proc_task(p);
		key.ulk_addr = args->addr;
	}

	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
		/*
		 * Attempt the copyin outside of the lock once,
		 *
		 * If it doesn't match (which is common), return right away.
		 *
		 * If it matches, resolve the current owner, and if it is on core,
		 * spin a bit waiting for the value to change. If the owner isn't on
		 * core, or if the value stays stable, then go on with the regular
		 * blocking code.
		 */
		uint64_t end = 0;
		uint32_t u32;

		ret = copyin_atomic32(args->addr, &u32);
		if (ret || u32 != args->value) {
			goto munge_retval;
		}
		for (;;) {
			if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
				break;
			}

			/* owner_thread may have a +1 starting here */

			if (!machine_thread_on_core(owner_thread)) {
				break;
			}
			if (end == 0) {
				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
				    NSEC_PER_USEC, &end);
			} else if (mach_absolute_time() > end) {
				break;
			}
			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
				goto munge_retval;
			}
		}
	}

	ull_t *ull = ull_get(&key, 0, &unused_ull);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ret = EDOM;
		goto out_locked;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until exists,
	 * holding the ull spinlock across copyin forces any
	 * vm_fault we encounter to fail.
	 */

	/* copyin_atomicXX always checks alignment */

	if (lock_size == 4) {
		uint32_t u32;
		copy_ret = copyin_atomic32(args->addr, &u32);
		value = u32;
	} else {
		copy_ret = copyin_atomic64(args->addr, &value);
	}

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == proc_getpid(p)) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		/* copyin() will return an error if the access to the user addr would have faulted,
		 * so just return and let the user level code fault it in.
		 */
		ret = copy_ret;
		goto out_locked;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		goto out_locked;
	}

	if (set_owner) {
		if (owner_thread == THREAD_NULL) {
			ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
			if (ret == EOWNERDEAD) {
				/*
				 * Translation failed - even though the lock value is up to date,
				 * whatever was stored in the lock wasn't actually a thread port.
				 */
				goto out_locked;
			}
			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
			ret = 0;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *    i) holding the user lock or
		 *    ii) has just unlocked the user lock after I looked
		 *        and is heading toward the kernel to call ull_wake.
		 *        If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
		 * on it to come by later to issue the wakeup and lose its promotion.
		 */

		/* Return the +1 ref from the ull_owner field */
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		if (owner_thread != THREAD_NULL) {
			/* The ull_owner field now owns a +1 ref on owner_thread */
			thread_reference(owner_thread);
			ull->ull_owner = owner_thread;
		}
	}

	wait_result_t wr;
	uint64_t timeout = args->timeout; /* nanoseconds */
	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
	struct turnstile *ts;

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	thread_set_pending_block_hint(self, kThreadWaitUserLock);

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		interruptible |= THREAD_WAIT_NOREPORT;
	}

	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	if (timeout) {
		if (flags & ULF_DEADLINE) {
			deadline = timeout;
		} else {
			nanoseconds_to_deadline(timeout, &deadline);
		}
	}

	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
	    interruptible, deadline);

	if (wr == THREAD_WAITING) {
		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
		uthread->uu_save.uus_ulock_wait_data.ull = ull;
		uthread->uu_save.uus_ulock_wait_data.retval = retval;
		uthread->uu_save.uus_ulock_wait_data.flags = flags;
		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
	}

	ull_unlock(ull);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		if (set_owner && owner_thread != THREAD_NULL) {
			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
		} else {
			assert(owner_thread == THREAD_NULL);
			thread_block_parameter(ulock_wait_continue, ull);
		}
		/* NOT REACHED */
	}
	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_locked:
	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
	owner_thread = NULL;

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	assert(*retval >= 0);

munge_retval:
	if (owner_thread) {
		thread_deallocate(owner_thread);
	}
	if (ret == ESTALE) {
		ret = 0;
	}
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}

/*
 * Must be called with ull_lock held
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
	ull_assert_owned(ull);

	thread_t old_lingering_owner = THREAD_NULL;

	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		old_lingering_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		memset(&ull->ull_key, 0, sizeof ull->ull_key);
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	/* Need to be called after dropping the interlock */
	turnstile_cleanup();

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}
	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}
	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);
}

__attribute__((noreturn))
static void
ulock_wait_continue(__unused void * parameter, wait_result_t wr)
{
	uthread_t uthread = current_uthread();
	int ret = 0;

	ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	unix_syscall_return(ret);
}

int
sys_ulock_wake(struct proc *p, struct ulock_wake_args *args, int32_t *retval)
{
	int ret = 0;
#if DEVELOPMENT || DEBUG
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);

	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(proc_task(p));
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(TASK_NULL);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif
	ret = ulock_wake(proc_task(p), args->operation, args->addr, args->wake_value);

	if ((args->operation & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	return ret;
}
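/*
 * Sketch of how a userspace caller consumes the ULF_NO_ERRNO convention used
 * by both syscalls above (illustrative only; __ulock_wait() is the private
 * userspace syscall stub):
 *
 *	int rc = __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, &word, expected, 0);
 *	if (rc < 0) {
 *		// with ULF_NO_ERRNO the error comes back as a negative return
 *		// value (-ETIMEDOUT, -EINTR, -EFAULT, ...) instead of errno
 *	} else {
 *		// rc >= 0: for ulock_wait this is the count of waiters still
 *		// blocked on the ull, as recorded by ulock_wait_cleanup() above,
 *		// which callers can use to decide whether contention remains
 *	}
 */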
int
ulock_wake(task_t task, uint32_t operation, user_addr_t addr, uint64_t wake_value)
{
	uint8_t opcode = (uint8_t)(operation & UL_OPCODE_MASK);
	uint flags = operation & UL_FLAGS_MASK;
	int ret = 0;
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread = THREAD_NULL;

	bool set_owner = false;
	bool allow_non_owner = false;
	bool xproc = false;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
		if (!set_owner) {
			ret = EINVAL;
			goto munge_retval;
		}

		allow_non_owner = true;
	}

	if (addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = task;
		key.ulk_addr = addr;
	}

	if (flags & ULF_WAKE_THREAD) {
		mach_port_name_t wake_thread_name = (mach_port_name_t)(wake_value);
		wake_thread = port_name_to_thread(wake_thread_name,
		    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
		    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
	thread_t new_owner = THREAD_NULL;
	struct turnstile *ts = TURNSTILE_NULL;
	thread_t cleanup_thread = THREAD_NULL;

	if (ull == NULL) {
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	if (opcode != ull->ull_opcode) {
		ret = EDOM;
		goto out_ull_put;
	}

	if (set_owner) {
		if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
			/*
			 * If the current thread isn't the known owner,
			 * then this wake call was late to the party,
			 * and the kernel already knows who owns the lock.
			 *
			 * This current owner already knows the lock is contended
			 * and will redrive wakes, just bail out.
			 */
			goto out_ull_put;
		}
	} else {
		assert(ull->ull_owner == THREAD_NULL);
	}

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	assert(ts != TURNSTILE_NULL);

	if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    wake_thread, THREAD_AWAKENED);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			ret = EALREADY;
		}
	} else if (flags & ULF_WAKE_ALL) {
		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED,
		    set_owner ? WAITQ_UPDATE_INHERITOR : WAITQ_WAKEUP_DEFAULT);
	} else if (set_owner) {
		/*
		 * The turnstile waitq is priority ordered,
		 * and will wake up the highest priority waiter
		 * and set it as the inheritor for us.
		 */
		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	} else {
		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
	}

	if (set_owner) {
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		cleanup_thread = ull->ull_owner;
		ull->ull_owner = new_owner;
	}

	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_ull_put:
	ull_put(ull);

	if (ts != TURNSTILE_NULL) {
		/* Need to be called after dropping the interlock */
		turnstile_cleanup();
	}

	if (cleanup_thread != THREAD_NULL) {
		thread_deallocate(cleanup_thread);
	}

munge_retval:
	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	return ret;
}

void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);

	zone_require(ull_zone->kt_zv.zv_zone, ull);

	switch (ull->ull_opcode) {
	case UL_UNFAIR_LOCK:
	case UL_UNFAIR_LOCK64_SHARED:
		waitinfo->owner = thread_tid(ull->ull_owner);
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		waitinfo->owner = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	default:
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
		break;
	}
	return;
}