/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/atomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/turnstile.h>
#include <kern/debug.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <os/hash.h>
#include <sys/ulock.h>

/*
 * How ulock promotion works:
 *
 * There's a requested policy field on every thread called 'promotions', which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner. In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost. It does NOT transfer
 * ownership/priority boost to the new thread. Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */

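/*
 * Illustrative userspace call pattern (a hedged sketch; not part of this
 * file, and the wrapper names/signatures are libsystem implementation
 * details that may vary by release): libplatform's unfair lock slow path
 * stores the owner's thread port name in the lock word, then parks and
 * wakes through thin wrappers over these syscalls:
 *
 *	// Contended acquire: tell the kernel who owns the lock so the
 *	// owner can be promoted, then block until it wakes us.
 *	__ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock, owner_value, 0);
 *
 *	// Contended release: wake the highest base-priority waiter,
 *	// which then re-contends for the lock word.
 *	__ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock, 0);
 */
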
static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");


#if ULL_TICKET_LOCK
typedef lck_ticket_t ull_lock_t;
#define ull_lock_init(ull)       lck_ticket_init(&ull->ull_lock, &ull_lck_grp)
#define ull_lock_destroy(ull)    lck_ticket_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull)            lck_ticket_lock(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull)          lck_ticket_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)    lck_ticket_assert_owned(&ull->ull_lock)
#define ull_assert_notowned(ull) lck_ticket_assert_not_owned(&ull->ull_lock)
#else
typedef lck_spin_t ull_lock_t;
#define ull_lock_init(ull)       lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
#define ull_lock_destroy(ull)    lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull)            lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull)          lck_spin_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)    LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
#define ull_assert_notowned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)
#endif /* ULL_TICKET_LOCK */

#define ULOCK_TO_EVENT(ull)   ((event_t)ull)
#define EVENT_TO_ULOCK(event) ((ull_t *)event)

typedef enum {
	ULK_INVALID = 0,
	ULK_UADDR,
	ULK_XPROC,
} ulk_type;

typedef struct {
	union {
		struct __attribute__((packed)) {
			user_addr_t     ulk_addr;
			/*
			 * We use the task address as a hashing key,
			 * so that ulock wakes across exec can't
			 * be confused.
			 */
			task_t          ulk_task __kernel_data_semantics;
		};
		struct __attribute__((packed)) {
			uint64_t        ulk_object;
			uint64_t        ulk_offset;
		};
	};
	ulk_type        ulk_key_type;
} ulk_t;

#define ULK_UADDR_LEN   (sizeof(user_addr_t) + sizeof(task_t))
#define ULK_XPROC_LEN   (sizeof(uint64_t) + sizeof(uint64_t))

inline static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
	if (a->ulk_key_type != b->ulk_key_type) {
		return false;
	}

	if (a->ulk_key_type == ULK_UADDR) {
		return (a->ulk_task == b->ulk_task) &&
		       (a->ulk_addr == b->ulk_addr);
	}

	assert(a->ulk_key_type == ULK_XPROC);
	return (a->ulk_object == b->ulk_object) &&
	       (a->ulk_offset == b->ulk_offset);
}

typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock,
	 * i.e. it may be out of date WRT the real value in userspace.
	 */
	thread_t        ull_owner;      /* holds +1 thread reference */
	ulk_t           ull_key;
	ull_lock_t      ull_lock;
	uint            ull_bucket_index;
	int32_t         ull_nwaiters;
	int32_t         ull_refcount;
	uint8_t         ull_opcode;
	struct turnstile *ull_turnstile;
	queue_chain_t   ull_hash_link;
} ull_t;

#define ULL_MUST_EXIST  0x0001
static void ull_put(ull_t *);

static uint32_t ulock_adaptive_spin_usecs = 20;

SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");

#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;

static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	switch (ull->ull_key.ulk_key_type) {
	case ULK_UADDR:
		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
		kprintf("ull_key.ulk_task\t%p\n", ull->ull_key.ulk_task);
		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
		break;
	case ULK_XPROC:
		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
		break;
	default:
		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
		break;
	}
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n", ull->ull_turnstile);
}
#endif

typedef struct ull_bucket {
	queue_head_t ulb_head;
#if ULL_TICKET_LOCK
	lck_ticket_t ulb_lock;
#else
	lck_spin_t   ulb_lock;
#endif /* ULL_TICKET_LOCK */
} ull_bucket_t;

static SECURITY_READ_ONLY_LATE(int) ull_hash_buckets;
static SECURITY_READ_ONLY_LATE(ull_bucket_t *) ull_bucket;
static uint32_t ull_nzalloc = 0;
static KALLOC_TYPE_DEFINE(ull_zone, ull_t, KT_DEFAULT);

#if ULL_TICKET_LOCK
#define ull_bucket_lock(i)   lck_ticket_lock(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i) lck_ticket_unlock(&ull_bucket[i].ulb_lock)
#else
#define ull_bucket_lock(i)   lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)
#endif /* ULL_TICKET_LOCK */

static __inline__ uint32_t
ull_hash_index(const void *key, size_t length)
{
	uint32_t hash = os_hash_jenkins(key, length);

	hash &= (ull_hash_buckets - 1);

	return hash;
}

#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)

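/*
 * Worked sizing example (illustrative numbers, assuming bit_ceiling()
 * returns ceil(log2(n)) as the shift below implies): with
 * thread_max = 2560, bit_ceiling(2560) = 12, so the table gets
 * 1 << (12 - 2) = 1024 buckets, i.e. the 4096-entry power-of-2
 * round-up divided by 4, roughly one bucket per four threads.
 */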
static void
ulock_initialize(void)
{
	assert(thread_max > 16);
	/* Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4.
	 */
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max / 4);

	ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
	    ZALIGN_PTR);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i].ulb_head);
#if ULL_TICKET_LOCK
		lck_ticket_init(&ull_bucket[i].ulb_lock, &ull_lck_grp);
#else
		lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
#endif /* ULL_TICKET_LOCK */
	}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, ulock_initialize);

#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given task address.
 * If task == 0, dump the whole table.
 */
static int
ull_hash_dump(task_t task)
{
	int count = 0;
	if (task == TASK_NULL) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (task == TASK_NULL) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				if ((task == TASK_NULL) || ((elem->ull_key.ulk_key_type == ULK_UADDR)
				    && (task == elem->ull_key.ulk_task))) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (task == TASK_NULL) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	return count;
}
#endif

static ull_t *
ull_alloc(ulk_t *key)
{
	ull_t *ull = (ull_t *)zalloc_flags(ull_zone, Z_SET_NOTSHARED);
	assert(ull != NULL);

	ull->ull_refcount = 1;
	ull->ull_key = *key;
	ull->ull_bucket_index = ULL_INDEX(key);
	ull->ull_nwaiters = 0;
	ull->ull_opcode = 0;

	ull->ull_owner = THREAD_NULL;
	ull->ull_turnstile = TURNSTILE_NULL;

	ull_lock_init(ull);

	ull_nzalloc++;
	return ull;
}

static void
ull_free(ull_t *ull)
{
	assert(ull->ull_owner == THREAD_NULL);
	assert(ull->ull_turnstile == TURNSTILE_NULL);

	ull_assert_notowned(ull);

	ull_lock_destroy(ull);

	zfree(ull_zone, ull);
}

/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked.
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
	ull_t *elem;

	ull_bucket_lock(i);
	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_bucket_unlock(i);
			assert(new_ull == NULL);
			assert(unused_ull == NULL);
			return NULL;
		}

		if (new_ull == NULL) {
			/* Alloc above failed */
			ull_bucket_unlock(i);
			return NULL;
		}

		ull = new_ull;
		ull_lock(ull);
		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
	} else if (!(flags & ULL_MUST_EXIST)) {
		assert(new_ull);
		assert(unused_ull);
		assert(*unused_ull == NULL);
		*unused_ull = new_ull;
	}

	ull->ull_refcount++;

	ull_bucket_unlock(i);

	return ull; /* still locked */
}

/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	ull_bucket_lock(ull->ull_bucket_index);
	remqueue(&ull->ull_hash_link);
	ull_bucket_unlock(ull->ull_bucket_index);

	ull_free(ull);
}

extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
extern vm_map_t current_map(void);
extern boolean_t machine_thread_on_core(thread_t thread);

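/*
 * Resolve a user address in the current map to a (VM object id, offset)
 * pair via vm_map_page_info(). Shared ("xproc") ulocks are keyed on this
 * pair so that waiters mapping the same memory object from different
 * address spaces rendezvous on the same ull_t. Returns 0 on success,
 * EINVAL if the address doesn't resolve.
 */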
static int
uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
	kern_return_t ret;
	vm_page_info_basic_data_t info;
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
	if (ret != KERN_SUCCESS) {
		return EINVAL;
	}

	if (objectp != NULL) {
		*objectp = (uint64_t)info.object_id;
	}
	if (offsetp != NULL) {
		*offsetp = (uint64_t)info.offset;
	}

	return 0;
}

static void ulock_wait_continue(void *, wait_result_t);
static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);

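/*
 * Map a Mach wait_result_t onto the errno this syscall reports:
 * a normal wakeup is success, a timeout becomes ETIMEDOUT, and
 * interruption/restart collapse to EINTR.
 */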
inline static int
wait_result_to_return_code(wait_result_t wr)
{
	int ret = 0;

	switch (wr) {
	case THREAD_AWAKENED:
		break;
	case THREAD_TIMED_OUT:
		ret = ETIMEDOUT;
		break;
	case THREAD_INTERRUPTED:
	case THREAD_RESTART:
	default:
		ret = EINTR;
		break;
	}

	return ret;
}

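/*
 * Translate the thread port name stored in an unfair lock's value into
 * a +1 thread reference on the owner. Returns ESRCH if the name is
 * MACH_PORT_DEAD (the owner is gone), EOWNERDEAD if the value didn't
 * name a thread in the current task at all.
 */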
static int
ulock_resolve_owner(uint32_t value, thread_t *owner)
{
	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);

	*owner = port_name_to_thread(owner_name,
	    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
	    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
	if (*owner == THREAD_NULL) {
		/*
		 * Translation failed - even though the lock value is up to date,
		 * whatever was stored in the lock wasn't actually a thread port.
		 */
		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
	}
	return 0;
}

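/*
 * Legacy wait entry point: forwards to sys_ulock_wait2, widening the
 * microsecond timeout into the nanosecond timeout wait2 expects.
 */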
int
sys_ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
	struct ulock_wait2_args args2;

	args2.operation = args->operation;
	args2.addr      = args->addr;
	args2.value     = args->value;
	args2.timeout   = (uint64_t)(args->timeout) * NSEC_PER_USEC;
	args2.value2    = 0;

	return sys_ulock_wait2(p, &args2, retval);
}

int
sys_ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;

	if (flags & ULF_WAIT_CANCEL_POINT) {
		__pthread_testcancel(1);
	}

	int ret = 0;
	thread_t self = current_thread();
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;

	ull_t *unused_ull = NULL;

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	bool set_owner = false;
	bool xproc = false;
	size_t lock_size = sizeof(uint32_t);
	int copy_ret;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	case UL_COMPARE_AND_WAIT64:
		lock_size = sizeof(uint64_t);
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
		xproc = true;
		break;
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		lock_size = sizeof(uint64_t);
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	uint64_t value = 0;

	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = proc_task(p);
		key.ulk_addr = args->addr;
	}

	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
		/*
		 * Attempt the copyin outside of the lock once.
		 *
		 * If it doesn't match (which is common), return right away.
		 *
		 * If it matches, resolve the current owner, and if it is on core,
		 * spin a bit waiting for the value to change. If the owner isn't on
		 * core, or if the value stays stable, then go on with the regular
		 * blocking code.
		 */
		uint64_t end = 0;
		uint32_t u32;

		ret = copyin_atomic32(args->addr, &u32);
		if (ret || u32 != args->value) {
			goto munge_retval;
		}
		for (;;) {
			if (owner_thread == THREAD_NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
				break;
			}

			/* owner_thread may have a +1 starting here */

			if (!machine_thread_on_core(owner_thread)) {
				break;
			}
			if (end == 0) {
				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
				    NSEC_PER_USEC, &end);
			} else if (mach_absolute_time() > end) {
				break;
			}
			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
				goto munge_retval;
			}
		}
	}

	ull_t *ull = ull_get(&key, 0, &unused_ull);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ret = EDOM;
		goto out_locked;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until <rdar://problem/24999882> exists,
	 * holding the ull spinlock across copyin forces any
	 * vm_fault we encounter to fail.
	 */

	/* copyin_atomicXX always checks alignment */

	if (lock_size == 4) {
		uint32_t u32;
		copy_ret = copyin_atomic32(args->addr, &u32);
		value = u32;
	} else {
		copy_ret = copyin_atomic64(args->addr, &value);
	}

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == proc_getpid(p)) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		/* copyin() will return an error if the access to the user addr would have faulted,
		 * so just return and let the user level code fault it in.
		 */
		ret = copy_ret;
		goto out_locked;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		goto out_locked;
	}

	if (set_owner) {
		if (owner_thread == THREAD_NULL) {
			ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
			if (ret == EOWNERDEAD) {
				/*
				 * Translation failed - even though the lock value is up to date,
				 * whatever was stored in the lock wasn't actually a thread port.
				 */
				goto out_locked;
			}
			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
			ret = 0;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *    i) holding the user lock or
		 *    ii) has just unlocked the user lock after I looked
		 *        and is heading toward the kernel to call ull_wake.
		 *        If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
		 * on it to come by later to issue the wakeup and lose its promotion.
		 */

		/* Return the +1 ref from the ull_owner field */
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		if (owner_thread != THREAD_NULL) {
			/* The ull_owner field now owns a +1 ref on owner_thread */
			thread_reference(owner_thread);
			ull->ull_owner = owner_thread;
		}
	}

	wait_result_t wr;
	uint64_t timeout = args->timeout; /* nanoseconds */
	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
	struct turnstile *ts;

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	thread_set_pending_block_hint(self, kThreadWaitUserLock);

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		interruptible |= THREAD_WAIT_NOREPORT;
	}

	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	if (timeout) {
		if (flags & ULF_DEADLINE) {
			deadline = timeout;
		} else {
			nanoseconds_to_deadline(timeout, &deadline);
		}
	}

	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
	    interruptible, deadline);

	if (wr == THREAD_WAITING) {
		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
		uthread->uu_save.uus_ulock_wait_data.ull = ull;
		uthread->uu_save.uus_ulock_wait_data.retval = retval;
		uthread->uu_save.uus_ulock_wait_data.flags = flags;
		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
	}

	ull_unlock(ull);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		if (set_owner && owner_thread != THREAD_NULL) {
			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
		} else {
			assert(owner_thread == THREAD_NULL);
			thread_block_parameter(ulock_wait_continue, ull);
		}
		/* NOT REACHED */
	}

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_locked:
	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
	owner_thread = NULL;

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	assert(*retval >= 0);

munge_retval:
	if (owner_thread) {
		thread_deallocate(owner_thread);
	}
	if (ret == ESTALE) {
		ret = 0;
	}
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}

/*
 * Must be called with ull_lock held
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
	ull_assert_owned(ull);

	thread_t old_lingering_owner = THREAD_NULL;

	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		old_lingering_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		memset(&ull->ull_key, 0, sizeof ull->ull_key);
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	/* Needs to be called after dropping the interlock */
	turnstile_cleanup();

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);
}

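/*
 * Continuation run when a waiter blocked in sys_ulock_wait2 resumes:
 * the original stack frame is gone, so the wait state (ull, retval,
 * flags, and the +1 thread references) is recovered from uu_save,
 * where it was stashed before blocking.
 */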
__attribute__((noreturn))
static void
ulock_wait_continue(__unused void * parameter, wait_result_t wr)
{
	uthread_t uthread = current_uthread();
	int ret = 0;

	ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	unix_syscall_return(ret);
}

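/*
 * Syscall entry point for wakes. On DEVELOPMENT/DEBUG kernels a few
 * debug opcodes (hash dumps, simulated copyin faults) are intercepted
 * here; everything else goes to ulock_wake() keyed on the caller's task.
 */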
int
sys_ulock_wake(struct proc *p, struct ulock_wake_args *args, int32_t *retval)
{
	int ret = 0;
#if DEVELOPMENT || DEBUG
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);

	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(proc_task(p));
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(TASK_NULL);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif
	ret = ulock_wake(proc_task(p), args->operation, args->addr, args->wake_value);

	if ((args->operation & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	return ret;
}

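/*
 * Core wake implementation; takes the task explicitly so it can be
 * driven by in-kernel callers as well as the syscall above. Wake
 * strategies: a specific thread (ULF_WAKE_THREAD), all waiters
 * (ULF_WAKE_ALL), or the single highest-priority waiter, which also
 * inherits the turnstile when the lock tracks an owner.
 */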
int
ulock_wake(task_t task, uint32_t operation, user_addr_t addr, uint64_t wake_value)
{
	uint8_t opcode = (uint8_t)(operation & UL_OPCODE_MASK);
	uint flags = operation & UL_FLAGS_MASK;
	int ret = 0;
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread = THREAD_NULL;

	bool set_owner = false;
	bool allow_non_owner = false;
	bool xproc = false;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
		if (!set_owner) {
			ret = EINVAL;
			goto munge_retval;
		}

		allow_non_owner = true;
	}

	if (addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = task;
		key.ulk_addr = addr;
	}

	if (flags & ULF_WAKE_THREAD) {
		mach_port_name_t wake_thread_name = (mach_port_name_t)(wake_value);
		wake_thread = port_name_to_thread(wake_thread_name,
		    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
		    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
	thread_t new_owner = THREAD_NULL;
	struct turnstile *ts = TURNSTILE_NULL;
	thread_t cleanup_thread = THREAD_NULL;

	if (ull == NULL) {
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	if (opcode != ull->ull_opcode) {
		ret = EDOM;
		goto out_ull_put;
	}

	if (set_owner) {
		if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
			/*
			 * If the current thread isn't the known owner,
			 * then this wake call was late to the party,
			 * and the kernel already knows who owns the lock.
			 *
			 * The current owner already knows the lock is contended
			 * and will redrive wakes; just bail out.
			 */
			goto out_ull_put;
		}
	} else {
		assert(ull->ull_owner == THREAD_NULL);
	}

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	assert(ts != TURNSTILE_NULL);

	if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    wake_thread, THREAD_AWAKENED);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			ret = EALREADY;
		}
	} else if (flags & ULF_WAKE_ALL) {
		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED,
		    set_owner ? WAITQ_UPDATE_INHERITOR : WAITQ_WAKEUP_DEFAULT);
	} else if (set_owner) {
		/*
		 * The turnstile waitq is priority ordered,
		 * and will wake up the highest priority waiter
		 * and set it as the inheritor for us.
		 */
		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	} else {
		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
	}

	if (set_owner) {
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		cleanup_thread = ull->ull_owner;
		ull->ull_owner = new_owner;
	}

	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_ull_put:
	ull_put(ull);

	if (ts != TURNSTILE_NULL) {
		/* Needs to be called after dropping the interlock */
		turnstile_cleanup();
	}

	if (cleanup_thread != THREAD_NULL) {
		thread_deallocate(cleanup_thread);
	}

munge_retval:
	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	return ret;
}

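/*
 * Debugger/stackshot callback: given the wait event of a thread blocked
 * on a ulock, report the owning thread's tid (for owner-tracking locks)
 * and the lock's key address as the wait context.
 */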
void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);

	zone_require(ull_zone->kt_zv.zv_zone, ull);

	switch (ull->ull_opcode) {
	case UL_UNFAIR_LOCK:
	case UL_UNFAIR_LOCK64_SHARED:
		waitinfo->owner = thread_tid(ull->ull_owner);
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		waitinfo->owner = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	default:
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
		break;
	}
	return;
}