gems-kernel/source/THIRDPARTY/xnu/bsd/vm/vm_unix.c

/*
* Copyright (c) 2000-2021 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* Mach Operating System
* Copyright (c) 1987 Carnegie-Mellon University
* All rights reserved. The CMU software License Agreement specifies
* the terms and conditions for use and redistribution.
*/
/*
* NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
* support for mandatory and extensible security protections. This notice
* is included in support of clause 2.2 (b) of the Apple Public License,
* Version 2.0.
*/
#include <vm/vm_options.h>
#include <kern/ecc.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/sdt.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <machine/machine_routines.h>
#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/xattr.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/random.h>
#include <sys/code_signing.h>
#if NECP
#include <net/necp.h>
#endif /* NECP */
#if SKYWALK
#include <skywalk/os_channel.h>
#endif /* SKYWALK */
#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>
#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_dyld_pager.h>
#include <vm/vm_protos.h>
#include <sys/kern_memorystatus.h>
#include <sys/kern_memorystatus_freeze.h>
#include <sys/proc_internal.h>
#include <mach-o/fixup-chains.h>
#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <kern/bits.h>
#if CONFIG_CSR
#include <sys/csr.h>
#endif /* CONFIG_CSR */
#include <sys/trust_caches.h>
#include <libkern/amfi/amfi.h>
#include <IOKit/IOBSD.h>
#if VM_MAP_DEBUG_APPLE_PROTECT
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
#if VM_MAP_DEBUG_FOURK
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
#endif /* VM_MAP_DEBUG_FOURK */
#if DEVELOPMENT || DEBUG
static int
sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
vm_offset_t kaddr;
kern_return_t kr;
int error = 0;
int size = 0;
error = sysctl_handle_int(oidp, &size, 0, req);
if (error || !req->newptr) {
return error;
}
kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
if (kr == KERN_SUCCESS) {
kmem_free(kernel_map, kaddr, size);
}
return error;
}
SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
0, 0, &sysctl_kmem_alloc_contig, "I", "");
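/*
 * Usage sketch (hypothetical, userspace, not part of this file): on
 * DEVELOPMENT/DEBUG kernels the write-only vm.kmem_alloc_contig sysctl
 * above can be exercised with sysctlbyname(3) by writing the requested
 * allocation size in bytes:
 *
 *     #include <sys/sysctl.h>
 *
 *     int size = 1024 * 1024;    // try a 1 MiB contiguous allocation
 *     if (sysctlbyname("vm.kmem_alloc_contig",
 *             NULL, NULL, &size, sizeof(size)) != 0) {
 *         perror("sysctlbyname");    // e.g. fails on RELEASE kernels where the OID is absent
 *     }
 */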
extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
static int
sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
kmem_gobj_stats stats = kmem_get_gobj_stats();
return SYSCTL_OUT(req, &stats, sizeof(stats));
}
SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
#endif /* DEVELOPMENT || DEBUG */
static int
sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
int error = 0;
int value;
value = task_self_region_footprint();
error = SYSCTL_OUT(req, &value, sizeof(int));
if (error) {
return error;
}
if (!req->newptr) {
return 0;
}
error = SYSCTL_IN(req, &value, sizeof(int));
if (error) {
return error;
}
task_self_region_footprint_set(value);
return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
static int
sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
int error = 0;
int value;
value = (1 << thread_self_region_page_shift());
error = SYSCTL_OUT(req, &value, sizeof(int));
if (error) {
return error;
}
if (!req->newptr) {
return 0;
}
error = SYSCTL_IN(req, &value, sizeof(int));
if (error) {
return error;
}
if (value != 0 && value != 4096 && value != 16384) {
return EINVAL;
}
#if !__ARM_MIXED_PAGE_SIZE__
if (value != vm_map_page_size(current_map())) {
return EINVAL;
}
#endif /* !__ARM_MIXED_PAGE_SIZE__ */
thread_self_region_page_shift_set(bit_first(value));
return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
#if DEVELOPMENT || DEBUG
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
extern int vm_log_xnu_user_debug;
SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
#endif /* DEVELOPMENT || DEBUG */
extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
#if __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
#if !CONFIG_SPTM
extern unsigned int free_page_size_tt_count;
extern unsigned int free_two_page_size_tt_count;
extern unsigned int free_tt_count;
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
#endif
#if DEVELOPMENT || DEBUG
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
#endif
#endif /* __arm64__ */
#if __arm64__
extern int fourk_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0; /* Disabled by default */
extern int proc_shadow_max(void);
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
int value = 0;
if (vm_shadow_max_enabled) {
value = proc_shadow_max();
}
return SYSCTL_OUT(req, &value, sizeof(value));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
0, 0, &vm_shadow_max, "I", "");
SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
/*
* Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
*/
#if DEVELOPMENT || DEBUG
extern int allow_stack_exec, allow_data_exec;
SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* DEVELOPMENT || DEBUG */
static const char *prot_values[] = {
"none",
"read-only",
"write-only",
"read-write",
"execute-only",
"read-execute",
"write-execute",
"read-write-execute"
};
void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
}
/*
* shared_region_unnest_logging: level of logging of unnesting events
* 0 - no logging
* 1 - throttled logging of unexpected unnesting events (default)
* 2 - unthrottled logging of unexpected unnesting events
* 3+ - unthrottled logging of all unnesting events
*/
int shared_region_unnest_logging = 1;
SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
&shared_region_unnest_logging, 0, "");
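/*
 * Usage sketch (hypothetical, userspace, not part of this file): the logging
 * level described above can be adjusted at runtime through the
 * vm.shared_region_unnest_logging sysctl, for example with sysctlbyname(3):
 *
 *     #include <sys/sysctl.h>
 *
 *     int level = 2;    // unthrottled logging of unexpected unnesting events
 *     if (sysctlbyname("vm.shared_region_unnest_logging",
 *             NULL, NULL, &level, sizeof(level)) != 0) {
 *         perror("sysctlbyname");
 *     }
 */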
int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;
#if XNU_TARGET_OS_OSX
#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0; /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */
static char *scdir_path[] = {
"/System/Library/dyld/",
"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
"/System/Cryptexes/OS/System/Library/dyld",
NULL
};
#else /* XNU_TARGET_OS_OSX */
static int scdir_enforce = 0;
static char *scdir_path[] = {
"/System/Library/Caches/com.apple.dyld/",
"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
NULL
};
#endif /* XNU_TARGET_OS_OSX */
static char *driverkit_scdir_path[] = {
"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
NULL
};
#ifndef SECURE_KERNEL
static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
return EPERM;
}
#endif /* CONFIG_CSR */
return sysctl_handle_int(oidp, arg1, arg2, req);
}
SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
#endif
/* These log rate throttling state variables aren't thread safe, but
* are sufficient unto the task.
*/
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;
void
log_unnest_badness(
vm_map_t m,
vm_map_offset_t s,
vm_map_offset_t e,
boolean_t is_nested_map,
vm_map_offset_t lowest_unnestable_addr)
{
struct timeval tv;
if (shared_region_unnest_logging == 0) {
return;
}
if (shared_region_unnest_logging <= 2 &&
is_nested_map &&
s >= lowest_unnestable_addr) {
/*
* Unnesting of writable map entries is fine.
*/
return;
}
if (shared_region_unnest_logging <= 1) {
microtime(&tv);
if ((tv.tv_sec - last_unnest_log_time) <
vm_shared_region_unnest_log_interval) {
if (shared_region_unnest_log_count++ >
shared_region_unnest_log_count_threshold) {
return;
}
} else {
last_unnest_log_time = tv.tv_sec;
shared_region_unnest_log_count = 0;
}
}
DTRACE_VM4(log_unnest_badness,
vm_map_t, m,
vm_map_offset_t, s,
vm_map_offset_t, e,
vm_map_offset_t, lowest_unnestable_addr);
printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}
uint64_t
vm_purge_filebacked_pagers(void)
{
uint64_t pages_purged;
pages_purged = 0;
pages_purged += apple_protect_pager_purge_all();
pages_purged += shared_region_pager_purge_all();
pages_purged += dyld_pager_purge_all();
#if DEVELOPMENT || DEBUG
printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
#endif /* DEVELOPMENT || DEBUG */
return pages_purged;
}
int
useracc(
user_addr_t addr,
user_size_t len,
int prot)
{
vm_map_t map;
map = current_map();
return vm_map_check_protection(
map,
vm_map_trunc_page(addr,
vm_map_page_mask(map)),
vm_map_round_page(addr + len,
vm_map_page_mask(map)),
prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
}
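/*
 * Usage sketch (hedged, in-kernel, illustration only): useracc() can be used
 * to ask whether a user range is currently mapped with the needed protection
 * before attempting an access, e.g.:
 *
 *     if (!useracc(uaddr, len, B_WRITE)) {
 *         return EFAULT;    // range is not writable in the caller's map
 *     }
 */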
int
vslock(
user_addr_t addr,
user_size_t len)
{
kern_return_t kret;
vm_map_t map;
map = current_map();
kret = vm_map_wire_kernel(map,
vm_map_trunc_page(addr,
vm_map_page_mask(map)),
vm_map_round_page(addr + len,
vm_map_page_mask(map)),
VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
FALSE);
switch (kret) {
case KERN_SUCCESS:
return 0;
case KERN_INVALID_ADDRESS:
case KERN_NO_SPACE:
return ENOMEM;
case KERN_PROTECTION_FAILURE:
return EACCES;
default:
return EINVAL;
}
}
int
vsunlock(
user_addr_t addr,
user_size_t len,
__unused int dirtied)
{
#if FIXME /* [ */
pmap_t pmap;
vm_page_t pg;
vm_map_offset_t vaddr;
ppnum_t paddr;
#endif /* FIXME ] */
kern_return_t kret;
vm_map_t map;
map = current_map();
#if FIXME /* [ */
if (dirtied) {
pmap = get_task_pmap(current_task());
for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
vaddr < vm_map_round_page(addr + len, PAGE_MASK);
vaddr += PAGE_SIZE) {
paddr = pmap_find_phys(pmap, vaddr);
pg = PHYS_TO_VM_PAGE(paddr);
vm_page_set_modified(pg);
}
}
#endif /* FIXME ] */
#ifdef lint
dirtied++;
#endif /* lint */
kret = vm_map_unwire(map,
vm_map_trunc_page(addr,
vm_map_page_mask(map)),
vm_map_round_page(addr + len,
vm_map_page_mask(map)),
FALSE);
switch (kret) {
case KERN_SUCCESS:
return 0;
case KERN_INVALID_ADDRESS:
case KERN_NO_SPACE:
return ENOMEM;
case KERN_PROTECTION_FAILURE:
return EACCES;
default:
return EINVAL;
}
}
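/*
 * Usage sketch (hedged, in-kernel, illustration only; the surrounding code
 * is hypothetical and not part of xnu): vslock() and vsunlock() are
 * typically used as a pair to keep a user buffer resident across an
 * operation that must not fault:
 *
 *     int error = vslock(uaddr, len);
 *     if (error == 0) {
 *         error = copyin(uaddr, kbuf, len);   // buffer stays wired here
 *         vsunlock(uaddr, len, 0);            // we did not dirty the pages
 *     }
 */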
int
subyte(
user_addr_t addr,
int byte)
{
char character;
character = (char)byte;
return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
}
int
suibyte(
user_addr_t addr,
int byte)
{
char character;
character = (char)byte;
return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
}
int
fubyte(user_addr_t addr)
{
unsigned char byte;
if (copyin(addr, (void *) &byte, sizeof(char))) {
return -1;
}
return byte;
}
int
fuibyte(user_addr_t addr)
{
unsigned char byte;
if (copyin(addr, (void *) &(byte), sizeof(char))) {
return -1;
}
return byte;
}
int
suword(
user_addr_t addr,
long word)
{
return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
}
long
fuword(user_addr_t addr)
{
long word = 0;
if (copyin(addr, (void *) &word, sizeof(int))) {
return -1;
}
return word;
}
/* suiword and fuiword are the same as suword and fuword, respectively */
int
suiword(
user_addr_t addr,
long word)
{
return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
}
long
fuiword(user_addr_t addr)
{
long word = 0;
if (copyin(addr, (void *) &word, sizeof(int))) {
return -1;
}
return word;
}
/*
* With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
* fetching and setting of process-sized size_t and pointer values.
*/
int
sulong(user_addr_t addr, int64_t word)
{
if (IS_64BIT_PROCESS(current_proc())) {
return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
} else {
return suiword(addr, (long)word);
}
}
int64_t
fulong(user_addr_t addr)
{
int64_t longword;
if (IS_64BIT_PROCESS(current_proc())) {
if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
return -1;
}
return longword;
} else {
return (int64_t)fuiword(addr);
}
}
int
suulong(user_addr_t addr, uint64_t uword)
{
if (IS_64BIT_PROCESS(current_proc())) {
return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
} else {
return suiword(addr, (uint32_t)uword);
}
}
uint64_t
fuulong(user_addr_t addr)
{
uint64_t ulongword;
if (IS_64BIT_PROCESS(current_proc())) {
if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
return -1ULL;
}
return ulongword;
} else {
return (uint64_t)fuiword(addr);
}
}
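/*
 * Usage sketch (hedged; "example_bump_user_long" is a hypothetical helper,
 * not part of xnu): fulong()/sulong() let the same handler read and write a
 * process-sized value for both 32-bit and 64-bit callers. Note that a
 * return of -1 from fulong() is ambiguous between a copyin failure and a
 * stored value of -1:
 *
 *     static int
 *     example_bump_user_long(user_addr_t uaddr)
 *     {
 *         int64_t value = fulong(uaddr);
 *         if (value == -1) {
 *             return EFAULT;    // copyin failed (or the value really was -1)
 *         }
 *         return sulong(uaddr, value + 1) == 0 ? 0 : EFAULT;
 *     }
 */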
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
return ENOTSUP;
}
/*
* pid_for_task
*
* Find the BSD process ID for the Mach task associated with the given Mach port
* name
*
* Parameters: args User argument descriptor (see below)
*
* Indirect parameters: args->t Mach port name
* args->pid Process ID (returned value; see below)
*
* Returns: KERN_SUCCESS Success
* KERN_FAILURE Not success
*
* Implicit returns: args->pid Process ID
*
*/
kern_return_t
pid_for_task(
struct pid_for_task_args *args)
{
mach_port_name_t t = args->t;
user_addr_t pid_addr = args->pid;
proc_t p;
task_t t1;
int pid = -1;
kern_return_t err = KERN_SUCCESS;
AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
AUDIT_ARG(mach_port1, t);
t1 = port_name_to_task_name(t);
if (t1 == TASK_NULL) {
err = KERN_FAILURE;
goto pftout;
} else {
p = get_bsdtask_info(t1);
if (p) {
pid = proc_pid(p);
err = KERN_SUCCESS;
} else if (task_is_a_corpse(t1)) {
pid = task_pid(t1);
err = KERN_SUCCESS;
} else {
err = KERN_FAILURE;
}
}
task_deallocate(t1);
pftout:
AUDIT_ARG(pid, pid);
(void) copyout((char *) &pid, pid_addr, sizeof(int));
AUDIT_MACH_SYSCALL_EXIT(err);
return err;
}
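/*
 * Usage sketch (hypothetical, userspace, not part of this file): the
 * pid_for_task() trap is normally reached through the libsyscall wrapper
 * declared in <mach/mach_traps.h>; "some_task_port" is a hypothetical
 * variable naming any task port the caller already holds:
 *
 *     #include <mach/mach.h>
 *
 *     int pid = -1;
 *     kern_return_t kr = pid_for_task(some_task_port, &pid);
 *     if (kr == KERN_SUCCESS) {
 *         // pid now holds the BSD process ID backing some_task_port
 *     }
 */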
/*
*
* tfp_policy = KERN_TFP_POLICY_DENY; Deny mode: none allowed except for self
* tfp_policy = KERN_TFP_POLICY_DEFAULT; Default mode: all posix checks and upcall via task port for authentication
*
*/
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
/*
* Routine: task_for_pid_posix_check
* Purpose:
* Verify that the current process should be allowed to
* get the target process's task port. This is only
* permitted if:
* - The current process is root
* OR all of the following are true:
* - The target process's real, effective, and saved uids
* are the same as the current proc's euid,
* - The target process's group set is a subset of the
* calling process's group set, and
* - The target process hasn't switched credentials.
*
* Returns: TRUE: permitted
* FALSE: denied
*/
static int
task_for_pid_posix_check(proc_t target)
{
kauth_cred_t targetcred, mycred;
bool checkcredentials;
uid_t myuid;
int allowed;
/* No task_for_pid on bad targets */
if (target->p_stat == SZOMB) {
return FALSE;
}
mycred = kauth_cred_get();
myuid = kauth_cred_getuid(mycred);
/* If we're running as root, the check passes */
if (kauth_cred_issuser(mycred)) {
return TRUE;
}
/* We're allowed to get our own task port */
if (target == current_proc()) {
return TRUE;
}
/*
* Under DENY, only root can get another proc's task port,
* so no more checks are needed.
*/
if (tfp_policy == KERN_TFP_POLICY_DENY) {
return FALSE;
}
targetcred = kauth_cred_proc_ref(target);
allowed = TRUE;
checkcredentials = !proc_is_third_party_debuggable_driver(target);
if (checkcredentials) {
/* Do target's ruid, euid, and saved uid match my euid? */
if ((kauth_cred_getuid(targetcred) != myuid) ||
(kauth_cred_getruid(targetcred) != myuid) ||
(kauth_cred_getsvuid(targetcred) != myuid)) {
allowed = FALSE;
goto out;
}
/* Are target's groups a subset of my groups? */
if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
allowed == 0) {
allowed = FALSE;
goto out;
}
}
/* Has target switched credentials? */
if (target->p_flag & P_SUGID) {
allowed = FALSE;
goto out;
}
out:
kauth_cred_unref(&targetcred);
return allowed;
}
/*
* __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
*
* Description: Waits for the user space daemon to respond to the request
* we made. Function declared noinline to be visible in
* stackshots and spindumps, as well as for debugging.
*/
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
/*
* Routine: task_for_pid
* Purpose:
* Get the task port for another "process", named by its
* process ID on the same host as "target_task".
*
* Only permitted to privileged processes, or processes
* with the same user ID.
*
* Note: if pid == 0, an error is returned no matter who is calling.
*
* XXX This should be a BSD system call, not a Mach trap!!!
*/
kern_return_t
task_for_pid(
struct task_for_pid_args *args)
{
mach_port_name_t target_tport = args->target_tport;
int pid = args->pid;
user_addr_t task_addr = args->t;
proc_t p = PROC_NULL;
task_t t1 = TASK_NULL;
task_t task = TASK_NULL;
mach_port_name_t tret = MACH_PORT_NULL;
ipc_port_t tfpport = MACH_PORT_NULL;
void * sright = NULL;
int error = 0;
boolean_t is_current_proc = FALSE;
struct proc_ident pident = {0};
AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
AUDIT_ARG(pid, pid);
AUDIT_ARG(mach_port1, target_tport);
/* Always check if pid == 0 */
if (pid == 0) {
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
p = proc_find(pid);
if (p == PROC_NULL) {
error = KERN_FAILURE;
goto tfpout;
}
pident = proc_ident(p);
is_current_proc = (p == current_proc());
#if CONFIG_AUDIT
AUDIT_ARG(process, p);
#endif
if (!(task_for_pid_posix_check(p))) {
error = KERN_FAILURE;
goto tfpout;
}
if (proc_task(p) == TASK_NULL) {
error = KERN_SUCCESS;
goto tfpout;
}
/*
* Grab a task reference and drop the proc reference as the proc ref
* shouldn't be held across upcalls.
*/
task = proc_task(p);
task_reference(task);
proc_rele(p);
p = PROC_NULL;
/* IPC is not active on the task until after `exec_resettextvp` has been called.
* We don't want to call into MAC hooks until we know that this has occurred, otherwise
* AMFI and others will read uninitialized fields from the csproc.
*/
if (!task_is_ipc_active(task)) {
error = KERN_FAILURE;
goto tfpout;
}
#if CONFIG_MACF
error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
if (error) {
error = KERN_FAILURE;
goto tfpout;
}
#endif
/* If we aren't root and target's task access port is set... */
if (!kauth_cred_issuser(kauth_cred_get()) &&
!is_current_proc &&
(task_get_task_access_port(task, &tfpport) == 0) &&
(tfpport != IPC_PORT_NULL)) {
if (tfpport == IPC_PORT_DEAD) {
error = KERN_PROTECTION_FAILURE;
goto tfpout;
}
/* Call up to the task access server */
error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
error = KERN_ABORTED;
} else {
error = KERN_FAILURE;
}
goto tfpout;
}
}
/* Grant task port access */
extmod_statistics_incr_task_for_pid(task);
/* this reference will be consumed during conversion */
task_reference(task);
if (task == current_task()) {
/* return pinned self if current_task() so equality check with mach_task_self_ passes */
sright = (void *)convert_task_to_port_pinned(task);
} else {
sright = (void *)convert_task_to_port(task);
}
/* extra task ref consumed */
/*
* Check if the task has been corpsified. We must do so after conversion
* since we don't hold locks and may have grabbed a corpse control port
* above which will prevent no-senders notification delivery.
*/
if (task_is_a_corpse(task)) {
ipc_port_release_send(sright);
error = KERN_FAILURE;
goto tfpout;
}
tret = ipc_port_copyout_send(
sright,
get_task_ipcspace(current_task()));
error = KERN_SUCCESS;
tfpout:
task_deallocate(t1);
AUDIT_ARG(mach_port2, tret);
(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
if (tfpport != IPC_PORT_NULL) {
ipc_port_release_send(tfpport);
}
if (task != TASK_NULL) {
task_deallocate(task);
}
if (p != PROC_NULL) {
proc_rele(p);
}
AUDIT_MACH_SYSCALL_EXIT(error);
return error;
}
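/*
 * Usage sketch (hypothetical, userspace, not part of this file): a caller
 * that passes the posix/MACF/taskgated checks above obtains a task control
 * port with the task_for_pid() trap declared in <mach/mach_traps.h>;
 * "target_pid" is a hypothetical variable:
 *
 *     #include <mach/mach.h>
 *
 *     mach_port_t task = MACH_PORT_NULL;
 *     kern_return_t kr = task_for_pid(mach_task_self(), target_pid, &task);
 *     if (kr != KERN_SUCCESS) {
 *         // denied by the posix check, MACF, or the task access server
 *     }
 */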
/*
* Routine: task_name_for_pid
* Purpose:
* Get the task name port for another "process", named by its
* process ID on the same host as "target_task".
*
* Only permitted to privileged processes, or processes
* with the same user ID.
*
* XXX This should be a BSD system call, not a Mach trap!!!
*/
kern_return_t
task_name_for_pid(
struct task_name_for_pid_args *args)
{
mach_port_name_t target_tport = args->target_tport;
int pid = args->pid;
user_addr_t task_addr = args->t;
proc_t p = PROC_NULL;
task_t t1 = TASK_NULL;
mach_port_name_t tret = MACH_PORT_NULL;
void * sright;
int error = 0, refheld = 0;
kauth_cred_t target_cred;
AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
AUDIT_ARG(pid, pid);
AUDIT_ARG(mach_port1, target_tport);
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
p = proc_find(pid);
if (p != PROC_NULL) {
AUDIT_ARG(process, p);
target_cred = kauth_cred_proc_ref(p);
refheld = 1;
if ((p->p_stat != SZOMB)
&& ((current_proc() == p)
|| kauth_cred_issuser(kauth_cred_get())
|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
((kauth_cred_getruid(target_cred) == kauth_getruid())))
|| IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
)) {
if (proc_task(p) != TASK_NULL) {
struct proc_ident pident = proc_ident(p);
task_t task = proc_task(p);
task_reference(task);
proc_rele(p);
p = PROC_NULL;
#if CONFIG_MACF
error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
if (error) {
task_deallocate(task);
goto noperm;
}
#endif
sright = (void *)convert_task_name_to_port(task);
task = NULL;
tret = ipc_port_copyout_send(sright,
get_task_ipcspace(current_task()));
} else {
tret = MACH_PORT_NULL;
}
AUDIT_ARG(mach_port2, tret);
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
task_deallocate(t1);
error = KERN_SUCCESS;
goto tnfpout;
}
}
#if CONFIG_MACF
noperm:
#endif
task_deallocate(t1);
tret = MACH_PORT_NULL;
(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
error = KERN_FAILURE;
tnfpout:
if (refheld != 0) {
kauth_cred_unref(&target_cred);
}
if (p != PROC_NULL) {
proc_rele(p);
}
AUDIT_MACH_SYSCALL_EXIT(error);
return error;
}
/*
* Routine: task_inspect_for_pid
* Purpose:
* Get the task inspect port for another "process", named by its
* process ID on the same host as "target_task".
*/
int
task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
{
mach_port_name_t target_tport = args->target_tport;
int pid = args->pid;
user_addr_t task_addr = args->t;
proc_t proc = PROC_NULL;
task_t t1 = TASK_NULL;
task_inspect_t task_insp = TASK_INSPECT_NULL;
mach_port_name_t tret = MACH_PORT_NULL;
ipc_port_t tfpport = MACH_PORT_NULL;
int error = 0;
void *sright = NULL;
boolean_t is_current_proc = FALSE;
struct proc_ident pident = {0};
/* Disallow inspect port for kernel_task */
if (pid == 0) {
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
return EPERM;
}
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
return EINVAL;
}
proc = proc_find(pid);
if (proc == PROC_NULL) {
error = ESRCH;
goto tifpout;
}
pident = proc_ident(proc);
is_current_proc = (proc == current_proc());
if (!(task_for_pid_posix_check(proc))) {
error = EPERM;
goto tifpout;
}
task_insp = proc_task(proc);
if (task_insp == TASK_INSPECT_NULL) {
goto tifpout;
}
/*
* Grab a task reference and drop the proc reference before making any upcalls.
*/
task_reference(task_insp);
proc_rele(proc);
proc = PROC_NULL;
#if CONFIG_MACF
error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
if (error) {
error = EPERM;
goto tifpout;
}
#endif
/* If we aren't root and target's task access port is set... */
if (!kauth_cred_issuser(kauth_cred_get()) &&
!is_current_proc &&
(task_get_task_access_port(task_insp, &tfpport) == 0) &&
(tfpport != IPC_PORT_NULL)) {
if (tfpport == IPC_PORT_DEAD) {
error = EACCES;
goto tifpout;
}
/* Call up to the task access server */
error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
error = EINTR;
} else {
error = EPERM;
}
goto tifpout;
}
}
/* Check if the task has been corpsified */
if (task_is_a_corpse(task_insp)) {
error = EACCES;
goto tifpout;
}
/* could be IP_NULL, consumes a ref */
sright = (void*) convert_task_inspect_to_port(task_insp);
task_insp = TASK_INSPECT_NULL;
tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
tifpout:
task_deallocate(t1);
(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
if (proc != PROC_NULL) {
proc_rele(proc);
}
if (tfpport != IPC_PORT_NULL) {
ipc_port_release_send(tfpport);
}
if (task_insp != TASK_INSPECT_NULL) {
task_deallocate(task_insp);
}
*ret = error;
return error;
}
/*
* Routine: task_read_for_pid
* Purpose:
* Get the task read port for another "process", named by its
* process ID on the same host as "target_task".
*/
int
task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
{
mach_port_name_t target_tport = args->target_tport;
int pid = args->pid;
user_addr_t task_addr = args->t;
proc_t proc = PROC_NULL;
task_t t1 = TASK_NULL;
task_read_t task_read = TASK_READ_NULL;
mach_port_name_t tret = MACH_PORT_NULL;
ipc_port_t tfpport = MACH_PORT_NULL;
int error = 0;
void *sright = NULL;
boolean_t is_current_proc = FALSE;
struct proc_ident pident = {0};
/* Disallow read port for kernel_task */
if (pid == 0) {
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
return EPERM;
}
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
return EINVAL;
}
proc = proc_find(pid);
if (proc == PROC_NULL) {
error = ESRCH;
goto trfpout;
}
pident = proc_ident(proc);
is_current_proc = (proc == current_proc());
if (!(task_for_pid_posix_check(proc))) {
error = EPERM;
goto trfpout;
}
task_read = proc_task(proc);
if (task_read == TASK_INSPECT_NULL) {
goto trfpout;
}
/*
* Grab a task reference and drop the proc reference before making any upcalls.
*/
task_reference(task_read);
proc_rele(proc);
proc = PROC_NULL;
#if CONFIG_MACF
error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
if (error) {
error = EPERM;
goto trfpout;
}
#endif
/* If we aren't root and target's task access port is set... */
if (!kauth_cred_issuser(kauth_cred_get()) &&
!is_current_proc &&
(task_get_task_access_port(task_read, &tfpport) == 0) &&
(tfpport != IPC_PORT_NULL)) {
if (tfpport == IPC_PORT_DEAD) {
error = EACCES;
goto trfpout;
}
/* Call up to the task access server */
error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
error = EINTR;
} else {
error = EPERM;
}
goto trfpout;
}
}
/* Check if the task has been corpsified */
if (task_is_a_corpse(task_read)) {
error = EACCES;
goto trfpout;
}
/* could be IP_NULL, consumes a ref */
sright = (void*) convert_task_read_to_port(task_read);
task_read = TASK_READ_NULL;
tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
trfpout:
task_deallocate(t1);
(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
if (proc != PROC_NULL) {
proc_rele(proc);
}
if (tfpport != IPC_PORT_NULL) {
ipc_port_release_send(tfpport);
}
if (task_read != TASK_READ_NULL) {
task_deallocate(task_read);
}
*ret = error;
return error;
}
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
task_t target = NULL;
proc_t targetproc = PROC_NULL;
int pid = args->pid;
int error = 0;
mach_port_t tfpport = MACH_PORT_NULL;
if (pid == 0) {
error = EPERM;
goto out;
}
targetproc = proc_find(pid);
if (targetproc == PROC_NULL) {
error = ESRCH;
goto out;
}
if (!task_for_pid_posix_check(targetproc) &&
!IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
error = EPERM;
goto out;
}
#if CONFIG_MACF
error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
if (error) {
error = EPERM;
goto out;
}
#endif
target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
if (target != TASK_NULL) {
/* If we aren't root and target's task access port is set... */
if (!kauth_cred_issuser(kauth_cred_get()) &&
targetproc != current_proc() &&
(task_get_task_access_port(target, &tfpport) == 0) &&
(tfpport != IPC_PORT_NULL)) {
if (tfpport == IPC_PORT_DEAD) {
error = EACCES;
goto out;
}
/* Call up to the task access server */
error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
error = EINTR;
} else {
error = EPERM;
}
goto out;
}
}
}
#endif /* XNU_TARGET_OS_OSX */
task_reference(target);
error = task_pidsuspend(target);
if (error) {
if (error == KERN_INVALID_ARGUMENT) {
error = EINVAL;
} else {
error = EPERM;
}
}
#if CONFIG_MEMORYSTATUS
else {
memorystatus_on_suspend(targetproc);
}
#endif
task_deallocate(target);
out:
if (tfpport != IPC_PORT_NULL) {
ipc_port_release_send(tfpport);
}
if (targetproc != PROC_NULL) {
proc_rele(targetproc);
}
*ret = error;
return error;
}
kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
{
mach_port_name_t target_tport = args->target_tport;
int pid = args->pid;
user_addr_t task_addr = args->t;
proc_t p = PROC_NULL;
task_t t1 = TASK_NULL;
task_t task = TASK_NULL;
mach_port_name_t tret = MACH_PORT_NULL;
ipc_port_t tfpport = MACH_PORT_NULL;
ipc_port_t sright = NULL;
int error = 0;
boolean_t is_current_proc = FALSE;
struct proc_ident pident = {0};
AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
AUDIT_ARG(pid, pid);
AUDIT_ARG(mach_port1, target_tport);
/* Always check if pid == 0 */
if (pid == 0) {
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
p = proc_find(pid);
if (p == PROC_NULL) {
error = KERN_FAILURE;
goto tfpout;
}
pident = proc_ident(p);
is_current_proc = (p == current_proc());
#if CONFIG_AUDIT
AUDIT_ARG(process, p);
#endif
if (!(task_for_pid_posix_check(p))) {
error = KERN_FAILURE;
goto tfpout;
}
if (proc_task(p) == TASK_NULL) {
error = KERN_SUCCESS;
goto tfpout;
}
/*
* Grab a task reference and drop the proc reference before making any upcalls.
*/
task = proc_task(p);
task_reference(task);
proc_rele(p);
p = PROC_NULL;
if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
if (error) {
error = KERN_FAILURE;
goto tfpout;
}
#endif
/* If we aren't root and target's task access port is set... */
if (!kauth_cred_issuser(kauth_cred_get()) &&
!is_current_proc &&
(task_get_task_access_port(task, &tfpport) == 0) &&
(tfpport != IPC_PORT_NULL)) {
if (tfpport == IPC_PORT_DEAD) {
error = KERN_PROTECTION_FAILURE;
goto tfpout;
}
/* Call up to the task access server */
error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
error = KERN_ABORTED;
} else {
error = KERN_FAILURE;
}
goto tfpout;
}
}
}
/* Check if the task has been corpsified */
if (task_is_a_corpse(task)) {
error = KERN_FAILURE;
goto tfpout;
}
error = task_get_debug_control_port(task, &sright);
if (error != KERN_SUCCESS) {
goto tfpout;
}
tret = ipc_port_copyout_send(
sright,
get_task_ipcspace(current_task()));
error = KERN_SUCCESS;
tfpout:
task_deallocate(t1);
AUDIT_ARG(mach_port2, tret);
(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
if (tfpport != IPC_PORT_NULL) {
ipc_port_release_send(tfpport);
}
if (task != TASK_NULL) {
task_deallocate(task);
}
if (p != PROC_NULL) {
proc_rele(p);
}
AUDIT_MACH_SYSCALL_EXIT(error);
return error;
}
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
task_t target = NULL;
proc_t targetproc = PROC_NULL;
int pid = args->pid;
int error = 0;
mach_port_t tfpport = MACH_PORT_NULL;
if (pid == 0) {
error = EPERM;
goto out;
}
targetproc = proc_find(pid);
if (targetproc == PROC_NULL) {
error = ESRCH;
goto out;
}
if (!task_for_pid_posix_check(targetproc) &&
!IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
error = EPERM;
goto out;
}
#if CONFIG_MACF
error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
if (error) {
error = EPERM;
goto out;
}
#endif
target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
if (target != TASK_NULL) {
/* If we aren't root and target's task access port is set... */
if (!kauth_cred_issuser(kauth_cred_get()) &&
targetproc != current_proc() &&
(task_get_task_access_port(target, &tfpport) == 0) &&
(tfpport != IPC_PORT_NULL)) {
if (tfpport == IPC_PORT_DEAD) {
error = EACCES;
goto out;
}
/* Call up to the task access server */
error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
error = EINTR;
} else {
error = EPERM;
}
goto out;
}
}
}
#endif /* XNU_TARGET_OS_OSX */
#if !XNU_TARGET_OS_OSX
#if SOCKETS
resume_proc_sockets(targetproc);
#endif /* SOCKETS */
#endif /* !XNU_TARGET_OS_OSX */
task_reference(target);
#if CONFIG_MEMORYSTATUS
memorystatus_on_resume(targetproc);
#endif
error = task_pidresume(target);
if (error) {
if (error == KERN_INVALID_ARGUMENT) {
error = EINVAL;
} else {
if (error == KERN_MEMORY_ERROR) {
psignal(targetproc, SIGKILL);
error = EIO;
} else {
error = EPERM;
}
}
}
task_deallocate(target);
out:
if (tfpport != IPC_PORT_NULL) {
ipc_port_release_send(tfpport);
}
if (targetproc != PROC_NULL) {
proc_rele(targetproc);
}
*ret = error;
return error;
}
#if !XNU_TARGET_OS_OSX
/*
* Freeze the specified process (provided in args->pid), or find and freeze a PID.
* When a process is specified, this call is blocking, otherwise we wake up the
* freezer thread and do not block on a process being frozen.
*/
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
int error = 0;
proc_t targetproc = PROC_NULL;
int pid = args->pid;
#ifndef CONFIG_FREEZE
#pragma unused(pid)
#else
/*
* If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
*/
if (pid >= 0) {
targetproc = proc_find(pid);
if (targetproc == PROC_NULL) {
error = ESRCH;
goto out;
}
if (!task_for_pid_posix_check(targetproc)) {
error = EPERM;
goto out;
}
}
#if CONFIG_MACF
// Note that targetproc may be null
error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
if (error) {
error = EPERM;
goto out;
}
#endif
if (pid == -2) {
vm_pageout_anonymous_pages();
} else if (pid == -1) {
memorystatus_on_inactivity(targetproc);
} else {
error = memorystatus_freeze_process_sync(targetproc);
}
out:
#endif /* CONFIG_FREEZE */
if (targetproc != PROC_NULL) {
proc_rele(targetproc);
}
*ret = error;
return error;
}
#endif /* !XNU_TARGET_OS_OSX */
#if SOCKETS
int
networking_memstatus_callout(proc_t p, uint32_t status)
{
struct fileproc *fp;
/*
* proc list lock NOT held
* proc lock NOT held
* a reference on the proc has been held / shall be dropped by the caller.
*/
LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
proc_fdlock(p);
fdt_foreach(fp, p) {
switch (FILEGLOB_DTYPE(fp->fp_glob)) {
#if NECP
case DTYPE_NETPOLICY:
necp_fd_memstatus(p, status,
(struct necp_fd_data *)fp_get_data(fp));
break;
#endif /* NECP */
#if SKYWALK
case DTYPE_CHANNEL:
kern_channel_memstatus(p, status,
(struct kern_channel *)fp_get_data(fp));
break;
#endif /* SKYWALK */
default:
break;
}
}
proc_fdunlock(p);
return 1;
}
#if SKYWALK
/*
* Since we make multiple passes across the fileproc array, record the
* first MAX_CHANNELS channel handles found. MAX_CHANNELS should be
* large enough to accommodate most, if not all, cases. If we find more,
* we'll go to the slow path during second pass.
*/
#define MAX_CHANNELS 8 /* should be more than enough */
#endif /* SKYWALK */
static int
networking_defunct_callout(proc_t p, void *arg)
{
struct pid_shutdown_sockets_args *args = arg;
int pid = args->pid;
int level = args->level;
struct fileproc *fp;
#if SKYWALK
int i;
int channel_count = 0;
struct kern_channel *channel_array[MAX_CHANNELS];
bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */
proc_fdlock(p);
fdt_foreach(fp, p) {
struct fileglob *fg = fp->fp_glob;
switch (FILEGLOB_DTYPE(fg)) {
case DTYPE_SOCKET: {
struct socket *so = (struct socket *)fg_get_data(fg);
if (proc_getpid(p) == pid || so->last_pid == pid ||
((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
/* Call networking stack with socket and level */
(void)socket_defunct(p, so, level);
}
break;
}
#if NECP
case DTYPE_NETPOLICY:
/* first pass: defunct necp and get stats for ntstat */
if (proc_getpid(p) == pid) {
necp_fd_defunct(p,
(struct necp_fd_data *)fg_get_data(fg));
}
break;
#endif /* NECP */
#if SKYWALK
case DTYPE_CHANNEL:
/* first pass: get channels and total count */
if (proc_getpid(p) == pid) {
if (channel_count < MAX_CHANNELS) {
channel_array[channel_count] =
(struct kern_channel *)fg_get_data(fg);
}
++channel_count;
}
break;
#endif /* SKYWALK */
default:
break;
}
}
#if SKYWALK
/*
* Second pass: defunct channels/flows (after NECP). Handle
* the common case of up to MAX_CHANNELS count with fast path,
* and traverse the fileproc array again only if we exceed it.
*/
if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
ASSERT(proc_getpid(p) == pid);
for (i = 0; i < channel_count; i++) {
ASSERT(channel_array[i] != NULL);
kern_channel_defunct(p, channel_array[i]);
}
} else if (channel_count != 0) {
ASSERT(proc_getpid(p) == pid);
fdt_foreach(fp, p) {
struct fileglob *fg = fp->fp_glob;
if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
kern_channel_defunct(p,
(struct kern_channel *)fg_get_data(fg));
}
}
}
#endif /* SKYWALK */
proc_fdunlock(p);
return PROC_RETURNED;
}
int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
int error = 0;
proc_t targetproc = PROC_NULL;
int pid = args->pid;
int level = args->level;
if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
error = EINVAL;
goto out;
}
targetproc = proc_find(pid);
if (targetproc == PROC_NULL) {
error = ESRCH;
goto out;
}
if (!task_for_pid_posix_check(targetproc) &&
!IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
error = EPERM;
goto out;
}
#if CONFIG_MACF
error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
if (error) {
error = EPERM;
goto out;
}
#endif
proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
networking_defunct_callout, args, NULL, NULL);
out:
if (targetproc != PROC_NULL) {
proc_rele(targetproc);
}
*ret = error;
return error;
}
#endif /* SOCKETS */
static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
__unused int arg2, struct sysctl_req *req)
{
int error = 0;
int new_value;
error = SYSCTL_OUT(req, arg1, sizeof(int));
if (error || req->newptr == USER_ADDR_NULL) {
return error;
}
if (!kauth_cred_issuser(kauth_cred_get())) {
return EPERM;
}
if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
goto out;
}
if ((new_value == KERN_TFP_POLICY_DENY)
|| (new_value == KERN_TFP_POLICY_DEFAULT)) {
tfp_policy = new_value;
} else {
error = EINVAL;
}
out:
return error;
}
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif
SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
&tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
&shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
&shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
&shared_region_persistence, 0, "");
/*
* shared_region_check_np:
*
* This system call is intended for dyld.
*
* dyld calls this when any process starts to see if the process's shared
* region is already set up and ready to use.
* This call returns the base address of the first mapping in the
* process's shared region.
* dyld will then check what's mapped at that address.
*
* If the shared region is empty, dyld will then attempt to map the shared
* cache file in the shared region via the shared_region_map_np() system call.
*
* If something's already mapped in the shared region, dyld will check if it
* matches the shared cache it would like to use for that process.
* If it matches, everything's ready and the process can proceed and use the
* shared region.
* If it doesn't match, dyld will unmap the shared region and map the shared
* cache into the process's address space via mmap().
*
* A NULL pointer argument can be used by dyld to indicate it has unmapped
* the shared region. We will remove the shared_region reference from the task.
*
* ERROR VALUES
* EINVAL no shared region
* ENOMEM shared region is empty
* EFAULT bad address for "start_address"
*/
int
shared_region_check_np(
__unused struct proc *p,
struct shared_region_check_np_args *uap,
__unused int *retvalp)
{
vm_shared_region_t shared_region;
mach_vm_offset_t start_address = 0;
int error = 0;
kern_return_t kr;
task_t task = current_task();
SHARED_REGION_TRACE_DEBUG(
("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(uint64_t)uap->start_address));
/*
* Special value of start_address used to indicate that map_with_linking() should
* no longer be allowed in this process
*/
if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
p->p_disallow_map_with_linking = TRUE;
return 0;
}
/* retrieve the current task's shared region */
shared_region = vm_shared_region_get(task);
if (shared_region != NULL) {
/*
* A NULL argument is used by dyld to indicate the task
* has unmapped its shared region.
*/
if (uap->start_address == 0) {
/* unmap it first */
vm_shared_region_remove(task, shared_region);
vm_shared_region_set(task, NULL);
} else {
/* retrieve address of its first mapping... */
kr = vm_shared_region_start_address(shared_region, &start_address, task);
if (kr != KERN_SUCCESS) {
SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
"check_np(0x%llx) "
"vm_shared_region_start_address() failed\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(uint64_t)uap->start_address));
error = ENOMEM;
} else {
#if __has_feature(ptrauth_calls)
/*
* Remap any section of the shared library that
* has authenticated pointers into private memory.
*/
if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
"check_np(0x%llx) "
"vm_shared_region_auth_remap() failed\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(uint64_t)uap->start_address));
error = ENOMEM;
}
#endif /* __has_feature(ptrauth_calls) */
/* ... and give it to the caller */
if (error == 0) {
error = copyout(&start_address,
(user_addr_t) uap->start_address,
sizeof(start_address));
if (error != 0) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] "
"check_np(0x%llx) "
"copyout(0x%llx) error %d\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(uint64_t)uap->start_address, (uint64_t)start_address,
error));
}
}
}
}
vm_shared_region_deallocate(shared_region);
} else {
/* no shared region ! */
error = EINVAL;
}
SHARED_REGION_TRACE_DEBUG(
("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(uint64_t)uap->start_address, (uint64_t)start_address, error));
return error;
}
static int
shared_region_copyin(
struct proc *p,
user_addr_t user_addr,
unsigned int count,
unsigned int element_size,
void *kernel_data)
{
int error = 0;
vm_size_t size = count * element_size;
error = copyin(user_addr, kernel_data, size);
if (error) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(): "
"copyin(0x%llx, %ld) failed (error=%d)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(uint64_t)user_addr, (long)size, error));
}
return error;
}
/*
* A reasonable upper limit to prevent overflow of allocation/copyin.
*/
#define _SR_FILE_MAPPINGS_MAX_FILES 256
/* forward declaration */
__attribute__((noinline))
static void shared_region_map_and_slide_cleanup(
struct proc *p,
uint32_t files_count,
struct _sr_file_mappings *sr_file_mappings,
struct vm_shared_region *shared_region);
/*
* Setup part of _shared_region_map_and_slide().
* It had to be broken out of _shared_region_map_and_slide() to
* prevent compiler inlining from blowing out the stack.
*/
__attribute__((noinline))
static int
shared_region_map_and_slide_setup(
struct proc *p,
uint32_t files_count,
struct shared_file_np *files,
uint32_t mappings_count,
struct shared_file_mapping_slide_np *mappings,
struct _sr_file_mappings **sr_file_mappings,
struct vm_shared_region **shared_region_ptr,
struct vnode *rdir_vp)
{
int error = 0;
struct _sr_file_mappings *srfmp;
uint32_t mappings_next;
struct vnode_attr va;
off_t fs;
#if CONFIG_MACF
vm_prot_t maxprot = VM_PROT_ALL;
#endif
uint32_t i;
struct vm_shared_region *shared_region = NULL;
boolean_t is_driverkit = task_is_driver(current_task());
SHARED_REGION_TRACE_DEBUG(
("shared_region: %p [%d(%s)] -> map\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm));
if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
error = E2BIG;
goto done;
}
if (files_count == 0) {
error = EINVAL;
goto done;
}
*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
Z_WAITOK | Z_ZERO);
if (*sr_file_mappings == NULL) {
error = ENOMEM;
goto done;
}
mappings_next = 0;
for (i = 0; i < files_count; i++) {
srfmp = &(*sr_file_mappings)[i];
srfmp->fd = files[i].sf_fd;
srfmp->mappings_count = files[i].sf_mappings_count;
srfmp->mappings = &mappings[mappings_next];
mappings_next += srfmp->mappings_count;
if (mappings_next > mappings_count) {
error = EINVAL;
goto done;
}
srfmp->slide = files[i].sf_slide;
}
/* get the process's shared region (setup in vm_map_exec()) */
shared_region = vm_shared_region_trim_and_get(current_task());
*shared_region_ptr = shared_region;
if (shared_region == NULL) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(): "
"no shared region\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm));
error = EINVAL;
goto done;
}
/*
* Check that the shared region matches the current root
* directory of this process. Deny the mapping to
* avoid tainting the shared region with something that
* doesn't quite belong in it.
*/
struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
SHARED_REGION_TRACE_ERROR(
("shared_region: map(%p) root_dir mismatch\n",
(void *)VM_KERNEL_ADDRPERM(current_thread())));
error = EPERM;
goto done;
}
for (srfmp = &(*sr_file_mappings)[0];
srfmp < &(*sr_file_mappings)[files_count];
srfmp++) {
if (srfmp->mappings_count == 0) {
/* no mappings here... */
continue;
}
/*
* A file descriptor of -1 is used to indicate that the data
* to be put in the shared region for this mapping comes directly
* from the process's address space. Ensure we have proper alignment.
*/
if (srfmp->fd == -1) {
/* only allow one mapping per fd */
if (srfmp->mappings_count > 1) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map data >1 mapping\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm));
error = EINVAL;
goto done;
}
/*
* The destination address and size must be page aligned.
*/
struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
mach_vm_address_t dest_addr = mapping->sms_address;
mach_vm_size_t map_size = mapping->sms_size;
if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm, dest_addr));
error = EINVAL;
goto done;
}
if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm, map_size));
error = EINVAL;
goto done;
}
continue;
}
/* get file structure from file descriptor */
error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
if (error) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map: "
"fd=%d lookup failed (error=%d)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm, srfmp->fd, error));
goto done;
}
/* we need at least read permission on the file */
if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map: "
"fd=%d not readable\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm, srfmp->fd));
error = EPERM;
goto done;
}
/* get vnode from file structure */
error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
if (error) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map: "
"fd=%d getwithref failed (error=%d)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm, srfmp->fd, error));
goto done;
}
srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
/* make sure the vnode is a regular file */
if (srfmp->vp->v_type != VREG) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"not a file (type=%d)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name, srfmp->vp->v_type));
error = EINVAL;
goto done;
}
#if CONFIG_MACF
/* pass in 0 for the offset argument because AMFI does not need the offset
* of the shared cache */
error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
if (error) {
goto done;
}
#endif /* MAC */
#if XNU_TARGET_OS_OSX && defined(__arm64__)
/*
* Check if the shared cache is in the trust cache;
* if so, we can skip the root ownership check.
*/
#if DEVELOPMENT || DEBUG
/*
* Skip both root ownership and trust cache check if
* enforcement is disabled.
*/
if (!cs_system_enforcement()) {
goto after_root_check;
}
#endif /* DEVELOPMENT || DEBUG */
struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
if (blob == NULL) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"missing CS blob\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name));
goto root_check;
}
const uint8_t *cdhash = csblob_get_cdhash(blob);
if (cdhash == NULL) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"missing cdhash\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name));
goto root_check;
}
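/*
 * The cdhash must come from the boot OS cryptex, static, or engineering
 * trust cache; anything else falls back to the root-ownership check below.
 */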
bool in_trust_cache = false;
TrustCacheQueryToken_t qt;
if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
TCType_t tc_type = kTCTypeInvalid;
TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
(tc_type == kTCTypeCryptex1BootOS ||
tc_type == kTCTypeStatic ||
tc_type == kTCTypeEngineering));
}
if (!in_trust_cache) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"not in trust cache\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name));
goto root_check;
}
goto after_root_check;
root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
/* The shared cache file must be owned by root */
VATTR_INIT(&va);
VATTR_WANTED(&va, va_uid);
error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
if (error) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"vnode_getattr(%p) failed (error=%d)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
error));
goto done;
}
if (va.va_uid != 0) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"owned by uid=%d instead of 0\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name, va.va_uid));
error = EPERM;
goto done;
}
#if XNU_TARGET_OS_OSX && defined(__arm64__)
after_root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
#if CONFIG_CSR
if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
VATTR_INIT(&va);
VATTR_WANTED(&va, va_flags);
error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
if (error) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"vnode_getattr(%p) failed (error=%d)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
error));
goto done;
}
if (!(va.va_flags & SF_RESTRICTED)) {
/*
* CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
* the shared cache file is NOT SIP-protected, so reject the
* mapping request
*/
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'), "
"vnode is not SIP-protected. \n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name));
error = EPERM;
goto done;
}
}
#else /* CONFIG_CSR */
/*
* Devices without SIP/ROSP need to make sure that the shared cache
* is either on the root volume or in the preboot cryptex volume.
*/
assert(rdir_vp != NULL);
if (srfmp->vp->v_mount != rdir_vp->v_mount) {
vnode_t preboot_vp = NULL;
#if XNU_TARGET_OS_OSX
#define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
#else
#define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
#endif
error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"not on process' root volume nor preboot volume\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name));
error = EPERM;
if (preboot_vp) {
(void)vnode_put(preboot_vp);
}
goto done;
} else if (preboot_vp) {
(void)vnode_put(preboot_vp);
}
}
#endif /* CONFIG_CSR */
if (scdir_enforce) {
char **expected_scdir_path;
struct vnode *scdir_vp = NULL;
for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
*expected_scdir_path != NULL;
expected_scdir_path++) {
/* get vnode for expected_scdir_path */
error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
if (error) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)]: "
"vnode_lookup(%s) failed (error=%d)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
*expected_scdir_path, error));
continue;
}
/* check if parent is scdir_vp */
assert(scdir_vp != NULL);
if (vnode_parent(srfmp->vp) == scdir_vp) {
(void)vnode_put(scdir_vp);
scdir_vp = NULL;
goto scdir_ok;
}
(void)vnode_put(scdir_vp);
scdir_vp = NULL;
}
/* nothing matches */
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"shared cache file not in expected directory\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name));
error = EPERM;
goto done;
}
scdir_ok:
/* get vnode size */
error = vnode_size(srfmp->vp, &fs, vfs_context_current());
if (error) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"vnode_size(%p) failed (error=%d)\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
goto done;
}
srfmp->file_size = fs;
/* get the file's memory object handle */
srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"no memory object\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name));
error = EINVAL;
goto done;
}
/* check that the mappings are properly covered by code signatures */
if (!cs_system_enforcement()) {
/* code signing is not enforced: no need to check */
} else {
for (i = 0; i < srfmp->mappings_count; i++) {
if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
/* zero-filled mapping: not backed by the file */
continue;
}
if (ubc_cs_is_range_codesigned(srfmp->vp,
srfmp->mappings[i].sms_file_offset,
srfmp->mappings[i].sms_size)) {
/* this mapping is fully covered by code signatures */
continue;
}
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(%p:'%s'): "
"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
"is not code-signed\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
srfmp->vp->v_name,
i, srfmp->mappings_count,
srfmp->mappings[i].sms_address,
srfmp->mappings[i].sms_size,
srfmp->mappings[i].sms_file_offset,
srfmp->mappings[i].sms_max_prot,
srfmp->mappings[i].sms_init_prot));
error = EINVAL;
goto done;
}
}
}
done:
if (error != 0) {
shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
*sr_file_mappings = NULL;
*shared_region_ptr = NULL;
}
return error;
}
/*
* shared_region_map_np()
*
* This system call is intended for dyld.
*
* dyld uses this to map a shared cache file into a shared region.
* This is usually done only the first time a shared cache is needed.
* Subsequent processes will just use the populated shared region without
* requiring any further setup.
*/
static int
_shared_region_map_and_slide(
struct proc *p,
uint32_t files_count,
struct shared_file_np *files,
uint32_t mappings_count,
struct shared_file_mapping_slide_np *mappings)
{
int error = 0;
kern_return_t kr = KERN_SUCCESS;
struct _sr_file_mappings *sr_file_mappings = NULL;
struct vnode *rdir_vp = NULL;
struct vm_shared_region *shared_region = NULL;
/*
* Get a reference to the current proc's root dir.
* Need this to prevent racing with chroot.
*/
proc_fdlock(p);
rdir_vp = p->p_fd.fd_rdir;
if (rdir_vp == NULL) {
rdir_vp = rootvnode;
}
assert(rdir_vp != NULL);
vnode_get(rdir_vp);
proc_fdunlock(p);
/*
* Turn files and mappings into sr_file_mappings and do the rest of the setup.
*/
error = shared_region_map_and_slide_setup(p, files_count,
files, mappings_count, mappings,
&sr_file_mappings, &shared_region, rdir_vp);
if (error != 0) {
vnode_put(rdir_vp);
return error;
}
/* map the file(s) into that shared region's submap */
kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
if (kr != KERN_SUCCESS) {
SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
"vm_shared_region_map_file() failed kr=0x%x\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm, kr));
}
/* convert kern_return_t to errno */
switch (kr) {
case KERN_SUCCESS:
error = 0;
break;
case KERN_INVALID_ADDRESS:
error = EFAULT;
break;
case KERN_PROTECTION_FAILURE:
error = EPERM;
break;
case KERN_NO_SPACE:
error = ENOMEM;
break;
case KERN_FAILURE:
case KERN_INVALID_ARGUMENT:
default:
error = EINVAL;
break;
}
/*
* Mark that this process is now using split libraries.
*/
if (error == 0 && (p->p_flag & P_NOSHLIB)) {
OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
}
vnode_put(rdir_vp);
shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);
SHARED_REGION_TRACE_DEBUG(
("shared_region: %p [%d(%s)] <- map\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm));
return error;
}
/*
* Clean up part of _shared_region_map_and_slide()
* It had to be broken out of _shared_region_map_and_slide() to
* prevent compiler inlining from blowing out the stack.
*/
__attribute__((noinline))
static void
shared_region_map_and_slide_cleanup(
struct proc *p,
uint32_t files_count,
struct _sr_file_mappings *sr_file_mappings,
struct vm_shared_region *shared_region)
{
struct _sr_file_mappings *srfmp;
struct vnode_attr va;
if (sr_file_mappings != NULL) {
for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
if (srfmp->vp != NULL) {
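/* remember that this vnode backs a dyld shared region mapping */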
vnode_lock_spin(srfmp->vp);
srfmp->vp->v_flag |= VSHARED_DYLD;
vnode_unlock(srfmp->vp);
/* update the vnode's access time */
if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
VATTR_INIT(&va);
nanotime(&va.va_access_time);
VATTR_SET_ACTIVE(&va, va_access_time);
vnode_setattr(srfmp->vp, &va, vfs_context_current());
}
#if NAMEDSTREAMS
/*
* If the shared cache is compressed, it may
* have a namedstream vnode instantiated for
* for it. That namedstream vnode will also
* have to be marked with VSHARED_DYLD.
*/
if (vnode_hasnamedstreams(srfmp->vp)) {
vnode_t svp;
if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
NS_OPEN, 0, vfs_context_kernel()) == 0) {
vnode_lock_spin(svp);
svp->v_flag |= VSHARED_DYLD;
vnode_unlock(svp);
vnode_put(svp);
}
}
#endif /* NAMEDSTREAMS */
/*
* release the vnode...
* ubc_map() still holds it for us in the non-error case
*/
(void) vnode_put(srfmp->vp);
srfmp->vp = NULL;
}
if (srfmp->fp != NULL) {
/* release the file descriptor */
fp_drop(p, srfmp->fd, srfmp->fp, 0);
srfmp->fp = NULL;
}
}
kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
}
if (shared_region != NULL) {
vm_shared_region_deallocate(shared_region);
}
}
/*
* For each file mapped, we may have mappings for:
* TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
* so let's round up to 8 mappings per file.
*/
#define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */
/*
* This is the new interface for setting up shared region mappings.
*
* The slide used for shared regions set up using this interface is handled differently
* from the old interface. The slide value passed in each shared_file_np represents
* a maximum value. The kernel will choose a random value based on that, then use it
* for all shared regions.
*/
#if defined (__x86_64__)
#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
#else
#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
#endif
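/*
 * The chosen slide is truncated to the platform's mapping granularity:
 * 4KB on x86_64, 16KB elsewhere. For example (hypothetical values), with
 * max_slide = 0x4000000 and random_val = 0x12345678 on a 16KB platform:
 *   slide_amount = (0x12345678 % 0x4000000) & ~0x3FFF
 *                = 0x2345678 & ~0x3FFF
 *                = 0x2344000
 */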
int
shared_region_map_and_slide_2_np(
struct proc *p,
struct shared_region_map_and_slide_2_np_args *uap,
__unused int *retvalp)
{
unsigned int files_count;
struct shared_file_np *shared_files = NULL;
unsigned int mappings_count;
struct shared_file_mapping_slide_np *mappings = NULL;
kern_return_t kr = KERN_SUCCESS;
files_count = uap->files_count;
mappings_count = uap->mappings_count;
if (files_count == 0) {
SHARED_REGION_TRACE_INFO(
("shared_region: %p [%d(%s)] map(): "
"no files\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm));
kr = 0; /* no files to map: we're done ! */
goto done;
} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
if (shared_files == NULL) {
kr = KERN_RESOURCE_SHORTAGE;
goto done;
}
} else {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(): "
"too many files (%d) max %d\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
files_count, _SR_FILE_MAPPINGS_MAX_FILES));
kr = KERN_FAILURE;
goto done;
}
if (mappings_count == 0) {
SHARED_REGION_TRACE_INFO(
("shared_region: %p [%d(%s)] map(): "
"no mappings\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm));
kr = 0; /* no mappings: we're done ! */
goto done;
} else if (mappings_count <= SFM_MAX) {
mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
if (mappings == NULL) {
kr = KERN_RESOURCE_SHORTAGE;
goto done;
}
} else {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(): "
"too many mappings (%d) max %d\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm,
mappings_count, SFM_MAX));
kr = KERN_FAILURE;
goto done;
}
kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
if (kr != KERN_SUCCESS) {
goto done;
}
kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
if (kr != KERN_SUCCESS) {
goto done;
}
uint32_t max_slide = shared_files[0].sf_slide;
uint32_t random_val;
uint32_t slide_amount;
if (max_slide != 0) {
read_random(&random_val, sizeof random_val);
slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
} else {
slide_amount = 0;
}
#if DEVELOPMENT || DEBUG
extern bool bootarg_disable_aslr;
if (bootarg_disable_aslr) {
slide_amount = 0;
}
#endif /* DEVELOPMENT || DEBUG */
/*
* Fix up the mappings to reflect the desired slide.
*/
unsigned int f;
unsigned int m = 0;
unsigned int i;
for (f = 0; f < files_count; ++f) {
shared_files[f].sf_slide = slide_amount;
for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
if (m >= mappings_count) {
SHARED_REGION_TRACE_ERROR(
("shared_region: %p [%d(%s)] map(): "
"mapping count argument was too small\n",
(void *)VM_KERNEL_ADDRPERM(current_thread()),
proc_getpid(p), p->p_comm));
kr = KERN_FAILURE;
goto done;
}
mappings[m].sms_address += slide_amount;
if (mappings[m].sms_slide_size != 0) {
mappings[m].sms_slide_start += slide_amount;
}
}
}
kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
kfree_data(shared_files, files_count * sizeof(shared_files[0]));
kfree_data(mappings, mappings_count * sizeof(mappings[0]));
return kr;
}
/*
* A syscall for dyld to use to map data pages that need load time relocation fixups.
* The fixups are performed by a custom pager during page-in, so the pages still appear
* "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
* on demand later, all w/o using the compressor.
*
* Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
* running, they are COW'd as normal.
*/
int
map_with_linking_np(
struct proc *p,
struct map_with_linking_np_args *uap,
__unused int *retvalp)
{
uint32_t region_count;
uint32_t r;
struct mwl_region *regions = NULL;
struct mwl_region *rp;
uint32_t link_info_size;
void *link_info = NULL; /* starts with a struct mwl_info_hdr */
struct mwl_info_hdr *info_hdr = NULL;
uint64_t binds_size;
int fd;
struct fileproc *fp = NULL;
struct vnode *vp = NULL;
size_t file_size;
off_t fs;
struct vnode_attr va;
memory_object_control_t file_control = NULL;
int error;
kern_return_t kr = KERN_SUCCESS;
/*
* Check if dyld has told us it finished with this call.
*/
if (p->p_disallow_map_with_linking) {
printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_FAILURE;
goto done;
}
/*
* First we do some sanity checking on what dyld has passed us.
*/
region_count = uap->region_count;
link_info_size = uap->link_info_size;
if (region_count == 0) {
printf("%s: [%d(%s)]: region_count == 0\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_FAILURE;
goto done;
}
if (region_count > MWL_MAX_REGION_COUNT) {
printf("%s: [%d(%s)]: region_count too big %d\n",
__func__, proc_getpid(p), p->p_comm, region_count);
kr = KERN_FAILURE;
goto done;
}
if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
printf("%s: [%d(%s)]: link_info_size too small\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_FAILURE;
goto done;
}
if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
printf("%s: [%d(%s)]: link_info_size too big %d\n",
__func__, proc_getpid(p), p->p_comm, link_info_size);
kr = KERN_FAILURE;
goto done;
}
/*
* Allocate and copyin the regions and link info
*/
regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
if (regions == NULL) {
printf("%s: [%d(%s)]: failed to allocate regions\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_RESOURCE_SHORTAGE;
goto done;
}
kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
if (kr != KERN_SUCCESS) {
printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
__func__, proc_getpid(p), p->p_comm, kr);
goto done;
}
link_info = kalloc_data(link_info_size, Z_WAITOK);
if (link_info == NULL) {
printf("%s: [%d(%s)]: failed to allocate link_info\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_RESOURCE_SHORTAGE;
goto done;
}
kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
if (kr != KERN_SUCCESS) {
printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
__func__, proc_getpid(p), p->p_comm, kr);
goto done;
}
/*
* Do some verification of the data structures.
*/
info_hdr = (struct mwl_info_hdr *)link_info;
if (info_hdr->mwli_version != MWL_INFO_VERS) {
printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
__func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
kr = KERN_FAILURE;
goto done;
}
if (info_hdr->mwli_binds_offset > link_info_size) {
printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
__func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
kr = KERN_FAILURE;
goto done;
}
/* some older devices have s/w page size > h/w page size, no need to support them */
if (info_hdr->mwli_page_size != PAGE_SIZE) {
/* no printf, since this is expected on some devices */
kr = KERN_INVALID_ARGUMENT;
goto done;
}
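/* each bind entry is 4 bytes for DYLD_CHAINED_PTR_32, 8 bytes for the 64-bit formats */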
binds_size = (uint64_t)info_hdr->mwli_binds_count *
((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
__func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
kr = KERN_FAILURE;
goto done;
}
if (info_hdr->mwli_chains_offset > link_info_size) {
printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
__func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
kr = KERN_FAILURE;
goto done;
}
/*
* Ensure the chained starts structure fits in the link info and make sure the
* segment info offsets are within bounds.
*/
if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
__func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
kr = KERN_FAILURE;
goto done;
}
if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
__func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
kr = KERN_FAILURE;
goto done;
}
/* Note that more verification of offsets is done in the pager itself */
/*
* Ensure we've only been given one FD and verify valid protections.
*/
fd = regions[0].mwlr_fd;
for (r = 0; r < region_count; ++r) {
if (regions[r].mwlr_fd != fd) {
printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
__func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
kr = KERN_FAILURE;
goto done;
}
/*
* Only allow data mappings and not zero fill. Permit TPRO
* mappings only when VM_PROT_READ | VM_PROT_WRITE.
*/
if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_FAILURE;
goto done;
}
if (regions[r].mwlr_protections & VM_PROT_ZF) {
printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
__func__, proc_getpid(p), p->p_comm, r);
kr = KERN_FAILURE;
goto done;
}
if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
!(regions[r].mwlr_protections & VM_PROT_WRITE)) {
printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
__func__, proc_getpid(p), p->p_comm, r);
kr = KERN_FAILURE;
goto done;
}
}
/* get file structure from file descriptor */
error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
if (error) {
printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
__func__, proc_getpid(p), p->p_comm, error);
kr = KERN_FAILURE;
goto done;
}
/* We need at least read permission on the file */
if (!(fp->fp_glob->fg_flag & FREAD)) {
printf("%s: [%d(%s)]: not readable\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_FAILURE;
goto done;
}
/* Get the vnode from file structure */
vp = (struct vnode *)fp_get_data(fp);
error = vnode_getwithref(vp);
if (error) {
printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
__func__, proc_getpid(p), p->p_comm, error);
kr = KERN_FAILURE;
vp = NULL; /* just to be sure */
goto done;
}
/* Make sure the vnode is a regular file */
if (vp->v_type != VREG) {
printf("%s: [%d(%s)]: vnode not VREG\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_FAILURE;
goto done;
}
/* get vnode size */
error = vnode_size(vp, &fs, vfs_context_current());
if (error) {
goto done;
}
file_size = fs;
/* get the file's memory object handle */
file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
printf("%s: [%d(%s)]: no memory object\n",
__func__, proc_getpid(p), p->p_comm);
kr = KERN_FAILURE;
goto done;
}
for (r = 0; r < region_count; ++r) {
rp = &regions[r];
#if CONFIG_MACF
vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
if (error) {
printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
__func__, proc_getpid(p), p->p_comm, r, error);
kr = KERN_FAILURE;
goto done;
}
#endif /* MAC */
/* check that the mappings are properly covered by code signatures */
if (cs_system_enforcement()) {
if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
printf("%s: [%d(%s)]: region %d, not code signed\n",
__func__, proc_getpid(p), p->p_comm, r);
kr = KERN_FAILURE;
goto done;
}
}
}
/* update the vnode's access time */
if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
VATTR_INIT(&va);
nanotime(&va.va_access_time);
VATTR_SET_ACTIVE(&va, va_access_time);
vnode_setattr(vp, &va, vfs_context_current());
}
/* get the VM to do the work */
kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
done:
if (fp != NULL) {
/* release the file descriptor */
fp_drop(p, fd, fp, 0);
}
if (vp != NULL) {
(void)vnode_put(vp);
}
if (regions != NULL) {
kfree_data(regions, region_count * sizeof(regions[0]));
}
/* link info is used in the pager if things worked */
if (link_info != NULL && kr != KERN_SUCCESS) {
kfree_data(link_info, link_info_size);
}
switch (kr) {
case KERN_SUCCESS:
return 0;
case KERN_RESOURCE_SHORTAGE:
return ENOMEM;
default:
return EINVAL;
}
}
#if DEBUG || DEVELOPMENT
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */
/* sysctl overflow room */
SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
(int *) &page_size, 0, "vm page size");
/* vm_page_free_target is provided as a makeshift solution for applications that want to
* allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
* reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_free_target, 0, "Pageout daemon free target");
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
unsigned int page_free_wanted;
page_free_wanted = mach_vm_ctl_page_free_wanted();
return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
0, 0, vm_ctl_page_free_wanted, "I", "");
extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_purgeable_count, 0, "Purgeable page count");
extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_kern_lpage_count, 0, "kernel used large pages");
#if DEVELOPMENT || DEBUG
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */
extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
&madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
extern int madvise_free_debug_sometimes;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
&madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_page_stats_reusable.free_shared, "");
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");
/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
#if defined (__x86_64__)
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif /* if DEVELOPMENT || DEBUG */
#endif /* #if defined (__x86_64__) */
#if CONFIG_SECLUDED_MEMORY
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");
extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
#endif /* CONFIG_SECLUDED_MEMORY */
#pragma mark Deferred Reclaim
#if CONFIG_DEFERRED_RECLAIM
#if DEVELOPMENT || DEBUG
/*
* VM reclaim testing
*/
extern bool vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid_t pid);
static int
sysctl_vm_reclaim_drain_async_queue SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
int error = EINVAL, pid = 0;
/*
* Only take action on a write (a new value must be supplied)
*/
error = sysctl_handle_int(oidp, &pid, 0, req);
if (error || !req->newptr) {
return error;
}
bool success = vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid);
if (success) {
error = 0;
}
return error;
}
SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_async_queue,
CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
&sysctl_vm_reclaim_drain_async_queue, "I", "");
extern uint64_t vm_reclaim_max_threshold;
extern uint64_t vm_reclaim_trim_divisor;
SYSCTL_ULONG(_vm, OID_AUTO, reclaim_max_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold, "");
SYSCTL_ULONG(_vm, OID_AUTO, reclaim_trim_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_trim_divisor, "");
#endif /* DEVELOPMENT || DEBUG */
#endif /* CONFIG_DEFERRED_RECLAIM */
#include <kern/thread.h>
#include <sys/user.h>
void vm_pageout_io_throttle(void);
void
vm_pageout_io_throttle(void)
{
struct uthread *uthread = current_uthread();
/*
* If the thread is marked as a low priority I/O type
* and the I/O we issued while in this cleaning operation
* collided with normal I/O operations, we'll
* delay in order to mitigate the impact of this
* task on the normal operation of the system.
*/
if (uthread->uu_lowpri_window) {
throttle_lowpri_io(1);
}
}
int
vm_pressure_monitor(
__unused struct proc *p,
struct vm_pressure_monitor_args *uap,
int *retval)
{
kern_return_t kr;
uint32_t pages_reclaimed;
uint32_t pages_wanted;
kr = mach_vm_pressure_monitor(
(boolean_t) uap->wait_for_pressure,
uap->nsecs_monitored,
(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
&pages_wanted);
switch (kr) {
case KERN_SUCCESS:
break;
case KERN_ABORTED:
return EINTR;
default:
return EINVAL;
}
if (uap->pages_reclaimed) {
if (copyout((void *)&pages_reclaimed,
uap->pages_reclaimed,
sizeof(pages_reclaimed)) != 0) {
return EFAULT;
}
}
*retval = (int) pages_wanted;
return 0;
}
int
kas_info(struct proc *p,
struct kas_info_args *uap,
int *retval __unused)
{
#ifndef CONFIG_KAS_INFO
(void)p;
(void)uap;
return ENOTSUP;
#else /* CONFIG_KAS_INFO */
int selector = uap->selector;
user_addr_t valuep = uap->value;
user_addr_t sizep = uap->size;
user_size_t size, rsize;
int error;
if (!kauth_cred_issuser(kauth_cred_get())) {
return EPERM;
}
#if CONFIG_MACF
error = mac_system_check_kas_info(kauth_cred_get(), selector);
if (error) {
return error;
}
#endif
if (IS_64BIT_PROCESS(p)) {
user64_size_t size64;
error = copyin(sizep, &size64, sizeof(size64));
size = (user_size_t)size64;
} else {
user32_size_t size32;
error = copyin(sizep, &size32, sizeof(size32));
size = (user_size_t)size32;
}
if (error) {
return error;
}
switch (selector) {
case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
{
uint64_t slide = vm_kernel_slide;
if (sizeof(slide) != size) {
return EINVAL;
}
error = copyout(&slide, valuep, sizeof(slide));
if (error) {
return error;
}
rsize = size;
}
break;
case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
{
uint32_t i;
kernel_mach_header_t *mh = &_mh_execute_header;
struct load_command *cmd;
cmd = (struct load_command*) &mh[1];
uint64_t *bases;
rsize = mh->ncmds * sizeof(uint64_t);
/*
* Return the size if no data was passed
*/
if (valuep == 0) {
break;
}
if (rsize > size) {
return EINVAL;
}
bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
for (i = 0; i < mh->ncmds; i++) {
if (cmd->cmd == LC_SEGMENT_KERNEL) {
__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
bases[i] = (uint64_t)sg->vmaddr;
}
cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
}
error = copyout(bases, valuep, rsize);
kfree_data(bases, rsize);
if (error) {
return error;
}
}
break;
case KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR:
case KAS_INFO_TXM_TEXT_SLIDE_SELECTOR:
{
#if CONFIG_SPTM
const uint64_t slide =
(selector == KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR) ? vm_sptm_offsets.slide : vm_txm_offsets.slide;
#else
const uint64_t slide = 0;
#endif
if (sizeof(slide) != size) {
return EINVAL;
}
error = copyout(&slide, valuep, sizeof(slide));
if (error) {
return error;
}
rsize = size;
}
break;
default:
return EINVAL;
}
if (IS_64BIT_PROCESS(p)) {
user64_size_t size64 = (user64_size_t)rsize;
error = copyout(&size64, sizep, sizeof(size64));
} else {
user32_size_t size32 = (user32_size_t)rsize;
error = copyout(&size32, sizep, sizeof(size32));
}
return error;
#endif /* CONFIG_KAS_INFO */
}
#if __has_feature(ptrauth_calls)
/*
* Generate a random pointer signing key that isn't 0.
*/
uint64_t
generate_jop_key(void)
{
uint64_t key;
do {
read_random(&key, sizeof key);
} while (key == 0);
return key;
}
#endif /* __has_feature(ptrauth_calls) */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
#pragma clang diagnostic ignored "-Wunused-function"
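/*
 * Compile-time checks that the casts in the SYSCTL_ULONG declarations
 * below are size-safe.
 */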
static void
asserts()
{
static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
}
SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop
extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");
extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
#if __arm64__ && (DEVELOPMENT || DEBUG)
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
static int
sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
int error = 0;
int new_value;
if (req->newptr == USER_ADDR_NULL) {
return 0;
}
error = SYSCTL_IN(req, &new_value, sizeof(int));
if (error) {
return error;
}
if (!vm_footprint_suspend_allowed) {
if (new_value != 0) {
/* suspends are not allowed... */
return 0;
}
/* ... but let resumes proceed */
}
DTRACE_VM2(footprint_suspend,
vm_map_t, current_map(),
int, new_value);
pmap_footprint_suspend(current_map(), new_value);
return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
#if CODE_SIGNING_MONITOR
extern uint64_t vm_cs_defer_to_csm;
extern uint64_t vm_cs_defer_to_csm_not;
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
#endif /* CODE_SIGNING_MONITOR */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
#if MACH_ASSERT
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */
#if MACH_ASSERT
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */
extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
/*
 * A sysctl that causes all existing shared regions to become stale. They
 * will no longer be used by anything new and will be torn down as soon as
 * the last existing user exits. Any successful write triggers this (the
 * handler checks only that a new value was supplied, not that it is non-zero).
 * This should only be used by launchd, so we check that the caller is initproc.
 */
static int
shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
unsigned int value = 0;
int changed = 0;
int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
if (error || !changed) {
return error;
}
if (current_proc() != initproc) {
return EPERM;
}
vm_shared_region_pivot();
return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
0, 0, shared_region_pivot, "I", "");
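/*
 * Illustrative user-space usage (a hedged sketch, not part of this file):
 * launchd would trigger the pivot by writing an int to the sysctl, e.g.
 *
 *	int one = 1;
 *	if (sysctlbyname("vm.shared_region_pivot", NULL, NULL, &one, sizeof(one)) != 0) {
 *		// fails with EPERM unless the caller is initproc
 *	}
 */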
extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_object_shadow_skipped, "");
SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
&vmtc_total, 0, "total text page corruptions detected");
#if DEBUG || DEVELOPMENT
/*
* A sysctl that can be used to corrupt a text page with an illegal instruction.
* Used for testing text page self healing.
*/
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
static int
corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
uint64_t value = 0;
int error = sysctl_handle_quad(oidp, &value, 0, req);
if (error || !req->newptr) {
return error;
}
if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
return 0;
} else {
return EINVAL;
}
}
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
0, 0, corrupt_text_addr, "-", "");
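/*
 * Illustrative user-space usage (a hedged sketch, not part of this file):
 * a test harness would pass a 64-bit address inside its own text segment
 * and then execute that address to exercise the self-healing path, e.g.
 *
 *	uint64_t addr = (uint64_t)(uintptr_t)&some_test_function;  // hypothetical target
 *	sysctlbyname("vm.corrupt_text_addr", NULL, NULL, &addr, sizeof(addr));
 */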
#endif /* DEBUG || DEVELOPMENT */
#if CONFIG_MAP_RANGES
/*
* vm.malloc_ranges
*
 * space-separated list of <left>:<right> hexadecimal address pairs.
*/
static int
vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
{
vm_map_t map = current_map();
struct mach_vm_range r1, r2;
char str[20 * 4];
int len;
if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
return ENOENT;
}
if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
return ENOENT;
}
len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
r1.max_address, r2.min_address,
r2.max_address, get_map_max(map));
return SYSCTL_OUT(req, str, len);
}
SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
0, 0, &vm_map_malloc_ranges, "A", "");
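/*
 * Illustrative user-space usage (a hedged sketch, not part of this file):
 * the two "0x<left>:0x<right>" pairs can be read and parsed with sscanf, e.g.
 *
 *	char buf[80];
 *	size_t len = sizeof(buf);
 *	unsigned long long l1, r1, l2, r2;
 *	if (sysctlbyname("vm.malloc_ranges", buf, &len, NULL, 0) == 0 &&
 *	    sscanf(buf, "0x%llx:0x%llx 0x%llx:0x%llx", &l1, &r1, &l2, &r2) == 4) {
 *		// l1:r1 and l2:r2 are the two address pairs formatted by the handler above
 *	}
 */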
#if DEBUG || DEVELOPMENT
static int
vm_map_user_range_default SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
struct mach_vm_range range;
if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
!= KERN_SUCCESS) {
return EINVAL;
}
return SYSCTL_OUT(req, &range, sizeof(range));
}
static int
vm_map_user_range_heap SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
struct mach_vm_range range;
if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
!= KERN_SUCCESS) {
return EINVAL;
}
return SYSCTL_OUT(req, &range, sizeof(range));
}
/*
* A sysctl that can be used to return ranges for the current VM map.
* Used for testing VM ranges.
*/
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
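/*
 * Illustrative user-space usage (a hedged sketch, assuming the SDK exposes
 * struct mach_vm_range to user space): the struct is copied out verbatim,
 * so a test can read it directly, e.g.
 *
 *	struct mach_vm_range range;
 *	size_t len = sizeof(range);
 *	if (sysctlbyname("vm.vm_map_user_range_heap", &range, &len, NULL, 0) == 0) {
 *		// range.min_address .. range.max_address bound the heap range
 *	}
 */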
#endif /* DEBUG || DEVELOPMENT */
#endif /* CONFIG_MAP_RANGES */
extern uint64_t vm_map_range_overflows_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
extern boolean_t vm_map_range_overflows_log;
SYSCTL_INT(_vm, OID_AUTO, map_range_overflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
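/*
 * Illustrative user-space usage (a hedged sketch, not part of this file):
 * the two RW knobs above can be tuned with sysctlbyname, e.g. to report
 * passes that take longer than 100 ms:
 *
 *	unsigned int ms = 100;
 *	sysctlbyname("vm.c_process_major_report_over_ms", NULL, NULL, &ms, sizeof(ms));
 */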
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
#if DEVELOPMENT || DEBUG
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */
#if MACH_ASSERT
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */