gems-kernel/source/THIRDPARTY/xnu/bsd/kern/sysv_shm.c

/*
* Copyright (c) 2000-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/* $NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $ */
/*
* Copyright (c) 1994 Adam Glass and Charles Hannum. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Adam Glass and Charles
* Hannum.
* 4. The names of the authors may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* NOTICE: This file was modified by McAfee Research in 2004 to introduce
* support for mandatory and extensible security protections. This notice
* is included in support of clause 2.2 (b) of the Apple Public License,
* Version 2.0.
* Copyright (c) 2005-2006 SPARTA, Inc.
*/
#include <sys/appleapiopts.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/shm_internal.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/ipcs.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <security/audit/audit.h>
#include <mach/mach_types.h>
#include <mach/vm_inherit.h>
#include <mach/vm_map.h>
#include <mach/mach_vm.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>
#include <vm/vm_kern.h>
#include <kern/locks.h>
#include <os/overflow.h>
/* Uncomment this line to see MAC debugging output. */
/* #define MAC_DEBUG */
#if CONFIG_MACF_DEBUG
#define MPRINTF(a) printf a
#else
#define MPRINTF(a)
#endif
#if SYSV_SHM
static int shminit(void);
static LCK_GRP_DECLARE(sysv_shm_subsys_lck_grp, "sysv_shm_subsys_lock");
static LCK_MTX_DECLARE(sysv_shm_subsys_mutex, &sysv_shm_subsys_lck_grp);
#define SYSV_SHM_SUBSYS_LOCK() lck_mtx_lock(&sysv_shm_subsys_mutex)
#define SYSV_SHM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_shm_subsys_mutex)
static int oshmctl(void *p, void *uap, void *retval);
static int shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, int * retval);
static int shmget_existing(struct shmget_args *uap, int mode, int segnum, int * retval);
static void shmid_ds_64to32(struct user_shmid_ds *in, struct user32_shmid_ds *out);
static void shmid_ds_32to64(struct user32_shmid_ds *in, struct user_shmid_ds *out);
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t* const shmcalls[] = {
(sy_call_t *)shmat, (sy_call_t *)oshmctl,
(sy_call_t *)shmdt, (sy_call_t *)shmget,
(sy_call_t *)shmctl
};
#define SHMSEG_FREE 0x0200
#define SHMSEG_REMOVED 0x0400
#define SHMSEG_ALLOCATED 0x0800
#define SHMSEG_WANTED 0x1000
static int shm_last_free, shm_nused, shm_committed;
struct shmid_kernel *shmsegs; /* 64 bit version */
static int shm_inited = 0;
/*
* Since anonymous memory chunks are limited to ANON_MAX_SIZE bytes,
* we have to keep a list of chunks when we want to handle a shared memory
* segment bigger than ANON_MAX_SIZE.
* Each chunk points to a VM named entry of up to ANON_MAX_SIZE bytes
* of anonymous memory.
*/
struct shm_handle {
void * shm_object; /* named entry for this chunk */
memory_object_size_t shm_handle_size; /* size of this chunk */
struct shm_handle *shm_handle_next; /* next chunk */
};
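/*
 * Illustrative sketch (guarded out of the build): how the chunking
 * policy described above carves a request into the chain.  The 10 MB
 * figure is an arbitrary example; the real loop lives in
 * shmget_allocate_segment() below.
 */
#if 0
mach_vm_size_t total_size = 10 * 1024 * 1024;
mach_vm_size_t alloc_size, size;

for (alloc_size = 0; alloc_size < total_size; alloc_size += size) {
	/* each chunk covers at most ANON_MAX_SIZE bytes */
	size = MIN(total_size - alloc_size, ANON_MAX_SIZE);
	/* ... create a named entry of `size` bytes and append a
	 * struct shm_handle for it to the shm_handle_next chain ... */
}
#endif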
struct shmmap_state {
mach_vm_address_t va; /* user address */
int shmid; /* segment id */
};
static void shm_deallocate_segment(struct shmid_kernel *);
static int shm_find_segment_by_key(key_t);
static struct shmid_kernel *shm_find_segment_by_shmid(int);
static int shm_delete_mapping(struct proc *, struct shmmap_state *, int);
#ifdef __APPLE_API_PRIVATE
#define DEFAULT_SHMMAX (4 * 1024 * 1024)
#define DEFAULT_SHMMIN 1
#define DEFAULT_SHMMNI 32
#define DEFAULT_SHMSEG 8
#define DEFAULT_SHMALL 1024
struct shminfo shminfo = {
.shmmax = DEFAULT_SHMMAX,
.shmmin = DEFAULT_SHMMIN,
.shmmni = DEFAULT_SHMMNI,
.shmseg = DEFAULT_SHMSEG,
.shmall = DEFAULT_SHMALL
};
#define SHMID_IS_VALID(x) ((x) >= 0)
#define SHMID_UNALLOCATED (-1)
#define SHMID_SENTINEL (-2)
#endif /* __APPLE_API_PRIVATE */
static __inline__ time_t
sysv_shmtime(void)
{
struct timeval tv;
microtime(&tv);
return tv.tv_sec;
}
/*
* This conversion is safe, since if we are converting for a 32 bit process,
* then the value of (struct shmid_ds)->shm_segsz will never exceed 4G.
*
* NOTE: Source and target may *NOT* overlap! (target is smaller)
*/
static void
shmid_ds_64to32(struct user_shmid_ds *in, struct user32_shmid_ds *out)
{
out->shm_perm = in->shm_perm;
out->shm_segsz = in->shm_segsz;
out->shm_lpid = in->shm_lpid;
out->shm_cpid = in->shm_cpid;
out->shm_nattch = in->shm_nattch;
out->shm_atime = in->shm_atime;
out->shm_dtime = in->shm_dtime;
out->shm_ctime = in->shm_ctime;
out->shm_internal = CAST_DOWN_EXPLICIT(int, in->shm_internal);
}
/*
* NOTE: Source and target are permitted to overlap! (source is smaller);
* this works because we copy fields in order from the end of the struct to
* the beginning.
*/
static void
shmid_ds_32to64(struct user32_shmid_ds *in, struct user_shmid_ds *out)
{
out->shm_internal = in->shm_internal;
out->shm_ctime = in->shm_ctime;
out->shm_dtime = in->shm_dtime;
out->shm_atime = in->shm_atime;
out->shm_nattch = in->shm_nattch;
out->shm_cpid = in->shm_cpid;
out->shm_lpid = in->shm_lpid;
out->shm_segsz = in->shm_segsz;
out->shm_perm = in->shm_perm;
}
static int
shm_find_segment_by_key(key_t key)
{
int i;
for (i = 0; i < shminfo.shmmni; i++) {
if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
shmsegs[i].u.shm_perm._key == key) {
return i;
}
}
return -1;
}
static struct shmid_kernel *
shm_find_segment_by_shmid(int shmid)
{
int segnum;
struct shmid_kernel *shmseg;
segnum = IPCID_TO_IX(shmid);
if (segnum < 0 || segnum >= shminfo.shmmni) {
return NULL;
}
shmseg = &shmsegs[segnum];
if ((shmseg->u.shm_perm.mode & (SHMSEG_ALLOCATED | SHMSEG_REMOVED))
!= SHMSEG_ALLOCATED ||
shmseg->u.shm_perm._seq != IPCID_TO_SEQ(shmid)) {
return NULL;
}
return shmseg;
}
static void
shm_deallocate_segment(struct shmid_kernel *shmseg)
{
struct shm_handle *shm_handle, *shm_handle_next;
mach_vm_size_t size;
for (shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal); /* tunnel */
shm_handle != NULL;
shm_handle = shm_handle_next) {
shm_handle_next = shm_handle->shm_handle_next;
mach_memory_entry_port_release(shm_handle->shm_object);
kfree_type(struct shm_handle, shm_handle);
}
shmseg->u.shm_internal = USER_ADDR_NULL; /* tunnel */
size = vm_map_round_page(shmseg->u.shm_segsz,
vm_map_page_mask(current_map()));
shm_committed -= btoc(size);
shm_nused--;
shmseg->u.shm_perm.mode = SHMSEG_FREE;
#if CONFIG_MACF
/* Reset the MAC label */
mac_sysvshm_label_recycle(shmseg);
#endif
}
static int
shm_delete_mapping(__unused struct proc *p, struct shmmap_state *shmmap_s,
int deallocate)
{
struct shmid_kernel *shmseg;
int segnum, result;
mach_vm_size_t size;
segnum = IPCID_TO_IX(shmmap_s->shmid);
shmseg = &shmsegs[segnum];
size = vm_map_round_page(shmseg->u.shm_segsz,
vm_map_page_mask(current_map())); /* XXX done for us? */
if (deallocate) {
result = mach_vm_deallocate(current_map(), shmmap_s->va, size);
if (result != KERN_SUCCESS) {
return EINVAL;
}
}
shmmap_s->shmid = SHMID_UNALLOCATED;
shmseg->u.shm_dtime = sysv_shmtime();
if ((--shmseg->u.shm_nattch <= 0) &&
(shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
shm_deallocate_segment(shmseg);
shm_last_free = segnum;
}
return 0;
}
int
shmdt(struct proc *p, struct shmdt_args *uap, int32_t *retval)
{
#if CONFIG_MACF
struct shmid_kernel *shmsegptr;
#endif
struct shmmap_state *shmmap_s;
int i;
int shmdtret = 0;
AUDIT_ARG(svipc_addr, uap->shmaddr);
SYSV_SHM_SUBSYS_LOCK();
if ((shmdtret = shminit())) {
goto shmdt_out;
}
shmmap_s = (struct shmmap_state *)p->vm_shm;
if (shmmap_s == NULL) {
shmdtret = EINVAL;
goto shmdt_out;
}
for (; shmmap_s->shmid != SHMID_SENTINEL; shmmap_s++) {
if (SHMID_IS_VALID(shmmap_s->shmid) &&
shmmap_s->va == (mach_vm_offset_t)uap->shmaddr) {
break;
}
}
if (!SHMID_IS_VALID(shmmap_s->shmid)) {
shmdtret = EINVAL;
goto shmdt_out;
}
#if CONFIG_MACF
/*
* XXX: It might be useful to move this into the shm_delete_mapping
* function
*/
shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
shmdtret = mac_sysvshm_check_shmdt(kauth_cred_get(), shmsegptr);
if (shmdtret) {
goto shmdt_out;
}
#endif
i = shm_delete_mapping(p, shmmap_s, 1);
if (i == 0) {
*retval = 0;
}
shmdtret = i;
shmdt_out:
SYSV_SHM_SUBSYS_UNLOCK();
return shmdtret;
}
int
shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval)
{
int error, flags;
struct shmid_kernel *shmseg;
struct shmmap_state *shmmap_s = NULL;
struct shm_handle *shm_handle;
mach_vm_address_t attach_va; /* attach address in/out */
mach_vm_address_t shmlba;
mach_vm_size_t map_size; /* size of map entry */
mach_vm_size_t mapped_size;
vm_prot_t prot;
kern_return_t rv;
int shmat_ret;
vm_map_kernel_flags_t vmk_flags;
shmat_ret = 0;
AUDIT_ARG(svipc_id, uap->shmid);
AUDIT_ARG(svipc_addr, uap->shmaddr);
SYSV_SHM_SUBSYS_LOCK();
if ((shmat_ret = shminit())) {
goto shmat_out;
}
shmmap_s = (struct shmmap_state *)p->vm_shm;
if (shmmap_s == NULL) {
/* lazily allocate the shm map */
int nsegs = shminfo.shmseg;
if (nsegs <= 0) {
shmat_ret = EMFILE;
goto shmat_out;
}
/* +1 for the sentinel */
shmmap_s = kalloc_type(struct shmmap_state, nsegs + 1, Z_WAITOK);
if (shmmap_s == NULL) {
shmat_ret = ENOMEM;
goto shmat_out;
}
/* initialize the entries */
for (int i = 0; i < nsegs; i++) {
shmmap_s[i].shmid = SHMID_UNALLOCATED;
}
shmmap_s[nsegs].shmid = SHMID_SENTINEL;
p->vm_shm = (caddr_t)shmmap_s;
}
shmseg = shm_find_segment_by_shmid(uap->shmid);
if (shmseg == NULL) {
shmat_ret = EINVAL;
goto shmat_out;
}
AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
error = ipcperm(kauth_cred_get(), &shmseg->u.shm_perm,
(uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R | IPC_W);
if (error) {
shmat_ret = error;
goto shmat_out;
}
#if CONFIG_MACF
error = mac_sysvshm_check_shmat(kauth_cred_get(), shmseg, uap->shmflg);
if (error) {
shmat_ret = error;
goto shmat_out;
}
#endif
/* find a free shmid */
while (SHMID_IS_VALID(shmmap_s->shmid)) {
shmmap_s++;
}
if (shmmap_s->shmid != SHMID_UNALLOCATED) {
/* no free shmids */
shmat_ret = EMFILE;
goto shmat_out;
}
map_size = vm_map_round_page(shmseg->u.shm_segsz,
vm_map_page_mask(current_map()));
prot = VM_PROT_READ;
if ((uap->shmflg & SHM_RDONLY) == 0) {
prot |= VM_PROT_WRITE;
}
flags = MAP_ANON | MAP_SHARED;
if (uap->shmaddr) {
flags |= MAP_FIXED;
}
attach_va = (mach_vm_address_t)uap->shmaddr;
shmlba = vm_map_page_size(current_map()); /* XXX instead of SHMLBA */
if (uap->shmflg & SHM_RND) {
attach_va &= ~(shmlba - 1);
} else if ((attach_va & (shmlba - 1)) != 0) {
shmat_ret = EINVAL;
goto shmat_out;
}
if (flags & MAP_FIXED) {
vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
} else {
vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
}
mapped_size = 0;
/* first reserve enough space... */
rv = mach_vm_map_kernel(current_map(),
&attach_va,
map_size,
0,
vmk_flags,
IPC_PORT_NULL,
0,
FALSE,
VM_PROT_NONE,
VM_PROT_NONE,
VM_INHERIT_NONE);
if (rv != KERN_SUCCESS) {
goto out;
}
shmmap_s->va = attach_va;
/* ... then map the shared memory over the reserved space */
for (shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal);/* tunnel */
shm_handle != NULL;
shm_handle = shm_handle->shm_handle_next) {
vm_map_size_t chunk_size;
assert(mapped_size < map_size);
chunk_size = shm_handle->shm_handle_size;
if (chunk_size > map_size - mapped_size) {
/*
* Partial mapping of last chunk due to
* page size mismatch.
*/
assert(vm_map_page_shift(current_map()) < PAGE_SHIFT);
assert(shm_handle->shm_handle_next == NULL);
chunk_size = map_size - mapped_size;
}
rv = vm_map_enter_mem_object(
current_map(), /* process map */
&attach_va, /* attach address */
chunk_size, /* size to map */
(mach_vm_offset_t)0, /* alignment mask */
VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true),
shm_handle->shm_object,
(mach_vm_offset_t)0,
FALSE,
prot,
prot,
VM_INHERIT_SHARE);
if (rv != KERN_SUCCESS) {
goto out;
}
mapped_size += chunk_size;
attach_va = attach_va + chunk_size;
}
shmmap_s->shmid = uap->shmid;
shmseg->u.shm_lpid = proc_getpid(p);
shmseg->u.shm_atime = sysv_shmtime();
shmseg->u.shm_nattch++;
*retval = shmmap_s->va; /* XXX return -1 on error */
shmat_ret = 0;
goto shmat_out;
out:
if (mapped_size > 0) {
(void) mach_vm_deallocate(current_map(),
shmmap_s->va,
mapped_size);
}
switch (rv) {
case KERN_INVALID_ADDRESS:
case KERN_NO_SPACE:
shmat_ret = ENOMEM;
break;
case KERN_PROTECTION_FAILURE:
shmat_ret = EACCES;
break;
default:
shmat_ret = EINVAL;
break;
}
shmat_out:
SYSV_SHM_SUBSYS_UNLOCK();
return shmat_ret;
}
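/*
 * Illustrative user-space sketch (guarded out of the kernel build): the
 * typical shmget()/shmat()/shmdt() round trip served by the code above.
 * The segment size, permissions, and message text are arbitrary examples.
 */
#if 0
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* private 4 KB segment, owner read/write */
	int shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	if (shmid < 0) {
		perror("shmget");
		return 1;
	}
	/* shmaddr == NULL lets the kernel pick the attach address */
	char *p = shmat(shmid, NULL, 0);
	if (p == (char *)-1) {
		perror("shmat");
		return 1;
	}
	strcpy(p, "hello");
	printf("%s\n", p);
	(void)shmdt(p);                         /* drops shm_nattch */
	(void)shmctl(shmid, IPC_RMID, NULL);    /* freed once nattch reaches 0 */
	return 0;
}
#endif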
static int
oshmctl(__unused void *p, __unused void *uap, __unused void *retval)
{
return EINVAL;
}
/*
* Returns: 0 Success
* EINVAL
* copyout:EFAULT
* copyin:EFAULT
* ipcperm:EPERM
* ipcperm:EACCES
*/
int
shmctl(__unused struct proc *p, struct shmctl_args *uap, int32_t *retval)
{
int error;
kauth_cred_t cred = kauth_cred_get();
struct user_shmid_ds inbuf;
struct shmid_kernel *shmseg;
int shmctl_ret = 0;
AUDIT_ARG(svipc_cmd, uap->cmd);
AUDIT_ARG(svipc_id, uap->shmid);
SYSV_SHM_SUBSYS_LOCK();
if ((shmctl_ret = shminit())) {
goto shmctl_out;
}
shmseg = shm_find_segment_by_shmid(uap->shmid);
if (shmseg == NULL) {
shmctl_ret = EINVAL;
goto shmctl_out;
}
/* XXAUDIT: This is the perms BEFORE any change by this call. This
* may not be what is desired.
*/
AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
#if CONFIG_MACF
error = mac_sysvshm_check_shmctl(cred, shmseg, uap->cmd);
if (error) {
shmctl_ret = error;
goto shmctl_out;
}
#endif
switch (uap->cmd) {
case IPC_STAT:
error = ipcperm(cred, &shmseg->u.shm_perm, IPC_R);
if (error) {
shmctl_ret = error;
goto shmctl_out;
}
if (IS_64BIT_PROCESS(p)) {
struct user_shmid_ds shmid_ds = {};
memcpy(&shmid_ds, &shmseg->u, sizeof(struct user_shmid_ds));
/* Clear kernel reserved pointer before copying to user space */
shmid_ds.shm_internal = USER_ADDR_NULL;
error = copyout(&shmid_ds, uap->buf, sizeof(shmid_ds));
} else {
struct user32_shmid_ds shmid_ds32 = {};
shmid_ds_64to32(&shmseg->u, &shmid_ds32);
/* Clear kernel reserved pointer before copying to user space */
shmid_ds32.shm_internal = (user32_addr_t)0;
error = copyout(&shmid_ds32, uap->buf, sizeof(shmid_ds32));
}
if (error) {
shmctl_ret = error;
goto shmctl_out;
}
break;
case IPC_SET:
error = ipcperm(cred, &shmseg->u.shm_perm, IPC_M);
if (error) {
shmctl_ret = error;
goto shmctl_out;
}
if (IS_64BIT_PROCESS(p)) {
error = copyin(uap->buf, &inbuf, sizeof(struct user_shmid_ds));
} else {
struct user32_shmid_ds shmid_ds32;
error = copyin(uap->buf, &shmid_ds32, sizeof(shmid_ds32));
/* convert in place; ugly, but safe */
shmid_ds_32to64(&shmid_ds32, &inbuf);
}
if (error) {
shmctl_ret = error;
goto shmctl_out;
}
shmseg->u.shm_perm.uid = inbuf.shm_perm.uid;
shmseg->u.shm_perm.gid = inbuf.shm_perm.gid;
shmseg->u.shm_perm.mode =
(shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
(inbuf.shm_perm.mode & ACCESSPERMS);
shmseg->u.shm_ctime = sysv_shmtime();
break;
case IPC_RMID:
error = ipcperm(cred, &shmseg->u.shm_perm, IPC_M);
if (error) {
shmctl_ret = error;
goto shmctl_out;
}
shmseg->u.shm_perm._key = IPC_PRIVATE;
shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
if (shmseg->u.shm_nattch <= 0) {
shm_deallocate_segment(shmseg);
shm_last_free = IPCID_TO_IX(uap->shmid);
}
break;
#if 0
case SHM_LOCK:
case SHM_UNLOCK:
#endif
default:
shmctl_ret = EINVAL;
goto shmctl_out;
}
*retval = 0;
shmctl_ret = 0;
shmctl_out:
SYSV_SHM_SUBSYS_UNLOCK();
return shmctl_ret;
}
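/*
 * Illustrative user-space sketch (guarded out of the kernel build): the
 * IPC_STAT path above fills in a struct shmid_ds for the caller.  The
 * helper name is hypothetical and `shmid` is assumed to come from a
 * prior shmget() call.
 */
#if 0
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>

static void
print_shm_stat(int shmid)
{
	struct shmid_ds ds;

	if (shmctl(shmid, IPC_STAT, &ds) == 0) {
		printf("size=%zu nattch=%d cpid=%d\n",
		    (size_t)ds.shm_segsz, (int)ds.shm_nattch, (int)ds.shm_cpid);
	}
}
#endif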
static int
shmget_existing(struct shmget_args *uap, int mode, int segnum, int *retval)
{
struct shmid_kernel *shmseg;
int error = 0;
shmseg = &shmsegs[segnum];
if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
/*
* This segment is in the process of being allocated. Wait
* until it's done, and look the key up again (in case the
* allocation failed or it was freed).
*/
shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
error = tsleep((caddr_t)shmseg, PLOCK | PCATCH, "shmget", 0);
if (error) {
return error;
}
return EAGAIN;
}
/*
* The low 9 bits of shmflag are the mode bits being requested, which
* are the actual mode bits desired on the segment, and not in IPC_R
* form; therefore it would be incorrect to call ipcperm() to validate
* them; instead, we AND the existing mode with the requested mode, and
* verify that it matches the requested mode; otherwise, we fail with
* EACCES (access denied).
*/
if ((shmseg->u.shm_perm.mode & mode) != mode) {
return EACCES;
}
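/*
 * Worked example of the check above: a segment created with mode
 * 0600 satisfies a request for 0600 or any subset (e.g. 0400,
 * since 0600 & 0400 == 0400), but a request for 0604 fails with
 * EACCES because 0600 & 0604 == 0600 != 0604.
 */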
#if CONFIG_MACF
error = mac_sysvshm_check_shmget(kauth_cred_get(), shmseg, uap->shmflg);
if (error) {
return error;
}
#endif
if (uap->size && uap->size > shmseg->u.shm_segsz) {
return EINVAL;
}
if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) {
return EEXIST;
}
*retval = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
return 0;
}
static int
shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode,
int *retval)
{
int i, segnum, shmid;
kauth_cred_t cred = kauth_cred_get();
struct shmid_kernel *shmseg;
struct shm_handle *shm_handle;
kern_return_t kret;
mach_vm_size_t total_size, size = 0, alloc_size;
void * mem_object;
struct shm_handle *shm_handle_next, **shm_handle_next_p;
if (uap->size <= 0 ||
uap->size < (user_size_t)shminfo.shmmin ||
uap->size > (user_size_t)shminfo.shmmax) {
return EINVAL;
}
if (shm_nused >= shminfo.shmmni) { /* any shmids left? */
return ENOSPC;
}
if (mach_vm_round_page_overflow(uap->size, &total_size)) {
return EINVAL;
}
if ((user_ssize_t)(shm_committed + btoc(total_size)) > shminfo.shmall) {
return ENOMEM;
}
if (shm_last_free < 0) {
for (i = 0; i < shminfo.shmmni; i++) {
if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE) {
break;
}
}
if (i == shminfo.shmmni) {
panic("shmseg free count inconsistent");
}
segnum = i;
} else {
segnum = shm_last_free;
shm_last_free = -1;
}
shmseg = &shmsegs[segnum];
/*
* In case we sleep in malloc(), mark the segment present but deleted
* so that no one else tries to create the same key.
* XXX but we don't release the global lock !?
*/
shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
shmseg->u.shm_perm._key = uap->key;
shmseg->u.shm_perm._seq = (shmseg->u.shm_perm._seq + 1) & 0x7fff;
shm_handle_next_p = NULL;
for (alloc_size = 0;
alloc_size < total_size;
alloc_size += size) {
size = MIN(total_size - alloc_size, ANON_MAX_SIZE);
kret = mach_make_memory_entry_64(
VM_MAP_NULL,
(memory_object_size_t *) &size,
(memory_object_offset_t) 0,
MAP_MEM_NAMED_CREATE | VM_PROT_DEFAULT,
(ipc_port_t *) &mem_object, 0);
if (kret != KERN_SUCCESS) {
goto out;
}
shm_handle = kalloc_type(struct shm_handle, Z_WAITOK | Z_NOFAIL);
shm_handle->shm_object = mem_object;
shm_handle->shm_handle_size = size;
shm_handle->shm_handle_next = NULL;
if (shm_handle_next_p == NULL) {
shmseg->u.shm_internal = CAST_USER_ADDR_T(shm_handle);/* tunnel */
} else {
*shm_handle_next_p = shm_handle;
}
shm_handle_next_p = &shm_handle->shm_handle_next;
}
shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = kauth_cred_getuid(cred);
shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = kauth_cred_getgid(cred);
shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
(mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
shmseg->u.shm_segsz = uap->size;
shmseg->u.shm_cpid = proc_getpid(p);
shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
#if CONFIG_MACF
mac_sysvshm_label_associate(cred, shmseg);
#endif
shmseg->u.shm_ctime = sysv_shmtime();
shm_committed += btoc(size);
shm_nused++;
AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
/*
* Somebody else wanted this key while we were asleep. Wake
* them up now.
*/
shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
wakeup((caddr_t)shmseg);
}
*retval = shmid;
AUDIT_ARG(svipc_id, shmid);
return 0;
out:
if (kret != KERN_SUCCESS) {
for (shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal); /* tunnel */
shm_handle != NULL;
shm_handle = shm_handle_next) {
shm_handle_next = shm_handle->shm_handle_next;
mach_memory_entry_port_release(shm_handle->shm_object);
kfree_type(struct shm_handle, shm_handle);
}
shmseg->u.shm_internal = USER_ADDR_NULL; /* tunnel */
}
switch (kret) {
case KERN_INVALID_ADDRESS:
case KERN_NO_SPACE:
return ENOMEM;
case KERN_PROTECTION_FAILURE:
return EACCES;
default:
return EINVAL;
}
}
int
shmget(struct proc *p, struct shmget_args *uap, int32_t *retval)
{
int segnum, mode, error;
int shmget_ret = 0;
/* Auditing is actually done in shmget_allocate_segment() */
SYSV_SHM_SUBSYS_LOCK();
if ((shmget_ret = shminit())) {
goto shmget_out;
}
mode = uap->shmflg & ACCESSPERMS;
if (uap->key != IPC_PRIVATE) {
again:
segnum = shm_find_segment_by_key(uap->key);
if (segnum >= 0) {
error = shmget_existing(uap, mode, segnum, retval);
if (error == EAGAIN) {
goto again;
}
shmget_ret = error;
goto shmget_out;
}
if ((uap->shmflg & IPC_CREAT) == 0) {
shmget_ret = ENOENT;
goto shmget_out;
}
}
shmget_ret = shmget_allocate_segment(p, uap, mode, retval);
shmget_out:
SYSV_SHM_SUBSYS_UNLOCK();
return shmget_ret;
}
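/*
 * Illustrative user-space sketch (guarded out of the kernel build): how
 * the existing-vs-new split above looks from user space.  The rendezvous
 * path and project id passed to ftok() are hypothetical.
 */
#if 0
#include <sys/ipc.h>
#include <sys/shm.h>

static int
get_or_create_segment(void)
{
	key_t key = ftok("/tmp/example", 'x');  /* hypothetical key source */

	/* IPC_CREAT|IPC_EXCL fails with EEXIST if the key already exists */
	int shmid = shmget(key, 4096, IPC_CREAT | IPC_EXCL | 0600);
	if (shmid < 0) {
		/* fall back to the segment some other process created */
		shmid = shmget(key, 4096, 0600);
	}
	return shmid;
}
#endif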
/*
* shmsys
*
* Entry point for all SHM calls: shmat, oshmctl, shmdt, shmget, shmctl
*
* Parameters: p Process requesting the call
* uap User argument descriptor (see below)
* retval Return value of the selected shm call
*
* Indirect parameters: uap->which shm call to invoke (index in array of shm calls)
* uap->a2 User argument descriptor
*
* Returns: 0 Success
* !0 Not success
*
* Implicit returns: retval Return value of the selected shm call
*
* DEPRECATED: This interface should not be used to call the other SHM
* functions (shmat, oshmctl, shmdt, shmget, shmctl). The correct
* usage is to call the other SHM functions directly.
*/
int
shmsys(struct proc *p, struct shmsys_args *uap, int32_t *retval)
{
/* The routine that we are dispatching already does this */
if (uap->which >= sizeof(shmcalls) / sizeof(shmcalls[0])) {
return EINVAL;
}
return (*shmcalls[uap->which])(p, &uap->a2, retval);
}
/*
* Return 0 on success, 1 on failure.
*/
int
shmfork(struct proc *p1, struct proc *p2)
{
struct shmmap_state *shmmap_s;
int nsegs = 0;
int ret = 0;
SYSV_SHM_SUBSYS_LOCK();
if (shminit()) {
ret = 1;
goto shmfork_out;
}
struct shmmap_state *src = (struct shmmap_state *)p1->vm_shm;
assert(src);
/* count number of shmid entries in src */
for (struct shmmap_state *s = src; s->shmid != SHMID_SENTINEL; s++) {
nsegs++;
}
shmmap_s = kalloc_type(struct shmmap_state, nsegs + 1, Z_WAITOK);
if (shmmap_s == NULL) {
ret = 1;
goto shmfork_out;
}
bcopy(src, (caddr_t)shmmap_s, (nsegs + 1) * sizeof(struct shmmap_state));
p2->vm_shm = (caddr_t)shmmap_s;
for (; shmmap_s->shmid != SHMID_SENTINEL; shmmap_s++) {
if (SHMID_IS_VALID(shmmap_s->shmid)) {
shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
}
}
shmfork_out:
SYSV_SHM_SUBSYS_UNLOCK();
return ret;
}
static void
shmcleanup(struct proc *p, int deallocate)
{
struct shmmap_state *shmmap_s;
int nsegs = 0;
SYSV_SHM_SUBSYS_LOCK();
shmmap_s = (struct shmmap_state *)p->vm_shm;
for (; shmmap_s->shmid != SHMID_SENTINEL; shmmap_s++) {
nsegs++;
if (SHMID_IS_VALID(shmmap_s->shmid)) {
/*
* XXX: Should the MAC framework enforce a
* check here as well?
*/
shm_delete_mapping(p, shmmap_s, deallocate);
}
}
kfree_type(struct shmmap_state, nsegs + 1, p->vm_shm);
SYSV_SHM_SUBSYS_UNLOCK();
}
void
shmexit(struct proc *p)
{
shmcleanup(p, 1);
}
/*
* shmexec() is like shmexit(), only it doesn't delete the mappings,
* since the old address space has already been destroyed and the new
* one instantiated. Instead, it just does the housekeeping work we
* need to do to keep the System V shared memory subsystem sane.
*/
__private_extern__ void
shmexec(struct proc *p)
{
shmcleanup(p, 0);
}
int
shminit(void)
{
size_t sz;
int i;
if (!shm_inited) {
/*
* we store internally 64 bit, since if we didn't, we would
* be unable to represent a segment size in excess of 32 bits
* with the (struct shmid_ds)->shm_segsz field; also, POSIX
* dictates this field be a size_t, which is 64 bits when
* running 64 bit binaries.
*/
if (os_mul_overflow(shminfo.shmmni, sizeof(struct shmid_kernel), &sz)) {
return ENOMEM;
}
shmsegs = zalloc_permanent(sz, ZALIGN_PTR);
if (shmsegs == NULL) {
return ENOMEM;
}
for (i = 0; i < shminfo.shmmni; i++) {
shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
shmsegs[i].u.shm_perm._seq = 0;
#if CONFIG_MACF
mac_sysvshm_label_init(&shmsegs[i]);
#endif
}
shm_last_free = 0;
shm_nused = 0;
shm_committed = 0;
shm_inited = 1;
}
return 0;
}
/* (struct sysctl_oid *oidp, void *arg1, int arg2, \
* struct sysctl_req *req) */
static int
sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1,
__unused int arg2, struct sysctl_req *req)
{
int error = 0;
int sysctl_shminfo_ret = 0;
int64_t saved_shmmax;
int64_t saved_shmmin;
int64_t saved_shmseg;
int64_t saved_shmmni;
int64_t saved_shmall;
error = SYSCTL_OUT(req, arg1, sizeof(int64_t));
if (error || req->newptr == USER_ADDR_NULL) {
return error;
}
SYSV_SHM_SUBSYS_LOCK();
/* shmmni cannot be changed after SysV SHM has been initialized */
if (shm_inited && arg1 == &shminfo.shmmni) {
sysctl_shminfo_ret = EPERM;
goto sysctl_shminfo_out;
}
saved_shmmax = shminfo.shmmax;
saved_shmmin = shminfo.shmmin;
saved_shmseg = shminfo.shmseg;
saved_shmmni = shminfo.shmmni;
saved_shmall = shminfo.shmall;
if ((error = SYSCTL_IN(req, arg1, sizeof(int64_t))) != 0) {
sysctl_shminfo_ret = error;
goto sysctl_shminfo_out;
}
if (arg1 == &shminfo.shmmax) {
/* shmmax needs to be page-aligned */
if (shminfo.shmmax & PAGE_MASK_64 || shminfo.shmmax < 0) {
shminfo.shmmax = saved_shmmax;
sysctl_shminfo_ret = EINVAL;
goto sysctl_shminfo_out;
}
} else if (arg1 == &shminfo.shmmin) {
if (shminfo.shmmin < 0) {
shminfo.shmmin = saved_shmmin;
sysctl_shminfo_ret = EINVAL;
goto sysctl_shminfo_out;
}
} else if (arg1 == &shminfo.shmseg) {
/* add a sanity check - 20847256 */
if (shminfo.shmseg > INT32_MAX || shminfo.shmseg < 0) {
shminfo.shmseg = saved_shmseg;
sysctl_shminfo_ret = EINVAL;
goto sysctl_shminfo_out;
}
} else if (arg1 == &shminfo.shmmni) {
/* add a sanity check - 20847256 */
if (shminfo.shmmni > INT32_MAX || shminfo.shmmni < 0) {
shminfo.shmmni = saved_shmmni;
sysctl_shminfo_ret = EINVAL;
goto sysctl_shminfo_out;
}
} else if (arg1 == &shminfo.shmall) {
/* add a sanity check - 20847256 */
if (shminfo.shmall > INT32_MAX || shminfo.shmall < 0) {
shminfo.shmall = saved_shmall;
sysctl_shminfo_ret = EINVAL;
goto sysctl_shminfo_out;
}
}
sysctl_shminfo_ret = 0;
sysctl_shminfo_out:
SYSV_SHM_SUBSYS_UNLOCK();
return sysctl_shminfo_ret;
}
static int
IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
__unused int arg2, struct sysctl_req *req)
{
int error;
int cursor;
union {
struct user32_IPCS_command u32;
struct user_IPCS_command u64;
} ipcs = { };
struct user32_shmid_ds shmid_ds32 = { }; /* post conversion, 32 bit version */
struct user_shmid_ds shmid_ds = { }; /* 64 bit version */
void *shmid_dsp;
size_t ipcs_sz = sizeof(struct user_IPCS_command);
size_t shmid_ds_sz = sizeof(struct user_shmid_ds);
struct proc *p = current_proc();
SYSV_SHM_SUBSYS_LOCK();
if ((error = shminit())) {
goto ipcs_shm_sysctl_out;
}
if (!IS_64BIT_PROCESS(p)) {
ipcs_sz = sizeof(struct user32_IPCS_command);
shmid_ds_sz = sizeof(struct user32_shmid_ds);
}
/* Copy in the command structure */
if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) {
goto ipcs_shm_sysctl_out;
}
if (!IS_64BIT_PROCESS(p)) { /* convert in place */
ipcs.u64.ipcs_data = CAST_USER_ADDR_T(ipcs.u32.ipcs_data);
}
/* Let us version this interface... */
if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
error = EINVAL;
goto ipcs_shm_sysctl_out;
}
switch (ipcs.u64.ipcs_op) {
case IPCS_SHM_CONF: /* Obtain global configuration data */
if (ipcs.u64.ipcs_datalen != sizeof(struct shminfo)) {
if (ipcs.u64.ipcs_cursor != 0) { /* fwd. compat. */
error = ENOMEM;
break;
}
error = ERANGE;
break;
}
error = copyout(&shminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
break;
case IPCS_SHM_ITER: /* Iterate over existing segments */
cursor = ipcs.u64.ipcs_cursor;
if (cursor < 0 || cursor >= shminfo.shmmni) {
error = ERANGE;
break;
}
if (ipcs.u64.ipcs_datalen != (int)shmid_ds_sz) {
error = EINVAL;
break;
}
for (; cursor < shminfo.shmmni; cursor++) {
if (shmsegs[cursor].u.shm_perm.mode & SHMSEG_ALLOCATED) {
break;
}
continue;
}
if (cursor == shminfo.shmmni) {
error = ENOENT;
break;
}
shmid_dsp = &shmsegs[cursor]; /* default: 64 bit */
/*
* If necessary, convert the 64 bit kernel segment
* descriptor to a 32 bit user one.
*/
if (!IS_64BIT_PROCESS(p)) {
shmid_ds_64to32(shmid_dsp, &shmid_ds32);
/* Clear kernel reserved pointer before copying to user space */
shmid_ds32.shm_internal = (user32_addr_t)0;
shmid_dsp = &shmid_ds32;
} else {
memcpy(&shmid_ds, shmid_dsp, sizeof(shmid_ds));
/* Clear kernel reserved pointer before copying to user space */
shmid_ds.shm_internal = USER_ADDR_NULL;
shmid_dsp = &shmid_ds;
}
error = copyout(shmid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
if (!error) {
/* update cursor */
ipcs.u64.ipcs_cursor = cursor + 1;
if (!IS_64BIT_PROCESS(p)) { /* convert in place */
ipcs.u32.ipcs_data = CAST_DOWN_EXPLICIT(user32_addr_t, ipcs.u64.ipcs_data);
}
error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
}
break;
default:
error = EINVAL;
break;
}
ipcs_shm_sysctl_out:
SYSV_SHM_SUBSYS_UNLOCK();
return error;
}
SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "SYSV");
SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmax, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
&shminfo.shmmax, 0, &sysctl_shminfo, "Q", "shmmax");
SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmin, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
&shminfo.shmmin, 0, &sysctl_shminfo, "Q", "shmmin");
SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmni, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
&shminfo.shmmni, 0, &sysctl_shminfo, "Q", "shmmni");
SYSCTL_PROC(_kern_sysv, OID_AUTO, shmseg, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
&shminfo.shmseg, 0, &sysctl_shminfo, "Q", "shmseg");
SYSCTL_PROC(_kern_sysv, OID_AUTO, shmall, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
&shminfo.shmall, 0, &sysctl_shminfo, "Q", "shmall");
SYSCTL_NODE(_kern_sysv, OID_AUTO, ipcs, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "SYSVIPCS");
SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, shm, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
0, 0, IPCS_shm_sysctl,
"S,IPCS_shm_command",
"ipcs shm command interface");
#endif /* SYSV_SHM */
/* DSEP Review Done pl-20051108-v02 @2743,@2908,@2913,@3009 */