gems-kernel/source/THIRDPARTY/xnu/bsd/vfs/vfs_init.c
2024-06-03 11:29:39 -05:00

664 lines
18 KiB
C

/*
* Copyright (c) 2000-2010 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed
* to Berkeley by John Heidemann of the UCLA Ficus project.
*
* Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vfs_init.c 8.5 (Berkeley) 5/11/95
*/
/*
* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
* support for mandatory and extensible security protections. This notice
* is included in support of clause 2.2 (b) of the Apple Public License,
* Version 2.0.
*/
#include <sys/param.h>
#include <sys/mount_internal.h>
#include <sys/time.h>
#include <sys/vm.h>
#include <sys/vnode_internal.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/errno.h>
#include <kern/kalloc.h>
#include <kern/smr.h>
#include <sys/decmpfs.h>
#if CONFIG_MACF
#include <security/mac_framework.h>
#include <sys/kauth.h>
#endif
#if QUOTA
#include <sys/quota.h>
#endif
#if CONFIG_EXCLAVES
#include <vfs/vfs_exclave_fs.h>
#endif
/*
 * Sigh, such primitive tools are these...
 *
 * DODEBUG(A) expands to its argument only when the "#if 0" below is
 * changed to "#if 1"; as written it expands to nothing, so every
 * DODEBUG(...) statement in this file compiles away.
 */
#if 0
#define DODEBUG(A) A
#else
#define DODEBUG(A)
#endif
/* NOTE(review): presumably declares the typed allocator used for struct mount - confirm against kern/kalloc.h */
KALLOC_TYPE_DEFINE(mount_zone, struct mount, KT_DEFAULT);
/* NOTE(review): presumably defines the SMR domain labelled "VFS" - confirm against kern/smr.h */
SMR_DEFINE(_vfs_smr, "VFS");
/* Constant handle through which _vfs_smr is referenced. */
const smr_t vfs_smr = &_vfs_smr;
/* Defined elsewhere; called first thing from vfsinit(). */
__private_extern__ void vntblinit(void);
extern const struct vnodeopv_desc *vfs_opv_descs[];
/* a list of lists of vnodeops defns */
extern struct vnodeop_desc *vfs_op_descs[];
/* and the operations they perform */
/*
 * This code doesn't work if the defn is **vnodop_defns with cc.
 * The problem is because of the compiler sometimes putting in an
 * extra level of indirection for arrays. It's an interesting
 * "feature" of C.
 */
/* Number of enabled vnode operations; computed by vfs_op_init(). */
int vfs_opv_numops;
/* Function-pointer type used to size the slots of an operations vector. */
typedef int (*PFIvp)(void *);
/*
 * A miscellaneous routine.
 *
 * Generic "default" routine: it takes no arguments and unconditionally
 * reports failure by returning ENOTSUP.
 */
int
vn_default_error(void)
{
	const int unsupported = ENOTSUP;

	return unsupported;
}
/*
* vfs_init.c
*
* Allocate and fill in operations vectors.
*
* An undocumented feature of this approach to defining operations is that
* there can be multiple entries in vfs_opv_descs for the same operations
* vector. This allows third parties to extend the set of operations
* supported by another layer in a binary compatibile way. For example,
* assume that NFS needed to be modified to support Ficus. NFS has an entry
* (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
* default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions)
* listing those new operations Ficus adds to NFS, all without modifying the
* NFS code. (Of couse, the OTW NFS protocol still needs to be munged, but
* that is a(whole)nother story.) This is a feature.
*/
void
vfs_opv_init(void)
{
int i, j, k;
int(***opv_desc_vector_p)(void *);
int(**opv_desc_vector)(void *);
const struct vnodeopv_entry_desc *opve_descp;
/*
* Allocate the dynamic vectors and fill them in.
*/
for (i = 0; vfs_opv_descs[i]; i++) {
opv_desc_vector_p = vfs_opv_descs[i]->opv_desc_vector_p;
/*
* Allocate and init the vector, if it needs it.
* Also handle backwards compatibility.
*/
if (*opv_desc_vector_p == NULL) {
*opv_desc_vector_p = zalloc_permanent(vfs_opv_numops * sizeof(PFIvp),
ZALIGN(PFIvp));
DODEBUG(printf("vector at %x allocated\n",
opv_desc_vector_p));
}
opv_desc_vector = *opv_desc_vector_p;
for (j = 0; vfs_opv_descs[i]->opv_desc_ops[j].opve_op; j++) {
opve_descp = &(vfs_opv_descs[i]->opv_desc_ops[j]);
/* Silently skip known-disabled operations */
if (opve_descp->opve_op->vdesc_flags & VDESC_DISABLED) {
printf("vfs_fsadd: Ignoring reference in %p to disabled operation %s.\n",
vfs_opv_descs[i], opve_descp->opve_op->vdesc_name);
continue;
}
/*
* Sanity check: is this operation listed
* in the list of operations? We check this
* by seeing if its offest is zero. Since
* the default routine should always be listed
* first, it should be the only one with a zero
* offset. Any other operation with a zero
* offset is probably not listed in
* vfs_op_descs, and so is probably an error.
*
* A panic here means the layer programmer
* has committed the all-too common bug
* of adding a new operation to the layer's
* list of vnode operations but
* not adding the operation to the system-wide
* list of supported operations.
*/
if (opve_descp->opve_op->vdesc_offset == 0 &&
opve_descp->opve_op !=
VDESC(vnop_default)) {
printf("operation %s not listed in %s.\n",
opve_descp->opve_op->vdesc_name,
"vfs_op_descs");
panic("vfs_opv_init: bad operation");
}
/*
* Fill in this entry.
*/
opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
opve_descp->opve_impl;
}
}
/*
* Finally, go back and replace unfilled routines
* with their default. (Sigh, an O(n^3) algorithm. I
* could make it better, but that'd be work, and n is small.)
*/
for (i = 0; vfs_opv_descs[i]; i++) {
opv_desc_vector = *(vfs_opv_descs[i]->opv_desc_vector_p);
/*
* Force every operations vector to have a default routine.
*/
if (opv_desc_vector[VOFFSET(vnop_default)] == NULL) {
panic("vfs_opv_init: operation vector without default routine.");
}
for (k = 0; k < vfs_opv_numops; k++) {
if (opv_desc_vector[k] == NULL) {
opv_desc_vector[k] =
opv_desc_vector[VOFFSET(vnop_default)];
}
}
}
}
/*
* Initialize known vnode operations vectors.
*/
void
vfs_op_init(void)
{
int i;
DODEBUG(printf("Vnode_interface_init.\n"));
/*
* Set all vnode vectors to a well known value.
*/
for (i = 0; vfs_opv_descs[i]; i++) {
*(vfs_opv_descs[i]->opv_desc_vector_p) = NULL;
}
/*
* Figure out how many ops there are by counting the table,
* and assign each its offset.
*/
for (vfs_opv_numops = 0, i = 0; vfs_op_descs[i]; i++) {
/* Silently skip known-disabled operations */
if (vfs_op_descs[i]->vdesc_flags & VDESC_DISABLED) {
continue;
}
vfs_op_descs[i]->vdesc_offset = vfs_opv_numops;
vfs_opv_numops++;
}
DODEBUG(printf("vfs_opv_numops=%d\n", vfs_opv_numops));
}
/*
 * Routines having to do with the management of the vnode table.
 */
/* Vnode operation tables defined elsewhere. */
extern struct vnodeops dead_vnodeops;
extern struct vnodeops spec_vnodeops;
/*
 * vars for vnode list lock
 *
 * One lock group and one attribute object are shared by the vnode list
 * spin lock, the spechash mutex and the pkg_extensions mutex below.
 */
static LCK_GRP_DECLARE(vnode_list_lck_grp, "vnode list");
static LCK_ATTR_DECLARE(vnode_list_lck_attr, 0, 0);
/* Taken and released by vnode_list_lock() / vnode_list_unlock(). */
static LCK_SPIN_DECLARE_ATTR(vnode_list_spin_lock,
&vnode_list_lck_grp, &vnode_list_lck_attr);
/* Taken and released by SPECHASH_LOCK() / SPECHASH_UNLOCK(). */
static LCK_MTX_DECLARE_ATTR(spechash_mtx_lock,
&vnode_list_lck_grp, &vnode_list_lck_attr);
/* Not static: used outside this file. */
LCK_MTX_DECLARE_ATTR(pkg_extensions_lck,
&vnode_list_lck_grp, &vnode_list_lck_attr);
/* vars for mount lock (group and attributes used by mount_lock_init()) */
static LCK_GRP_DECLARE(mnt_lck_grp, "mount");
static LCK_ATTR_DECLARE(mnt_lck_attr, 0, 0);
/* vars for mount list lock (see mount_list_lock() / mount_list_unlock()) */
static LCK_GRP_DECLARE(mnt_list_lck_grp, "mount list");
LCK_MTX_DECLARE(mnt_list_mtx_lock, &mnt_list_lck_grp);
/*
 * We want dead_mountp to be a constant pointer, but vfsinit() runs
 * pretty late, so the dead mount is allocated statically and
 * dead_mountp is statically initialized to point at it.  The contents
 * of dead_mount_store are filled in by vfsinit().
 */
static struct mount dead_mount_store;
struct mount * const dead_mountp = &dead_mount_store;
/*
 * Initialize the vnode structures and initialize each file system type.
 *
 * In order: set up the vnode table, the filesystem event mechanism and the
 * name cache; build the vnode operation vectors; initialize and link every
 * populated entry of the static vfsconf table; set up vnode authorization;
 * fill in the statically allocated dead mount; and finally start the
 * optional subsystems selected at build time.
 */
void
vfsinit(void)
{
	struct vfstable *entry;
	struct mount *dead_mp;
	int slot;
	int next_typenum;

	/* Vnode table. */
	vntblinit();

	/* Filesystem event mechanism. */
	vfs_event_init();

	/* Vnode name cache. */
	nchinit();

	/* Vnode operation vectors: number the ops, then fill the vectors. */
	vfs_op_init();
	vfs_opv_init();

	/*
	 * Initialize each file system type in the static list, until the
	 * first NULL ->vfc_vfsops is encountered.
	 */
	next_typenum = VT_NON;
	for (slot = 0, entry = vfsconf; slot < maxvfsslots; slot++, entry++) {
		struct vfsconf vfsc;

		if (entry->vfc_vfsops == NULL) {
			break;
		}

		/* Chain the previous slot to this one. */
		if (slot != 0) {
			vfsconf[slot - 1].vfc_next = entry;
		}

		/* Track one past the largest type number seen so far. */
		if (entry->vfc_typenum >= next_typenum) {
			next_typenum = entry->vfc_typenum + 1;
		}

		/* Build the vfsconf view handed to the filesystem's init hook. */
		bzero(&vfsc, sizeof(struct vfsconf));
		bcopy(entry->vfc_name, vfsc.vfc_name, sizeof(vfsc.vfc_name));
		vfsc.vfc_typenum = entry->vfc_typenum;
		vfsc.vfc_refcount = entry->vfc_refcount;
		vfsc.vfc_flags = entry->vfc_flags;
		vfsc.vfc_reserved1 = 0;
		vfsc.vfc_reserved2 = 0;
		vfsc.vfc_reserved3 = 0;

		/* Filesystems that supply a sysctl handler get their own OID. */
		if (entry->vfc_vfsops->vfs_sysctl) {
			struct sysctl_oid oid = SYSCTL_STRUCT_INIT(_vfs, entry->vfc_typenum, , CTLTYPE_NODE | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0, vfs_sysctl_node, "-", "");
			struct sysctl_oid *oidp = NULL;

			oidp = kalloc_type(struct sysctl_oid, Z_WAITOK);
			*oidp = oid;
			oidp->oid_name = entry->vfc_name;

			/* Memory for VFS oid held by vfsentry forever */
			entry->vfc_sysctl = oidp;
			sysctl_register_oid(entry->vfc_sysctl);
		}

		entry->vfc_vfsops->vfs_init(&vfsc);

		numused_vfsslots++;
		numregistered_fses++;
	}

	/* next vfc_typenum to be used */
	maxvfstypenum = next_typenum;

	/* Vnop authorization scope. */
	vnode_authorize_init();

	/*
	 * Fill in the mount point used for dead vnodes, starting with the
	 * default I/O constraints.
	 */
	dead_mp = &dead_mount_store;

	dead_mp->mnt_maxwritecnt = MAXPHYS;
	dead_mp->mnt_maxreadcnt = dead_mp->mnt_maxwritecnt;
	dead_mp->mnt_segwritecnt = 32;
	dead_mp->mnt_segreadcnt = dead_mp->mnt_segwritecnt;
	dead_mp->mnt_maxsegreadsize = dead_mp->mnt_maxreadcnt;
	dead_mp->mnt_maxsegwritesize = dead_mp->mnt_maxwritecnt;
	dead_mp->mnt_devblocksize = DEV_BSIZE;
	dead_mp->mnt_alignmentmask = PAGE_MASK;
	dead_mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
	dead_mp->mnt_ioscale = 1;
	dead_mp->mnt_ioflags = 0;
	dead_mp->mnt_realrootvp = NULLVP;
	dead_mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;

	TAILQ_INIT(&dead_mp->mnt_vnodelist);
	TAILQ_INIT(&dead_mp->mnt_workerqueue);
	TAILQ_INIT(&dead_mp->mnt_newvnodes);

	dead_mp->mnt_flag = MNT_LOCAL;
	dead_mp->mnt_lflag = MNT_LDEAD;
	mount_lock_init(dead_mp);

#if CONFIG_MACF
	mac_mount_label_init(dead_mp);
	mac_mount_label_associate(vfs_context_kernel(), dead_mp);
#endif

	/*
	 * dead_mountp is a statically-initialized constant pointer
	 * to dead_mount_store, so there is nothing to publish here.
	 */

#if FS_COMPRESSION
	decmpfs_init();
#endif

	nspace_resolver_init();

#if CONFIG_EXCLAVES
	vfs_exclave_fs_start();
#endif
}
/*
 * Acquire the file-private vnode list spin lock, passing
 * vnode_list_lck_grp as the lock group.
 */
void
vnode_list_lock(void)
{
lck_spin_lock_grp(&vnode_list_spin_lock, &vnode_list_lck_grp);
}
/*
 * Release the vnode list spin lock taken by vnode_list_lock().
 */
void
vnode_list_unlock(void)
{
lck_spin_unlock(&vnode_list_spin_lock);
}
/*
 * Acquire the mount list mutex (mnt_list_mtx_lock).  In this file it
 * guards the vfsconf table updates made by vfstable_add().
 */
void
mount_list_lock(void)
{
lck_mtx_lock(&mnt_list_mtx_lock);
}
/*
 * Release the mount list mutex taken by mount_list_lock().
 */
void
mount_list_unlock(void)
{
lck_mtx_unlock(&mnt_list_mtx_lock);
}
/*
 * Initialize the four locks embedded in a mount structure: the mutexes
 * mnt_mlock, mnt_iter_lock and mnt_renamelock, and the read/write lock
 * mnt_rwlock.  All of them use the "mount" lock group and attributes.
 *
 * Parameter:	mp	mount whose locks are to be initialized
 */
void
mount_lock_init(mount_t mp)
{
lck_mtx_init(&mp->mnt_mlock, &mnt_lck_grp, &mnt_lck_attr);
lck_mtx_init(&mp->mnt_iter_lock, &mnt_lck_grp, &mnt_lck_attr);
lck_mtx_init(&mp->mnt_renamelock, &mnt_lck_grp, &mnt_lck_attr);
lck_rw_init(&mp->mnt_rwlock, &mnt_lck_grp, &mnt_lck_attr);
}
/*
 * Destroy the four locks set up by mount_lock_init(), in the same order
 * in which they were initialized.
 *
 * Parameter:	mp	mount whose locks are to be destroyed
 */
void
mount_lock_destroy(mount_t mp)
{
lck_mtx_destroy(&mp->mnt_mlock, &mnt_lck_grp);
lck_mtx_destroy(&mp->mnt_iter_lock, &mnt_lck_grp);
lck_mtx_destroy(&mp->mnt_renamelock, &mnt_lck_grp);
lck_rw_destroy(&mp->mnt_rwlock, &mnt_lck_grp);
}
/*
 * Name:	vfstable_add
 *
 * Description:	Add a filesystem to the vfsconf list.  The contents of
 *		the supplied entry are copied into the first unused
 *		slot of the static table; if every static slot is in
 *		use, they are copied into a newly allocated entry
 *		instead.  The new entry is linked into the list and,
 *		when the filesystem supplies a sysctl handler, a sysctl
 *		OID named after the filesystem is registered for it.
 *
 * Parameter:	nvfsp			description of the VFS to add;
 *					copied, not retained
 *
 * Returns:	Pointer to the entry now on the list (either a static
 *		slot or the allocated entry).
 *
 * Locking:	Takes and releases the mount list lock internally.
 *
 * Notes:	The vfsconf should be treated as a linked list by
 *		all external references, as the implementation is
 *		expected to change in the future.  The linkage is
 *		through ->vfc_next, and the list is NULL terminated.
 *
 * Warning:	This code assumes that vfsconf[0] is non-empty.
 */
struct vfstable *
vfstable_add(struct vfstable *nvfsp)
{
	struct vfstable *entry;
	struct vfstable *allocated = NULL;
	struct sysctl_oid *oidp = NULL;
	int slot;

	/* Prepare the sysctl OID up front, before the list lock is taken. */
	if (nvfsp->vfc_vfsops->vfs_sysctl) {
		struct sysctl_oid oid = SYSCTL_STRUCT_INIT(_vfs, nvfsp->vfc_typenum, , CTLTYPE_NODE | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0, vfs_sysctl_node, "-", "");

		oidp = kalloc_type(struct sysctl_oid, Z_WAITOK);
		*oidp = oid;
	}

	/*
	 * Find the next empty slot; an empty slot is recognized by a
	 * NULL-valued ->vfc_vfsops, so deleting a VFS must set the entry
	 * back to NULL.
	 *
	 * When the static table is full, the lock is dropped to allocate an
	 * entry and the search is repeated, since a static slot may have
	 * been freed in the meantime.  The loop is left with the lock held.
	 */
	for (;;) {
		mount_list_lock();

		slot = 0;
		while (slot < maxvfsslots && vfsconf[slot].vfc_vfsops != NULL) {
			slot++;
		}

		if (slot != maxvfsslots) {
			entry = &vfsconf[slot];
			break;
		}
		if (allocated != NULL) {
			entry = allocated;
			break;
		}

		mount_list_unlock();
		/* out of static slots; allocate one instead */
		allocated = kalloc_type(struct vfstable, Z_WAITOK);
	}

	/*
	 * Fill the chosen entry from the caller's description and link it
	 * in after the preceding slot.
	 *
	 * Note: relies on 'slot' still being 'maxvfsslots' in the allocation
	 * case, so the new entry is chained after the last static slot.
	 */
	bcopy(nvfsp, entry, sizeof(struct vfstable));
	if (slot == 0) {
		entry->vfc_next = NULL;
	} else {
		entry->vfc_next = vfsconf[slot - 1].vfc_next;
		vfsconf[slot - 1].vfc_next = entry;
	}

	if (entry != allocated) {
		/* used a statically allocated slot */
		numused_vfsslots++;
	}
	numregistered_fses++;

	if (oidp) {
		/* Memory freed in vfstable_del after unregistration */
		oidp->oid_name = entry->vfc_name;
		entry->vfc_sysctl = oidp;
		sysctl_register_oid(entry->vfc_sysctl);
	}

	mount_list_unlock();

	if (allocated && allocated != entry) {
		/* did allocation, but ended up using static slot */
		kfree_type(struct vfstable, allocated);
	}

	return entry;
}
/*
 * Name:	vfstable_del
 *
 * Description:	Remove a filesystem entry from the vfsconf list.  Any
 *		sysctl OID attached to the entry is unregistered and
 *		freed.  An entry living in the static table is zeroed
 *		so that its slot can be reused; any other entry is
 *		freed.
 *
 * Parameter:	vtbl			entry to remove, identified by
 *					address
 *
 * Returns:	0			Success
 *		ESRCH			vtbl is not on the vfsconf list
 *
 * Locking:	The mount list lock is asserted to be owned on entry
 *		and on exit when built with DEBUG.  It is dropped and
 *		re-taken around the free of a dynamically allocated
 *		entry.
 */
int
vfstable_del(struct vfstable * vtbl)
{
	struct vfstable **linkp;
	struct vfstable *victim;

#if DEBUG
	lck_mtx_assert(&mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	/*
	 * Walk the list by link pointer; on exit *linkp is either vtbl or
	 * the terminating NULL.
	 */
	linkp = &vfsconf;
	while (*linkp != NULL && *linkp != vtbl) {
		linkp = &(*linkp)->vfc_next;
	}

	victim = *linkp;
	if (victim == NULL) {
		return ESRCH;   /* vtbl not on vfsconf list */
	}

	/* Tear down the sysctl OID, if the entry has one. */
	if (victim->vfc_sysctl) {
		sysctl_unregister_oid(victim->vfc_sysctl);
		victim->vfc_sysctl->oid_name = NULL;
		kfree_type(struct sysctl_oid, victim->vfc_sysctl);
	}

	/* Unlink entry */
	*linkp = victim->vfc_next;

	numregistered_fses--;

	/*
	 * Is this an entry from our static table?  We find out by checking
	 * whether its address falls within the table.
	 */
	if (victim < vfsconf || victim >= (vfsconf + maxvfsslots)) {
		/*
		 * This entry was dynamically allocated; we must free it;
		 * we would prefer to have just linked the caller's
		 * vfsconf onto our list, but it may not be persistent
		 * because of the previous (copying) implementation.
		 */
		mount_list_unlock();
		kfree_type(struct vfstable, victim);
		mount_list_lock();
	} else {
		/* Static slot: mark it empty so vfstable_add() can reuse it. */
		bzero(victim, sizeof(struct vfstable));
		numused_vfsslots--;
	}

#if DEBUG
	lck_mtx_assert(&mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	return 0;
}
/*
 * Return the address of the file-private spechash mutex, for callers
 * that need the lock object itself.
 */
lck_mtx_t *
SPECHASH_LOCK_ADDR(void)
{
return &spechash_mtx_lock;
}
/*
 * Acquire the spechash mutex.
 */
void
SPECHASH_LOCK(void)
{
lck_mtx_lock(&spechash_mtx_lock);
}
/*
 * Release the spechash mutex taken by SPECHASH_LOCK().
 */
void
SPECHASH_UNLOCK(void)
{
lck_mtx_unlock(&spechash_mtx_lock);
}