gems-kernel/source/THIRDPARTY/xnu/bsd/miscfs/bindfs/bind_vfsops.c
2024-06-03 11:29:39 -05:00

555 lines
16 KiB
C

/*
* Copyright (c) 2019 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/*-
* Portions Copyright (c) 1992, 1993, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software donated to Berkeley by
* Jan-Simon Pendry.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
*
* @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
* $FreeBSD$
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mount_internal.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <security/mac_internal.h>
#include <sys/param.h>
#include <IOKit/IOBSD.h>
#include "bindfs.h"
#define BINDFS_ENTITLEMENT "com.apple.private.bindfs-allow"
#define SIZEOF_MEMBER(type, member) (sizeof(((type *)0)->member))
#define MAX_MNT_FROM_LENGTH (SIZEOF_MEMBER(struct vfsstatfs, f_mntfromname))
/*
 * Ask the file system mounted at mp for the statistics and capabilities
 * that bindfs mirrors.
 *
 * vfap is cleared, the block/io sizes, block counts, file counts and the
 * capabilities field are flagged as wanted, and the request is passed to
 * vfs_getattr(). The return value is whatever vfs_getattr() reports; the
 * caller must check each field with VFSATTR_IS_SUPPORTED() before using it.
 */
static int
bindfs_vfs_getlowerattr(mount_t mp, struct vfs_attr * vfap, vfs_context_t ctx)
{
	int rc;

	/* start from a clean request */
	memset(vfap, 0, sizeof(struct vfs_attr));
	VFSATTR_INIT(vfap);

	/* volume capabilities */
	VFSATTR_WANTED(vfap, f_capabilities);

	/* transfer sizes */
	VFSATTR_WANTED(vfap, f_bsize);
	VFSATTR_WANTED(vfap, f_iosize);

	/* block accounting */
	VFSATTR_WANTED(vfap, f_blocks);
	VFSATTR_WANTED(vfap, f_bfree);
	VFSATTR_WANTED(vfap, f_bavail);
	VFSATTR_WANTED(vfap, f_bused);

	/* file accounting */
	VFSATTR_WANTED(vfap, f_files);
	VFSATTR_WANTED(vfap, f_ffree);

	rc = vfs_getattr(mp, vfap, ctx);
	return rc;
}
/*
 * Mount bind layer
 *
 * Mirrors the directory named by the user-supplied path (user_data) at the
 * mount point mp. The resulting mount is always forced read-only.
 *
 * Returns 0 on success. Fails with EOPNOTSUPP for a root file system mount,
 * ENOTSUP for an update of an existing mount, EPERM when the calling task
 * lacks BINDFS_ENTITLEMENT, EINVAL when the path does not fit in
 * f_mntfromname, or with the error reported by copyinstr(), vnode_lookup(),
 * vnode_ref(), bind_nodeget() or the attribute query on the lower mount.
 *
 * On success the mount holds a usecount on the lower root vnode and on its
 * own root vnode; both are dropped in bindfs_unmount().
 */
static int
bindfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx)
{
	int error = 0;
	struct vnode *lowerrootvp = NULL, *vp = NULL;
	struct vfsstatfs * sp = NULL;
	struct bind_mount * xmp = NULL;
	char data[MAXPATHLEN];
	size_t count;
	struct vfs_attr vfa;

	/* set defaults (arbitrary since this file system is readonly) */
	uint32_t bsize = BLKDEV_IOSIZE;
	size_t iosize = BLKDEV_IOSIZE;
	uint64_t blocks = 4711 * 4711;
	uint64_t bfree = 0;
	uint64_t bavail = 0;
	uint64_t bused = 4711;
	uint64_t files = 4711;
	uint64_t ffree = 0;

	kauth_cred_t cred = vfs_context_ucred(ctx);

	BINDFSDEBUG("mp = %p %llx\n", (void *)mp, vfs_flags(mp));

	if (vfs_flags(mp) & MNT_ROOTFS) {
		return EOPNOTSUPP;
	}

	/*
	 * Updating an existing bind mount is not supported
	 */
	if (vfs_isupdate(mp)) {
		return ENOTSUP;
	}

	/* check entitlement */
	if (!IOCurrentTaskHasEntitlement(BINDFS_ENTITLEMENT)) {
		return EPERM;
	}

	/*
	 * Get argument
	 */
	error = copyinstr(user_data, data, MAXPATHLEN - 1, &count);
	if (error) {
		BINDFSERROR("error copying data from user %d\n", error);
		goto error;
	}

	/* This could happen if the system is configured for 32 bit inodes instead of
	 * 64 bit */
	if (count > MAX_MNT_FROM_LENGTH) {
		error = EINVAL;
		/* both values are size_t, so both use %zu */
		BINDFSERROR("path to mount too large for this system %zu vs %zu\n", count, MAX_MNT_FROM_LENGTH);
		goto error;
	}

	error = vnode_lookup(data, 0, &lowerrootvp, ctx);
	if (error) {
		BINDFSERROR("lookup of %s failed error: %d\n", data, error);
		goto error;
	}

	/* lowerrootvp has an iocount after vnode_lookup, drop that for a usecount.
	 * Keep this to signal what we want to keep around the thing we are mirroring.
	 * Drop it in unmount.*/
	error = vnode_ref(lowerrootvp);
	vnode_put(lowerrootvp);
	if (error) {
		// If vnode_ref failed, then null it out so it can't be used anymore in cleanup.
		lowerrootvp = NULL;
		goto error;
	}

	BINDFSDEBUG("mount %s\n", data);

	xmp = kalloc_type(struct bind_mount, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/*
	 * Save reference to underlying FS
	 */
	xmp->bindm_lowerrootvp  = lowerrootvp;
	xmp->bindm_lowerrootvid = vnode_vid(lowerrootvp);

	error = bind_nodeget(mp, lowerrootvp, NULL, &vp, NULL, 1);
	if (error) {
		goto error;
	}

	/* After bind_nodeget our root vnode is in the hash table and we have two usecounts on lowerrootvp
	 * One use count will get dropped when we reclaim the root during unmount.
	 * The other will get dropped in unmount */

	/* vp has an iocount on it from vnode_create. drop that for a usecount. This
	 * is our root vnode so we drop the ref in unmount */
	error = vnode_ref(vp);
	if (error) {
		/* No usecount was obtained, so the common cleanup below (which drops
		 * one) must not see vp. Hand the node back while we still hold the
		 * iocount, then forget it. */
		vnode_recycle(vp);
		vnode_put(vp);
		vp = NULL;
		goto error;
	}
	vnode_put(vp);

	xmp->bindm_rootvp = vp;

	/* read the flags the user set, but then ignore some of them, we will only
	 * allow them if they are set on the lower file system */
	uint64_t flags      = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
	uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) & (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC);

	flags |= lowerflags;

	/* force these flags */
	flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY);

	/* vfs_setflags() only ORs bits into the mount, so masking bits out of
	 * `flags` does not remove them from mp. Clear the user-controllable bits
	 * explicitly first; they come back through `flags` only when the lower
	 * file system has them. */
	vfs_clearflags(mp, (uint64_t)(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
	vfs_setflags(mp, flags);

	vfs_setfsprivate(mp, xmp);
	vfs_getnewfsid(mp);
	vfs_setlocklocal(mp);

	/* fill in the stat block */
	sp = vfs_statfs(mp);
	strlcpy(sp->f_mntfromname, data, MAX_MNT_FROM_LENGTH);

	sp->f_flags = flags;

	xmp->bindm_flags = BINDM_CASEINSENSITIVE; /* default to case insensitive */

	error = bindfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx);
	if (error) {
		goto error;
	}

	/* prefer the lower file system's numbers wherever it supplied them */
	if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) {
		bsize = vfa.f_bsize;
	}
	if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) {
		iosize = vfa.f_iosize;
	}
	if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) {
		blocks = vfa.f_blocks;
	}
	if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) {
		bfree = vfa.f_bfree;
	}
	if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) {
		bavail = vfa.f_bavail;
	}
	if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) {
		bused = vfa.f_bused;
	}
	if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) {
		files = vfa.f_files;
	}
	if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) {
		ffree = vfa.f_ffree;
	}
	if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
		/* only treat the mirror as case sensitive when the lower file system
		 * both reports the bit and marks it valid */
		if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE)) &&
		    (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE))) {
			xmp->bindm_flags &= ~BINDM_CASEINSENSITIVE;
		}
	}

	sp->f_bsize  = bsize;
	sp->f_iosize = iosize;
	sp->f_blocks = blocks;
	sp->f_bfree  = bfree;
	sp->f_bavail = bavail;
	sp->f_bused  = bused;
	sp->f_files  = files;
	sp->f_ffree  = ffree;

	/* Associate the mac label information from the mirrored filesystem with the
	 * mirror */
	MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp));

	BINDFSDEBUG("lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname);
	return 0;

error:
	/* Tear down in the same order as bindfs_unmount(): root vnode first, then
	 * the mount private data, then the lower root reference. */
	if (vp) {
		/* we made the root vnode but the mount is failed, so clean it up */
		vnode_getwithref(vp);
		vnode_rele(vp);
		/* give vp back */
		vnode_recycle(vp);
		vnode_put(vp);
	}
	if (xmp) {
		/* xmp may already have been published with vfs_setfsprivate();
		 * make sure mp never keeps a pointer to freed memory. */
		vfs_setfsprivate(mp, NULL);
	}
	if (lowerrootvp) {
		vnode_getwithref(lowerrootvp);
		vnode_rele(lowerrootvp);
		vnode_put(lowerrootvp);
	}
	if (xmp) {
		kfree_type(struct bind_mount, xmp);
	}
	return error;
}
/*
* Free reference to bind layer
*/
static int
bindfs_unmount(struct mount * mp, int mntflags, __unused vfs_context_t ctx)
{
struct bind_mount * mntdata;
struct vnode * vp;
int error, flags;
BINDFSDEBUG("mp = %p\n", (void *)mp);
/* check entitlement or superuser*/
if (!IOCurrentTaskHasEntitlement(BINDFS_ENTITLEMENT) &&
vfs_context_suser(ctx) != 0) {
return EPERM;
}
if (mntflags & MNT_FORCE) {
flags = FORCECLOSE;
} else {
flags = 0;
}
mntdata = MOUNTTOBINDMOUNT(mp);
vp = mntdata->bindm_rootvp;
// release our reference on the root before flushing.
// it will get pulled out of the mount structure by reclaim
vnode_getalways(vp);
error = vflush(mp, vp, flags);
if (error) {
vnode_put(vp);
return error;
}
if (vnode_isinuse(vp, 1) && flags == 0) {
vnode_put(vp);
return EBUSY;
}
vnode_rele(vp); // Drop reference taken by bindfs_mount
vnode_put(vp); // Drop ref taken above
//Force close to get rid of the last vnode
(void)vflush(mp, NULL, FORCECLOSE);
/* no more vnodes, so tear down the mountpoint */
vfs_setfsprivate(mp, NULL);
vnode_getalways(mntdata->bindm_lowerrootvp);
vnode_rele(mntdata->bindm_lowerrootvp);
vnode_put(mntdata->bindm_lowerrootvp);
kfree_type(struct bind_mount, mntdata);
uint64_t vflags = vfs_flags(mp);
vfs_setflags(mp, vflags & ~MNT_LOCAL);
return 0;
}
/*
 * Hand back the root vnode of the bind mount.
 *
 * The root recorded in the mount's private data is referenced with
 * vnode_get() and, on success, stored in *vpp (the caller is expected to
 * release it with vnode_put() -- per the vnode KPI). If vnode_get() fails its
 * error is returned and *vpp is left untouched.
 */
static int
bindfs_root(struct mount * mp, struct vnode ** vpp, __unused vfs_context_t ctx)
{
	struct vnode * rootvp = MOUNTTOBINDMOUNT(mp)->bindm_rootvp;
	int rc;

	BINDFSDEBUG("mp = %p, vp = %p\n", (void *)mp, (void *)rootvp);

	rc = vnode_get(rootvp);
	if (rc == 0) {
		*vpp = rootvp;
	}
	return rc;
}
static int
bindfs_vfs_getattr(struct mount * mp, struct vfs_attr * vfap, vfs_context_t ctx)
{
struct vnode * coveredvp = NULL;
struct vfs_attr vfa;
struct bind_mount * bind_mp = MOUNTTOBINDMOUNT(mp);
vol_capabilities_attr_t capabilities;
struct vfsstatfs * sp = vfs_statfs(mp);
struct timespec tzero = {.tv_sec = 0, .tv_nsec = 0};
BINDFSDEBUG("\n");
/* Set default capabilities in case the lower file system is gone */
memset(&capabilities, 0, sizeof(capabilities));
capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
capabilities.valid[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
if (bindfs_vfs_getlowerattr(vnode_mount(bind_mp->bindm_lowerrootvp), &vfa, ctx) == 0) {
if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
memcpy(&capabilities, &vfa.f_capabilities, sizeof(capabilities));
/* don't support vget */
capabilities.capabilities[VOL_CAPABILITIES_FORMAT] &= ~(VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID);
capabilities.capabilities[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES; /* Always support UF_HIDDEN */
capabilities.valid[VOL_CAPABILITIES_FORMAT] &= ~(VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID);
capabilities.valid[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES; /* Always support UF_HIDDEN */
/* dont' support interfaces that only make sense on a writable file system
* or one with specific vnops implemented */
capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = 0;
capabilities.valid[VOL_CAPABILITIES_INTERFACES] &=
~(VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | VOL_CAP_INT_READDIRATTR | VOL_CAP_INT_EXCHANGEDATA |
VOL_CAP_INT_COPYFILE | VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK);
}
}
if (VFSATTR_IS_ACTIVE(vfap, f_create_time)) {
VFSATTR_RETURN(vfap, f_create_time, tzero);
}
if (VFSATTR_IS_ACTIVE(vfap, f_modify_time)) {
VFSATTR_RETURN(vfap, f_modify_time, tzero);
}
if (VFSATTR_IS_ACTIVE(vfap, f_access_time)) {
VFSATTR_RETURN(vfap, f_access_time, tzero);
}
if (VFSATTR_IS_ACTIVE(vfap, f_bsize)) {
VFSATTR_RETURN(vfap, f_bsize, sp->f_bsize);
}
if (VFSATTR_IS_ACTIVE(vfap, f_iosize)) {
VFSATTR_RETURN(vfap, f_iosize, sp->f_iosize);
}
if (VFSATTR_IS_ACTIVE(vfap, f_owner)) {
VFSATTR_RETURN(vfap, f_owner, 0);
}
if (VFSATTR_IS_ACTIVE(vfap, f_blocks)) {
VFSATTR_RETURN(vfap, f_blocks, sp->f_blocks);
}
if (VFSATTR_IS_ACTIVE(vfap, f_bfree)) {
VFSATTR_RETURN(vfap, f_bfree, sp->f_bfree);
}
if (VFSATTR_IS_ACTIVE(vfap, f_bavail)) {
VFSATTR_RETURN(vfap, f_bavail, sp->f_bavail);
}
if (VFSATTR_IS_ACTIVE(vfap, f_bused)) {
VFSATTR_RETURN(vfap, f_bused, sp->f_bused);
}
if (VFSATTR_IS_ACTIVE(vfap, f_files)) {
VFSATTR_RETURN(vfap, f_files, sp->f_files);
}
if (VFSATTR_IS_ACTIVE(vfap, f_ffree)) {
VFSATTR_RETURN(vfap, f_ffree, sp->f_ffree);
}
if (VFSATTR_IS_ACTIVE(vfap, f_fssubtype)) {
VFSATTR_RETURN(vfap, f_fssubtype, 0);
}
if (VFSATTR_IS_ACTIVE(vfap, f_capabilities)) {
memcpy(&vfap->f_capabilities, &capabilities, sizeof(vol_capabilities_attr_t));
VFSATTR_SET_SUPPORTED(vfap, f_capabilities);
}
if (VFSATTR_IS_ACTIVE(vfap, f_attributes)) {
vol_attributes_attr_t * volattr = &vfap->f_attributes;
volattr->validattr.commonattr = 0;
volattr->validattr.volattr = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
volattr->validattr.dirattr = 0;
volattr->validattr.fileattr = 0;
volattr->validattr.forkattr = 0;
volattr->nativeattr.commonattr = 0;
volattr->nativeattr.volattr = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
volattr->nativeattr.dirattr = 0;
volattr->nativeattr.fileattr = 0;
volattr->nativeattr.forkattr = 0;
VFSATTR_SET_SUPPORTED(vfap, f_attributes);
}
if (VFSATTR_IS_ACTIVE(vfap, f_vol_name)) {
/* The name of the volume is the same as the directory we mounted on */
coveredvp = vfs_vnodecovered(mp);
if (coveredvp) {
const char * name = vnode_getname_printable(coveredvp);
strlcpy(vfap->f_vol_name, name, MAXPATHLEN);
vnode_putname_printable(name);
VFSATTR_SET_SUPPORTED(vfap, f_vol_name);
vnode_put(coveredvp);
}
}
return 0;
}
/*
 * Sync handler for bindfs. Does no work and always reports success;
 * the mount is forced read-only in bindfs_mount().
 */
static int
bindfs_sync(__unused struct mount * mp, __unused int waitfor, __unused vfs_context_t ctx)
{
return 0;
}
/*
 * Start handler for bindfs. Emits a debug trace and always returns 0;
 * all arguments are ignored.
 */
static int
bindfs_vfs_start(__unused struct mount * mp, __unused int flags, __unused vfs_context_t ctx)
{
BINDFSDEBUG("\n");
return 0;
}
/* Vnode operation vector description for bindfs; defined outside this file section. */
extern const struct vnodeopv_desc bindfs_vnodeop_opv_desc;
/* List of vnode operation vector descriptions exported by bindfs (a single entry). */
const struct vnodeopv_desc * bindfs_vnodeopv_descs[] = {
&bindfs_vnodeop_opv_desc,
};
/*
 * VFS operations table for bindfs, wiring up the handlers defined above.
 * No sysctl or setattr handler is provided (both NULL).
 * NOTE(review): bindfs_init is not defined in this part of the file;
 * presumably it is declared in bindfs.h -- confirm.
 */
struct vfsops bindfs_vfsops = {
.vfs_mount = bindfs_mount,
.vfs_unmount = bindfs_unmount,
.vfs_start = bindfs_vfs_start,
.vfs_root = bindfs_root,
.vfs_getattr = bindfs_vfs_getattr,
.vfs_sync = bindfs_sync,
.vfs_init = bindfs_init,
.vfs_sysctl = NULL,
.vfs_setattr = NULL,
};