/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1992,7 NeXT Computer, Inc.
 *
 * Unix data structure initialization.
 */

#include <mach/mach_types.h>

#include <vm/vm_kern.h>
#include <mach/vm_prot.h>
#include <kern/assert.h>

#include <sys/param.h>
#include <sys/buf_internal.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/clist.h>
#include <sys/mcache.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/tty.h>
#include <sys/vnode_internal.h>
#include <sys/sysctl.h>
#include <machine/cons.h>
#include <pexpert/pexpert.h>
#include <sys/socketvar.h>

#include <netinet/tcp_var.h>

extern uint32_t kern_maxvnodes;

#if CONFIG_MBUF_MCACHE
extern vm_map_t mb_map;
#endif /* CONFIG_MBUF_MCACHE */

#if INET
extern uint32_t tcp_sendspace;
extern uint32_t tcp_recvspace;
#endif

void            bsd_bufferinit(void);
unsigned int    bsd_mbuf_cluster_reserve(boolean_t *);
void            bsd_scale_setup(int);
void            bsd_exec_setup(int);

/*
 * Declare these as initialized data so we can patch them.
 */
#ifdef  NBUF
int             max_nbuf_headers = NBUF;
int             niobuf_headers = (NBUF / 2) + 2048;
int             nbuf_hashelements = NBUF;
int             nbuf_headers = NBUF;
#else
int             max_nbuf_headers = 0;
int             niobuf_headers = 0;
int             nbuf_hashelements = 0;
int             nbuf_headers = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "");
SYSCTL_INT(_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_KERN, &max_nbuf_headers, 0, "");

__private_extern__ int customnbuf = 0;

/* Indicates a server boot when set */
TUNABLE(int, serverperfmode, "serverperfmode", 0);

#if SOCKETS
static unsigned int mbuf_poolsz;
#endif

vm_map_t        buffer_map;
vm_map_t        bufferhdr_map;
static int      vnodes_sized = 0;

extern void     bsd_startupearly(void);

static vm_map_size_t bufferhdr_map_size;
SECURITY_READ_ONLY_LATE(struct mach_vm_range) bufferhdr_range = {};

static vm_map_size_t
bsd_get_bufferhdr_map_size(void)
{
	vm_size_t       size;

	/* clip the number of buf headers up to 16k */
	if (max_nbuf_headers == 0) {
		max_nbuf_headers = (int)atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */
	}
	if ((customnbuf == 0) && ((unsigned int)max_nbuf_headers > 16384)) {
		max_nbuf_headers = 16384;
	}
	if (max_nbuf_headers < CONFIG_MIN_NBUF) {
		max_nbuf_headers = CONFIG_MIN_NBUF;
	}

	if (niobuf_headers == 0) {
		if (max_nbuf_headers < 4096) {
			niobuf_headers = max_nbuf_headers;
		} else {
			niobuf_headers = (max_nbuf_headers / 2) + 2048;
		}
	}
	if (niobuf_headers < CONFIG_MIN_NIOBUF) {
		niobuf_headers = CONFIG_MIN_NIOBUF;
	}

	size = (max_nbuf_headers + niobuf_headers) * sizeof(struct buf);
	size = round_page(size);
	return size;
}

KMEM_RANGE_REGISTER_DYNAMIC(bufferhdr, &bufferhdr_range, ^() {
	return bufferhdr_map_size = bsd_get_bufferhdr_map_size();
});
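/*
 * Sizing example (illustrative numbers only, assuming 4 KB pages): with
 * sane_size == 8 GB, atop_kernel(sane_size / 50) yields roughly 41,900
 * headers, which the !customnbuf path clips to 16,384.  niobuf_headers
 * then defaults to 16384 / 2 + 2048 == 10,240, so the bufferhdr submap
 * reserves (16384 + 10240) * sizeof(struct buf), rounded up to a page.
 */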
void
bsd_startupearly(void)
{
	vm_size_t       size = bufferhdr_map_size;

	assert(size);

	/* clip the number of hash elements to 200000 */
	if ((customnbuf == 0) && nbuf_hashelements == 0) {
		nbuf_hashelements = (int)atop_kernel(sane_size / 50);
		if ((unsigned int)nbuf_hashelements > 200000) {
			nbuf_hashelements = 200000;
		}
	} else {
		nbuf_hashelements = max_nbuf_headers;
	}

	bufferhdr_map = kmem_suballoc(kernel_map,
	    &bufferhdr_range.min_address,
	    size,
	    VM_MAP_CREATE_NEVER_FAULTS,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    KMS_PERMANENT | KMS_NOFAIL,
	    VM_KERN_MEMORY_FILE).kmr_submap;

	kmem_alloc(bufferhdr_map,
	    &(vm_offset_t){ bufferhdr_range.min_address },
	    size,
	    KMA_NOFAIL | KMA_PERMANENT | KMA_ZERO | KMA_KOBJECT,
	    VM_KERN_MEMORY_FILE);

	buf_headers = (struct buf *)bufferhdr_range.min_address;

#if SOCKETS
	{
		static const unsigned int maxspace = 128 * 1024;
		int scale;

#if INET
		if ((scale = nmbclusters / NMBCLUSTERS) > 1) {
			tcp_sendspace *= scale;
			tcp_recvspace *= scale;

			if (tcp_sendspace > maxspace) {
				tcp_sendspace = maxspace;
			}
			if (tcp_recvspace > maxspace) {
				tcp_recvspace = maxspace;
			}
		}
#endif /* INET */
	}
#endif /* SOCKETS */

	if (vnodes_sized == 0) {
		if (!PE_get_default("kern.maxvnodes", &desiredvnodes, sizeof(desiredvnodes))) {
			/*
			 * Size vnodes based on memory:
			 * the number of vnodes is (memsize / 64k) + 1024.
			 * This is the calculation used by launchd in Tiger;
			 * we clip the maximum based on 16G,
			 * i.e. ((16*1024*1024*1024) / (64*1024)) + 1024 = 263168.
			 * CONFIG_VNODES is set to 263168 for "medium" configurations
			 * (the default) but can be smaller or larger.
			 */
			desiredvnodes = (int)(sane_size / 65536) + 1024;
#ifdef CONFIG_VNODES
			if (desiredvnodes > CONFIG_VNODES) {
				desiredvnodes = CONFIG_VNODES;
			}
#endif
		}
		vnodes_sized = 1;
	}
}

#if SOCKETS
SECURITY_READ_ONLY_LATE(struct mach_vm_range) mb_range = {};

KMEM_RANGE_REGISTER_DYNAMIC(mb, &mb_range, ^() {
	nmbclusters = bsd_mbuf_cluster_reserve(NULL) / MCLBYTES;
	return (vm_map_size_t)(nmbclusters * MCLBYTES);
});
#endif /* SOCKETS */

void
bsd_bufferinit(void)
{
	/*
	 * Note: Console device initialized in kminit() from bsd_autoconf()
	 * prior to call to us in bsd_init().
	 */
	bsd_startupearly();

#if CONFIG_MBUF_MCACHE
	mb_map = kmem_suballoc(kernel_map,
	    &mb_range.min_address,
	    (vm_size_t) (nmbclusters * MCLBYTES),
	    FALSE,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    KMS_PERMANENT | KMS_NOFAIL,
	    VM_KERN_MEMORY_MBUF).kmr_submap;

	mbutl = (unsigned char *)mb_range.min_address;
#endif /* CONFIG_MBUF_MCACHE */

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
}

/* 512 MB (K32) or 2 GB (K64) hard limit on size of the mbuf pool */
#if !defined(__LP64__)
#define MAX_MBUF_POOL   (512 << MBSHIFT)
#else
#define MAX_MBUF_POOL   (2ULL << GBSHIFT)
#endif /* !__LP64__ */
#define MAX_NCL         (MAX_MBUF_POOL >> MCLSHIFT)
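/*
 * Limit arithmetic, for illustration (assumes the usual 2 KB mbuf cluster,
 * MCLSHIFT == 11): on a 64-bit kernel MAX_MBUF_POOL is 2 GB, so MAX_NCL
 * works out to 2^31 >> 11 == 1,048,576 clusters; on a 32-bit kernel the
 * 512 MB pool cap yields 262,144 clusters.
 */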
#if SOCKETS
/*
 * This has been broken out into a separate routine that can be called from
 * the x86 early vm initialization to determine how much low memory to
 * reserve on systems with DMA hardware that can't fully address all of the
 * physical memory that is present.
 */
unsigned int
bsd_mbuf_cluster_reserve(boolean_t *overridden)
{
	int mbuf_pool = 0, ncl = 0;
	static boolean_t was_overridden = FALSE;

	/* If called more than once, return the previously calculated size */
	if (mbuf_poolsz != 0) {
		goto done;
	}

	/*
	 * Some of these are parsed in parse_bsd_args(), but for x86 we get
	 * here early from i386_vm_init() and so we parse them now, in order
	 * to correctly compute the size of the low-memory VM pool.  It is
	 * redundant but rather harmless.
	 */
	(void) PE_parse_boot_argn("ncl", &ncl, sizeof(ncl));
	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof(mbuf_pool));

	/*
	 * Convert "mbuf_pool" from MB to # of 2KB clusters; it is
	 * equivalent to "ncl", except that it uses a different unit.
	 */
	if (mbuf_pool != 0) {
		ncl = (mbuf_pool << MBSHIFT) >> MCLSHIFT;
	}

	if (sane_size > (64 * 1024 * 1024) || ncl != 0) {
		if (ncl || serverperfmode) {
			was_overridden = TRUE;
		}

		if ((nmbclusters = ncl) == 0) {
			/* Auto-configure the mbuf pool size */
			nmbclusters = mbuf_default_ncl(mem_actual);
		} else {
			/* Make sure it's not odd in case ncl is manually set */
			if (nmbclusters & 0x1) {
				--nmbclusters;
			}

			/* And obey the upper limit */
			if (nmbclusters > MAX_NCL) {
				nmbclusters = MAX_NCL;
			}
		}

		/* Round it down to the nearest multiple of NCLPG (clusters per page) */
		nmbclusters = (unsigned int)P2ROUNDDOWN(nmbclusters, NCLPG);
	}
	mbuf_poolsz = nmbclusters << MCLSHIFT;
done:
	if (overridden) {
		*overridden = was_overridden;
	}

	return mbuf_poolsz;
}
#endif /* SOCKETS */

#if defined(__LP64__)
extern int tcp_tcbhashsize;
extern int max_cached_sock_count;
#endif

#define SERVER_PERF_MODE_VALIDATION_DISABLES 0x5dee
extern unsigned int kern_feature_overrides;

void
bsd_scale_setup(int scale)
{
#if defined(__LP64__)
	if ((scale > 0) && (serverperfmode == 0)) {
		maxproc *= scale;
		maxprocperuid = (maxproc * 2) / 3;
		if (scale > 2) {
			maxfiles *= scale;
			maxfilesperproc = maxfiles / 2;
		}
	}
	/* Apply server scaling rules */
	if ((scale > 0) && (serverperfmode != 0)) {
		maxproc = 2500 * scale;
		hard_maxproc = maxproc;
		/* no fp usage */
		maxprocperuid = (maxproc * 3) / 4;
		maxfiles = (150000 * scale);
		maxfilesperproc = maxfiles / 2;
		desiredvnodes = maxfiles;
		vnodes_sized = 1;
		tcp_tfo_backlog = 100 * scale;
		if (scale > 4) {
			/* clip somaxconn at 32G level */
			somaxconn = 2048;
			/*
			 * For scale > 4 (> 32G), clip
			 * tcp_tcbhashsize to 32K
			 */
			tcp_tcbhashsize = 32 * 1024;

			if (scale > 7) {
				/* clip at 64G level */
				max_cached_sock_count = 165000;
			} else {
				max_cached_sock_count = 60000 + ((scale - 1) * 15000);
			}
		} else {
			somaxconn = 512 * scale;
			tcp_tcbhashsize = 4 * 1024 * scale;
			max_cached_sock_count = 60000 + ((scale - 1) * 15000);
		}
	}

	if (maxproc > hard_maxproc) {
		hard_maxproc = maxproc;
	}
#endif
	if (serverperfmode) {
		/* If running in serverperfmode, disable some internal-only diagnostics. */
		kern_feature_overrides |= SERVER_PERF_MODE_VALIDATION_DISABLES;
	}
	bsd_exec_setup(scale);
}
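/*
 * Worked example of the server scaling rules above (illustrative only):
 * with serverperfmode set and scale == 4, bsd_scale_setup() yields
 * maxproc == 10000, maxprocperuid == 7500, maxfiles == 600000,
 * maxfilesperproc == 300000, desiredvnodes == 600000, tcp_tfo_backlog ==
 * 400, somaxconn == 2048, tcp_tcbhashsize == 16384 and
 * max_cached_sock_count == 105000.
 */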