/* * Copyright (c) 2006-2020 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Memory allocator with per-CPU caching, derived from the kmem magazine * concept and implementation as described in the following paper: * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights * reserved. Use is subject to license terms. * * There are several major differences between this and the original kmem * magazine: this derivative implementation allows for multiple objects to * be allocated and freed from/to the object cache in one call; in addition, * it provides for better flexibility where the user is allowed to define * its own slab allocator (instead of the default zone allocator). Finally, * no object construction/destruction takes place at the moment, although * this could be added in future to improve efficiency. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MCACHE_SIZE(n) \ __builtin_offsetof(mcache_t, mc_cpu[n]) /* Allocate extra in case we need to manually align the pointer */ #define MCACHE_ALLOC_SIZE \ (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE) #define MCACHE_CPU(c) \ (mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number()))) /* * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used * to serialize accesses to the global list of caches in the system. * They also record the thread currently running in the critical * section, so that we can avoid recursive requests to reap the * caches when memory runs low. 
*/ #define MCACHE_LIST_LOCK() { \ lck_mtx_lock(&mcache_llock); \ mcache_llock_owner = current_thread(); \ } #define MCACHE_LIST_UNLOCK() { \ mcache_llock_owner = NULL; \ lck_mtx_unlock(&mcache_llock); \ } #define MCACHE_LOCK(l) lck_mtx_lock(l) #define MCACHE_UNLOCK(l) lck_mtx_unlock(l) #define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l) static unsigned int ncpu; static unsigned int cache_line_size; static struct thread *mcache_llock_owner; static LCK_GRP_DECLARE(mcache_llock_grp, "mcache.list"); static LCK_MTX_DECLARE(mcache_llock, &mcache_llock_grp); static struct zone *mcache_zone; static const uint32_t mcache_reap_interval = 15; static const uint32_t mcache_reap_interval_leeway = 2; static UInt32 mcache_reaping; static int mcache_ready; static int mcache_updating; static int mcache_bkt_contention = 3; #if DEBUG static unsigned int mcache_flags = MCF_DEBUG; #else static unsigned int mcache_flags = 0; #endif int mca_trn_max = MCA_TRN_MAX; static mcache_bkttype_t mcache_bkttype[] = { { 1, 4096, 32768, NULL }, { 3, 2048, 16384, NULL }, { 7, 1024, 12288, NULL }, { 15, 256, 8192, NULL }, { 31, 64, 4096, NULL }, { 47, 0, 2048, NULL }, { 63, 0, 1024, NULL }, { 95, 0, 512, NULL }, { 143, 0, 256, NULL }, { 165, 0, 0, NULL }, }; static mcache_t *mcache_create_common(const char *, size_t, size_t, mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t, mcache_notifyfn_t, void *, u_int32_t, int); static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***, unsigned int, int); static void mcache_slab_free(void *, mcache_obj_t *, boolean_t); static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t); static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int); static void mcache_cpu_batch_refill(mcache_cpu_t *, mcache_bkt_t *, int); static uint32_t mcache_bkt_batch_alloc(mcache_t *, mcache_bktlist_t *, mcache_bkt_t **, uint32_t); static void mcache_bkt_batch_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *); static void mcache_cache_bkt_enable(mcache_t *); static void mcache_bkt_purge(mcache_t *); static void mcache_bkt_destroy(mcache_t *, mcache_bkt_t *, int); static void mcache_bkt_ws_update(mcache_t *); static void mcache_bkt_ws_zero(mcache_t *); static void mcache_bkt_ws_reap(mcache_t *); static void mcache_dispatch(void (*)(void *), void *); static void mcache_cache_reap(mcache_t *); static void mcache_cache_update(mcache_t *); static void mcache_cache_bkt_resize(void *); static void mcache_cache_enable(void *); static void mcache_update(thread_call_param_t __unused, thread_call_param_t __unused); static void mcache_update_timeout(void *); static void mcache_applyall(void (*)(mcache_t *)); static void mcache_reap_start(void *); static void mcache_reap_done(void *); static void mcache_reap_timeout(thread_call_param_t __unused, thread_call_param_t); static void mcache_notify(mcache_t *, u_int32_t); static void mcache_purge(void *); __attribute__((noreturn)) static void mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset, int64_t expected, int64_t got); static LIST_HEAD(, mcache) mcache_head; mcache_t *mcache_audit_cache; static thread_call_t mcache_reap_tcall; static thread_call_t mcache_update_tcall; /* * Initialize the framework; this is currently called as part of BSD init. 
*/ __private_extern__ void mcache_init(void) { mcache_bkttype_t *btp; unsigned int i; char name[32]; VERIFY(mca_trn_max >= 2); ncpu = ml_wait_max_cpus(); (void) mcache_cache_line_size(); /* prime it */ mcache_reap_tcall = thread_call_allocate(mcache_reap_timeout, NULL); mcache_update_tcall = thread_call_allocate(mcache_update, NULL); if (mcache_reap_tcall == NULL || mcache_update_tcall == NULL) { panic("mcache_init: thread_call_allocate failed"); /* NOTREACHED */ __builtin_unreachable(); } mcache_zone = zone_create("mcache", MCACHE_ALLOC_SIZE, ZC_PGZ_USE_GUARDS | ZC_DESTRUCTIBLE); LIST_INIT(&mcache_head); for (i = 0; i < sizeof(mcache_bkttype) / sizeof(*btp); i++) { btp = &mcache_bkttype[i]; (void) snprintf(name, sizeof(name), "bkt_%d", btp->bt_bktsize); btp->bt_cache = mcache_create(name, (btp->bt_bktsize + 1) * sizeof(void *), 0, 0, MCR_SLEEP); } PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof(mcache_flags)); mcache_flags &= MCF_FLAGS_MASK; mcache_audit_cache = mcache_create("audit", sizeof(mcache_audit_t), 0, 0, MCR_SLEEP); mcache_applyall(mcache_cache_bkt_enable); mcache_ready = 1; printf("mcache: %d CPU(s), %d bytes CPU cache line size\n", ncpu, CPU_CACHE_LINE_SIZE); } /* * Return the global mcache flags. */ __private_extern__ unsigned int mcache_getflags(void) { return mcache_flags; } /* * Return the CPU cache line size. */ __private_extern__ unsigned int mcache_cache_line_size(void) { if (cache_line_size == 0) { ml_cpu_info_t cpu_info; ml_cpu_get_info(&cpu_info); cache_line_size = (unsigned int)cpu_info.cache_line_size; } return cache_line_size; } /* * Create a cache using the zone allocator as the backend slab allocator. * The caller may specify any alignment for the object; if it specifies 0 * the default alignment (MCACHE_ALIGN) will be used. */ __private_extern__ mcache_t * mcache_create(const char *name, size_t bufsize, size_t align, u_int32_t flags, int wait __unused) { return mcache_create_common(name, bufsize, align, mcache_slab_alloc, mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1); } /* * Create a cache using a custom backend slab allocator. Since the caller * is responsible for allocation, no alignment guarantee will be provided * by this framework. */ __private_extern__ mcache_t * mcache_create_ext(const char *name, size_t bufsize, mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn, mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg, u_int32_t flags, int wait __unused) { return mcache_create_common(name, bufsize, 0, allocfn, freefn, auditfn, logfn, notifyfn, arg, flags, 0); } /* * Common cache creation routine. */ static mcache_t * mcache_create_common(const char *name, size_t bufsize, size_t align, mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn, mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg, u_int32_t flags, int need_zone) { mcache_bkttype_t *btp; mcache_t *cp = NULL; size_t chunksize; void *buf, **pbuf; unsigned int c; char lck_name[64]; buf = zalloc_flags(mcache_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL); /* * In case we didn't get a cache-aligned memory, round it up * accordingly. This is needed in order to get the rest of * structure members aligned properly. It also means that * the memory span gets shifted due to the round up, but it * is okay since we've allocated extra space for this. 
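 *
 * A worked example with purely illustrative numbers, assuming a
 * 64-byte CPU cache line: if the zone allocator returned
 * buf == 0x1008, then
 *
 *	cp    = (mcache_t *)P2ROUNDUP(0x1008 + sizeof (void *), 64),
 *	        i.e. (mcache_t *)0x1040;
 *	pbuf  = (void **)(0x1040 - sizeof (void *));
 *	*pbuf = buf;
 *
 * The original pointer is stashed immediately below the aligned
 * mcache_t so that mcache_destroy() can recover it, and because
 * MCACHE_ALLOC_SIZE reserves sizeof (void *) plus a full cache line
 * of slack beyond MCACHE_SIZE(ncpu), the shifted span still fits
 * within the allocation.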
*/ cp = (mcache_t *) P2ROUNDUP((intptr_t)buf + sizeof(void *), CPU_CACHE_LINE_SIZE); pbuf = (void **)((intptr_t)cp - sizeof(void *)); *pbuf = buf; /* * Guaranteed alignment is valid only when we use the internal * slab allocator (currently set to use the zone allocator). */ if (!need_zone) { align = 1; } else { /* Enforce 64-bit minimum alignment for zone-based buffers */ if (align == 0) { align = MCACHE_ALIGN; } align = P2ROUNDUP(align, MCACHE_ALIGN); } if ((align & (align - 1)) != 0) { panic("mcache_create: bad alignment %lu", align); /* NOTREACHED */ __builtin_unreachable(); } cp->mc_align = align; cp->mc_slab_alloc = allocfn; cp->mc_slab_free = freefn; cp->mc_slab_audit = auditfn; cp->mc_slab_log = logfn; cp->mc_slab_notify = notifyfn; cp->mc_private = need_zone ? cp : arg; cp->mc_bufsize = bufsize; cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags; (void) snprintf(cp->mc_name, sizeof(cp->mc_name), "mcache.%s", name); (void) snprintf(lck_name, sizeof(lck_name), "%s.cpu", cp->mc_name); cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name, LCK_GRP_ATTR_NULL); /* * Allocation chunk size is the object's size plus any extra size * needed to satisfy the object's alignment. It is enforced to be * at least the size of an LP64 pointer to simplify auditing and to * handle multiple-element allocation requests, where the elements * returned are linked together in a list. */ chunksize = MAX(bufsize, sizeof(u_int64_t)); if (need_zone) { VERIFY(align != 0 && (align % MCACHE_ALIGN) == 0); chunksize += sizeof(uint64_t) + align; chunksize = P2ROUNDUP(chunksize, align); cp->mc_slab_zone = zone_create(cp->mc_name, chunksize, ZC_PGZ_USE_GUARDS | ZC_DESTRUCTIBLE); } cp->mc_chunksize = chunksize; /* * Initialize the bucket layer. */ (void) snprintf(lck_name, sizeof(lck_name), "%s.bkt", cp->mc_name); cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name, LCK_GRP_ATTR_NULL); lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp, LCK_ATTR_NULL); (void) snprintf(lck_name, sizeof(lck_name), "%s.sync", cp->mc_name); cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name, LCK_GRP_ATTR_NULL); lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp, LCK_ATTR_NULL); for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++) { continue; } cp->cache_bkttype = btp; /* * Initialize the CPU layer. Each per-CPU structure is aligned * on the CPU cache line boundary to prevent false sharing. */ for (c = 0; c < ncpu; c++) { mcache_cpu_t *ccp = &cp->mc_cpu[c]; VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE)); lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp, LCK_ATTR_NULL); ccp->cc_objs = -1; ccp->cc_pobjs = -1; } if (mcache_ready) { mcache_cache_bkt_enable(cp); } /* TODO: dynamically create sysctl for stats */ MCACHE_LIST_LOCK(); LIST_INSERT_HEAD(&mcache_head, cp, mc_list); MCACHE_LIST_UNLOCK(); /* * If cache buckets are enabled and this is the first cache * created, start the periodic cache update. */ if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) { mcache_updating = 1; mcache_update_timeout(NULL); } if (cp->mc_flags & MCF_DEBUG) { printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu " "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e", arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize); } return cp; } /* * Allocate one or more objects from a cache. 
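 *
 * An illustrative usage sketch (the cache name, object size and batch
 * count below are hypothetical): a client backed by the default zone
 * slab allocator could allocate a batch of objects like this:
 *
 *	mcache_t *cp;
 *	mcache_obj_t *list, *o;
 *	unsigned int got;
 *
 *	cp = mcache_create("example", 128, 0, 0, MCR_SLEEP);
 *	got = mcache_alloc_ext(cp, &list, 16, MCR_SLEEP);
 *	for (o = list; o != NULL; o = o->obj_next) {
 *		...use each object...
 *	}
 *
 * The returned objects are chained through obj_next with the last
 * element terminated by NULL. A blocking request retries until the
 * full count is satisfied, while a non-blocking one may return fewer
 * objects than asked for.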
*/ __private_extern__ unsigned int mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait) { mcache_cpu_t *ccp; mcache_obj_t **top = &(*list); mcache_bkt_t *bkt; unsigned int need = num; boolean_t nwretry = FALSE; /* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */ VERIFY((wait & (MCR_NOSLEEP | MCR_FAILOK)) != (MCR_NOSLEEP | MCR_FAILOK)); ASSERT(list != NULL); *list = NULL; if (num == 0) { return 0; } retry_alloc: /* We may not always be running in the same CPU in case of retries */ ccp = MCACHE_CPU(cp); MCACHE_LOCK(&ccp->cc_lock); for (;;) { /* * If we have an object in the current CPU's filled bucket, * chain the object to any previous objects and return if * we've satisfied the number of requested objects. */ if (ccp->cc_objs > 0) { mcache_obj_t *tail; int objs; /* * Objects in the bucket are already linked together * with the most recently freed object at the head of * the list; grab as many objects as we can. */ objs = MIN((unsigned int)ccp->cc_objs, need); *list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1]; ccp->cc_objs -= objs; ccp->cc_alloc += objs; tail = ccp->cc_filled->bkt_obj[ccp->cc_objs]; list = &tail->obj_next; *list = NULL; /* If we got them all, return to caller */ if ((need -= objs) == 0) { MCACHE_UNLOCK(&ccp->cc_lock); if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) { (*cp->mc_slab_log)(num, *top, TRUE); } if (cp->mc_flags & MCF_DEBUG) { goto debug_alloc; } return num; } } /* * The CPU's filled bucket is empty. If the previous filled * bucket was full, exchange and try again. */ if (ccp->cc_pobjs > 0) { mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs); continue; } /* * If the bucket layer is disabled, allocate from slab. This * can happen either because MCF_NOCPUCACHE is set, or because * the bucket layer is currently being resized. */ if (ccp->cc_bktsize == 0) { break; } /* * Both of the CPU's buckets are empty; try to get full * bucket(s) from the bucket layer. Upon success, refill * this CPU and place any empty bucket into the empty list. * To prevent potential thrashing, replace both empty buckets * only if the requested count exceeds a bucket's worth of * objects. */ (void) mcache_bkt_batch_alloc(cp, &cp->mc_full, &bkt, (need <= ccp->cc_bktsize) ? 1 : 2); if (bkt != NULL) { mcache_bkt_t *bkt_list = NULL; if (ccp->cc_pfilled != NULL) { ccp->cc_pfilled->bkt_next = bkt_list; bkt_list = ccp->cc_pfilled; } if (bkt->bkt_next == NULL) { /* * Bucket layer allocation returns only 1 * magazine; retain current empty magazine. */ mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize); } else { /* * We got 2 full buckets from the bucket * layer; release the current empty bucket * back to the bucket layer. */ if (ccp->cc_filled != NULL) { ccp->cc_filled->bkt_next = bkt_list; bkt_list = ccp->cc_filled; } mcache_cpu_batch_refill(ccp, bkt, ccp->cc_bktsize); } mcache_bkt_batch_free(cp, &cp->mc_empty, bkt_list); continue; } /* * The bucket layer has no full buckets; allocate the * object(s) directly from the slab layer. */ break; } MCACHE_UNLOCK(&ccp->cc_lock); need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait); /* * If this is a blocking allocation, or if it is non-blocking and * the cache's full bucket is non-empty, then retry the allocation. 
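 *
 * In other words (hypothetical caller sketch), a non-blocking user
 * must be prepared for a partial result:
 *
 *	got = mcache_alloc_ext(cp, &list, n, MCR_NOSLEEP);
 *	if (got < n) {
 *		...cope with the shortfall...
 *	}
 *
 * whereas a blocking caller returns only once all n objects have
 * been obtained.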
*/ if (need > 0) { if (!(wait & MCR_NONBLOCKING)) { os_atomic_inc(&cp->mc_wretry_cnt, relaxed); goto retry_alloc; } else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) && !mcache_bkt_isempty(cp)) { if (!nwretry) { nwretry = TRUE; } os_atomic_inc(&cp->mc_nwretry_cnt, relaxed); goto retry_alloc; } else if (nwretry) { os_atomic_inc(&cp->mc_nwfail_cnt, relaxed); } } if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) { (*cp->mc_slab_log)((num - need), *top, TRUE); } if (!(cp->mc_flags & MCF_DEBUG)) { return num - need; } debug_alloc: if (cp->mc_flags & MCF_DEBUG) { mcache_obj_t **o = top; unsigned int n; n = 0; /* * Verify that the chain of objects have the same count as * what we are about to report to the caller. Any mismatch * here means that the object list is insanely broken and * therefore we must panic. */ while (*o != NULL) { o = &(*o)->obj_next; ++n; } if (n != (num - need)) { panic("mcache_alloc_ext: %s cp %p corrupted list " "(got %d actual %d)\n", cp->mc_name, (void *)cp, num - need, n); /* NOTREACHED */ __builtin_unreachable(); } } /* Invoke the slab layer audit callback if auditing is enabled */ if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) { (*cp->mc_slab_audit)(cp->mc_private, *top, TRUE); } return num - need; } /* * Allocate a single object from a cache. */ __private_extern__ void * mcache_alloc(mcache_t *cp, int wait) { mcache_obj_t *buf; (void) mcache_alloc_ext(cp, &buf, 1, wait); return buf; } __private_extern__ void mcache_waiter_inc(mcache_t *cp) { os_atomic_inc(&cp->mc_waiter_cnt, relaxed); } __private_extern__ void mcache_waiter_dec(mcache_t *cp) { os_atomic_dec(&cp->mc_waiter_cnt, relaxed); } __private_extern__ boolean_t mcache_bkt_isempty(mcache_t *cp) { /* * This isn't meant to accurately tell whether there are * any full buckets in the cache; it is simply a way to * obtain "hints" about the state of the cache. */ return cp->mc_full.bl_total == 0; } /* * Notify the slab layer about an event. */ static void mcache_notify(mcache_t *cp, u_int32_t event) { if (cp->mc_slab_notify != NULL) { (*cp->mc_slab_notify)(cp->mc_private, event); } } /* * Purge the cache and disable its buckets. */ static void mcache_purge(void *arg) { mcache_t *cp = arg; mcache_bkt_purge(cp); /* * We cannot simply call mcache_cache_bkt_enable() from here as * a bucket resize may be in flight and we would cause the CPU * layers of the cache to point to different sizes. Therefore, * we simply increment the enable count so that during the next * periodic cache update the buckets can be reenabled. */ lck_mtx_lock_spin(&cp->mc_sync_lock); cp->mc_enable_cnt++; lck_mtx_unlock(&cp->mc_sync_lock); } __private_extern__ boolean_t mcache_purge_cache(mcache_t *cp, boolean_t async) { /* * Purging a cache that has no per-CPU caches or is already * in the process of being purged is rather pointless. */ if (cp->mc_flags & MCF_NOCPUCACHE) { return FALSE; } lck_mtx_lock_spin(&cp->mc_sync_lock); if (cp->mc_purge_cnt > 0) { lck_mtx_unlock(&cp->mc_sync_lock); return FALSE; } cp->mc_purge_cnt++; lck_mtx_unlock(&cp->mc_sync_lock); if (async) { mcache_dispatch(mcache_purge, cp); } else { mcache_purge(cp); } return TRUE; } /* * Free a single object to a cache. */ __private_extern__ void mcache_free(mcache_t *cp, void *buf) { ((mcache_obj_t *)buf)->obj_next = NULL; mcache_free_ext(cp, (mcache_obj_t *)buf); } /* * Free one or more objects to a cache. 
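 *
 * The list is chained through obj_next and must be terminated by a
 * NULL obj_next. An illustrative sketch (o1 and o2 are hypothetical
 * objects previously obtained from this cache):
 *
 *	((mcache_obj_t *)o1)->obj_next = (mcache_obj_t *)o2;
 *	((mcache_obj_t *)o2)->obj_next = NULL;
 *	mcache_free_ext(cp, (mcache_obj_t *)o1);
 *
 * mcache_free() above performs the same NULL termination for the
 * single-object case.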
*/ __private_extern__ void mcache_free_ext(mcache_t *cp, mcache_obj_t *list) { mcache_cpu_t *ccp = MCACHE_CPU(cp); mcache_bkttype_t *btp; mcache_obj_t *nlist; mcache_bkt_t *bkt; if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) { (*cp->mc_slab_log)(0, list, FALSE); } /* Invoke the slab layer audit callback if auditing is enabled */ if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) { (*cp->mc_slab_audit)(cp->mc_private, list, FALSE); } MCACHE_LOCK(&ccp->cc_lock); for (;;) { /* * If there is space in the current CPU's filled bucket, put * the object there and return once all objects are freed. * Note the cast to unsigned integer takes care of the case * where the bucket layer is disabled (when cc_objs is -1). */ if ((unsigned int)ccp->cc_objs < (unsigned int)ccp->cc_bktsize) { /* * Reverse the list while we place the object into the * bucket; this effectively causes the most recently * freed object(s) to be reused during allocation. */ nlist = list->obj_next; list->obj_next = (ccp->cc_objs == 0) ? NULL : ccp->cc_filled->bkt_obj[ccp->cc_objs - 1]; ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list; ccp->cc_free++; if ((list = nlist) != NULL) { continue; } /* We are done; return to caller */ MCACHE_UNLOCK(&ccp->cc_lock); /* If there is a waiter below, notify it */ if (cp->mc_waiter_cnt > 0) { mcache_notify(cp, MCN_RETRYALLOC); } return; } /* * The CPU's filled bucket is full. If the previous filled * bucket was empty, exchange and try again. */ if (ccp->cc_pobjs == 0) { mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs); continue; } /* * If the bucket layer is disabled, free to slab. This can * happen either because MCF_NOCPUCACHE is set, or because * the bucket layer is currently being resized. */ if (ccp->cc_bktsize == 0) { break; } /* * Both of the CPU's buckets are full; try to get empty * buckets from the bucket layer. Upon success, empty this * CPU and place any full bucket into the full list. * * TODO: Because the caller currently doesn't indicate * the number of objects in the list, we choose the more * conservative approach of allocating only 1 empty * bucket (to prevent potential thrashing). Once we * have the object count, we can replace 1 with similar * logic as used in mcache_alloc_ext(). */ (void) mcache_bkt_batch_alloc(cp, &cp->mc_empty, &bkt, 1); if (bkt != NULL) { mcache_bkt_t *bkt_list = NULL; if (ccp->cc_pfilled != NULL) { ccp->cc_pfilled->bkt_next = bkt_list; bkt_list = ccp->cc_pfilled; } if (bkt->bkt_next == NULL) { /* * Bucket layer allocation returns only 1 * bucket; retain current full bucket. */ mcache_cpu_refill(ccp, bkt, 0); } else { /* * We got 2 empty buckets from the bucket * layer; release the current full bucket * back to the bucket layer. */ if (ccp->cc_filled != NULL) { ccp->cc_filled->bkt_next = bkt_list; bkt_list = ccp->cc_filled; } mcache_cpu_batch_refill(ccp, bkt, 0); } mcache_bkt_batch_free(cp, &cp->mc_full, bkt_list); continue; } btp = cp->cache_bkttype; /* * We need an empty bucket to put our freed objects into * but couldn't get an empty bucket from the bucket layer; * attempt to allocate one. We do not want to block for * allocation here, and if the bucket allocation fails * we will simply fall through to the slab layer. */ MCACHE_UNLOCK(&ccp->cc_lock); bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP); MCACHE_LOCK(&ccp->cc_lock); if (bkt != NULL) { /* * We have an empty bucket, but since we drop the * CPU lock above, the cache's bucket size may have * changed. If so, free the bucket and try again. 
*/ if (ccp->cc_bktsize != btp->bt_bktsize) { MCACHE_UNLOCK(&ccp->cc_lock); mcache_free(btp->bt_cache, bkt); MCACHE_LOCK(&ccp->cc_lock); continue; } /* * Store it in the bucket object since we'll * need to refer to it during bucket destroy; * we can't safely refer to cache_bkttype as * the bucket lock may not be acquired then. */ bkt->bkt_type = btp; /* * We have an empty bucket of the right size; * add it to the bucket layer and try again. */ ASSERT(bkt->bkt_next == NULL); mcache_bkt_batch_free(cp, &cp->mc_empty, bkt); continue; } /* * The bucket layer has no empty buckets; free the * object(s) directly to the slab layer. */ break; } MCACHE_UNLOCK(&ccp->cc_lock); /* If there is a waiter below, notify it */ if (cp->mc_waiter_cnt > 0) { mcache_notify(cp, MCN_RETRYALLOC); } /* Advise the slab layer to purge the object(s) */ (*cp->mc_slab_free)(cp->mc_private, list, (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt); } /* * Cache destruction routine. */ __private_extern__ void mcache_destroy(mcache_t *cp) { void **pbuf; MCACHE_LIST_LOCK(); LIST_REMOVE(cp, mc_list); MCACHE_LIST_UNLOCK(); mcache_bkt_purge(cp); /* * This cache is dead; there should be no further transactions. * If it's still invoked, make sure that it induces a fault. */ cp->mc_slab_alloc = NULL; cp->mc_slab_free = NULL; cp->mc_slab_audit = NULL; lck_grp_free(cp->mc_bkt_lock_grp); lck_grp_free(cp->mc_cpu_lock_grp); lck_grp_free(cp->mc_sync_lock_grp); /* * TODO: We need to destroy the zone here, but cannot do it * because there is no such way to achieve that. Until then * the memory allocated for the zone structure is leaked. * Once it is achievable, uncomment these lines: * * if (cp->mc_slab_zone != NULL) { * zdestroy(cp->mc_slab_zone); * cp->mc_slab_zone = NULL; * } */ /* Get the original address since we're about to free it */ pbuf = (void **)((intptr_t)cp - sizeof(void *)); zfree(mcache_zone, *pbuf); } /* * Internal slab allocator used as a backend for simple caches. The current * implementation uses the zone allocator for simplicity reasons. */ static unsigned int mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait) { #pragma unused(wait) mcache_t *cp = arg; unsigned int need = num; size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t)); u_int32_t flags = cp->mc_flags; void *buf, *base, **pbuf; mcache_obj_t **list = *plist; *list = NULL; for (;;) { buf = zalloc_flags(cp->mc_slab_zone, Z_WAITOK | Z_NOFAIL); /* Get the aligned base address for this object */ base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t), cp->mc_align); /* * Wind back a pointer size from the aligned base and * save the original address so we can free it later. */ pbuf = (void **)((intptr_t)base - sizeof(void *)); *pbuf = buf; VERIFY(((intptr_t)base + cp->mc_bufsize) <= ((intptr_t)buf + cp->mc_chunksize)); /* * If auditing is enabled, patternize the contents of * the buffer starting from the 64-bit aligned base to * the end of the buffer; the length is rounded up to * the nearest 64-bit multiple; this is because we use * 64-bit memory access to set/check the pattern. */ if (flags & MCF_DEBUG) { VERIFY(((intptr_t)base + rsize) <= ((intptr_t)buf + cp->mc_chunksize)); mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize); } VERIFY(IS_P2ALIGNED(base, cp->mc_align)); *list = (mcache_obj_t *)base; (*list)->obj_next = NULL; list = *plist = &(*list)->obj_next; /* If we got them all, return to mcache */ if (--need == 0) { break; } } return num - need; } /* * Internal slab deallocator used as a backend for simple caches. 
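 *
 * Caches created with mcache_create_ext() bypass these internal slab
 * routines altogether; the client supplies callbacks shaped like
 * mcache_slab_alloc() and mcache_slab_free() above. A sketch with
 * hypothetical names:
 *
 *	static unsigned int
 *	my_slab_alloc(void *arg, mcache_obj_t ***plist,
 *	    unsigned int num, int wait);
 *	static void
 *	my_slab_free(void *arg, mcache_obj_t *list, boolean_t purged);
 *
 *	cp = mcache_create_ext("example", bufsize, my_slab_alloc,
 *	    my_slab_free, NULL, NULL, NULL, my_arg, 0, MCR_SLEEP);
 *
 * Here arg is the opaque pointer registered at creation time, and
 * the framework provides no alignment guarantee for such caches.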
*/ static void mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged) { mcache_t *cp = arg; mcache_obj_t *nlist; size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t)); u_int32_t flags = cp->mc_flags; void *base; void **pbuf; for (;;) { nlist = list->obj_next; list->obj_next = NULL; base = list; VERIFY(IS_P2ALIGNED(base, cp->mc_align)); /* Get the original address since we're about to free it */ pbuf = (void **)((intptr_t)base - sizeof(void *)); VERIFY(((intptr_t)base + cp->mc_bufsize) <= ((intptr_t)*pbuf + cp->mc_chunksize)); if (flags & MCF_DEBUG) { VERIFY(((intptr_t)base + rsize) <= ((intptr_t)*pbuf + cp->mc_chunksize)); mcache_audit_free_verify(NULL, base, 0, rsize); } /* Free it to zone */ zfree(cp->mc_slab_zone, *pbuf); /* No more objects to free; return to mcache */ if ((list = nlist) == NULL) { break; } } } /* * Internal slab auditor for simple caches. */ static void mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) { mcache_t *cp = arg; size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t)); void *base, **pbuf; while (list != NULL) { mcache_obj_t *next = list->obj_next; base = list; VERIFY(IS_P2ALIGNED(base, cp->mc_align)); /* Get the original address */ pbuf = (void **)((intptr_t)base - sizeof(void *)); VERIFY(((intptr_t)base + rsize) <= ((intptr_t)*pbuf + cp->mc_chunksize)); if (!alloc) { mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize); } else { mcache_audit_free_verify_set(NULL, base, 0, rsize); } list = list->obj_next = next; } } /* * Refill the CPU's buckets with bkt and its follower (if any). */ static void mcache_cpu_batch_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs) { ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) || (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize)); ASSERT(ccp->cc_bktsize > 0); ccp->cc_filled = bkt; ccp->cc_objs = objs; if (__probable(bkt->bkt_next != NULL)) { ccp->cc_pfilled = bkt->bkt_next; ccp->cc_pobjs = objs; bkt->bkt_next = NULL; } else { ASSERT(bkt->bkt_next == NULL); ccp->cc_pfilled = NULL; ccp->cc_pobjs = -1; } } /* * Refill the CPU's filled bucket with bkt and save the previous one. */ static void mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs) { ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) || (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize)); ASSERT(ccp->cc_bktsize > 0); ccp->cc_pfilled = ccp->cc_filled; ccp->cc_pobjs = ccp->cc_objs; ccp->cc_filled = bkt; ccp->cc_objs = objs; } /* * Get one or more buckets from the bucket layer. */ static uint32_t mcache_bkt_batch_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t **list, uint32_t num) { mcache_bkt_t *bkt_list = NULL; mcache_bkt_t *bkt; uint32_t need = num; ASSERT(list != NULL && need > 0); if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) { /* * The bucket layer lock is held by another CPU; increase * the contention count so that we can later resize the * bucket size accordingly. */ MCACHE_LOCK(&cp->mc_bkt_lock); cp->mc_bkt_contention++; } while ((bkt = blp->bl_list) != NULL) { blp->bl_list = bkt->bkt_next; bkt->bkt_next = bkt_list; bkt_list = bkt; if (--blp->bl_total < blp->bl_min) { blp->bl_min = blp->bl_total; } blp->bl_alloc++; if (--need == 0) { break; } } MCACHE_UNLOCK(&cp->mc_bkt_lock); *list = bkt_list; return num - need; } /* * Return one or more buckets to the bucket layer. 
*/ static void mcache_bkt_batch_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt) { mcache_bkt_t *nbkt; MCACHE_LOCK(&cp->mc_bkt_lock); while (bkt != NULL) { nbkt = bkt->bkt_next; bkt->bkt_next = blp->bl_list; blp->bl_list = bkt; blp->bl_total++; bkt = nbkt; } MCACHE_UNLOCK(&cp->mc_bkt_lock); } /* * Enable the bucket layer of a cache. */ static void mcache_cache_bkt_enable(mcache_t *cp) { mcache_cpu_t *ccp; unsigned int cpu; if (cp->mc_flags & MCF_NOCPUCACHE) { return; } for (cpu = 0; cpu < ncpu; cpu++) { ccp = &cp->mc_cpu[cpu]; MCACHE_LOCK(&ccp->cc_lock); ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize; MCACHE_UNLOCK(&ccp->cc_lock); } } /* * Purge all buckets from a cache and disable its bucket layer. */ static void mcache_bkt_purge(mcache_t *cp) { mcache_cpu_t *ccp; mcache_bkt_t *bp, *pbp; int objs, pobjs; unsigned int cpu; for (cpu = 0; cpu < ncpu; cpu++) { ccp = &cp->mc_cpu[cpu]; MCACHE_LOCK(&ccp->cc_lock); bp = ccp->cc_filled; pbp = ccp->cc_pfilled; objs = ccp->cc_objs; pobjs = ccp->cc_pobjs; ccp->cc_filled = NULL; ccp->cc_pfilled = NULL; ccp->cc_objs = -1; ccp->cc_pobjs = -1; ccp->cc_bktsize = 0; MCACHE_UNLOCK(&ccp->cc_lock); if (bp != NULL) { mcache_bkt_destroy(cp, bp, objs); } if (pbp != NULL) { mcache_bkt_destroy(cp, pbp, pobjs); } } mcache_bkt_ws_zero(cp); mcache_bkt_ws_reap(cp); } /* * Free one or more objects in the bucket to the slab layer, * and also free the bucket itself. */ static void mcache_bkt_destroy(mcache_t *cp, mcache_bkt_t *bkt, int nobjs) { if (nobjs > 0) { mcache_obj_t *top = bkt->bkt_obj[nobjs - 1]; if (cp->mc_flags & MCF_DEBUG) { mcache_obj_t *o = top; int cnt = 0; /* * Verify that the chain of objects in the bucket is * valid. Any mismatch here means a mistake when the * object(s) were freed to the CPU layer, so we panic. */ while (o != NULL) { o = o->obj_next; ++cnt; } if (cnt != nobjs) { panic("mcache_bkt_destroy: %s cp %p corrupted " "list in bkt %p (nobjs %d actual %d)\n", cp->mc_name, (void *)cp, (void *)bkt, nobjs, cnt); /* NOTREACHED */ __builtin_unreachable(); } } /* Advise the slab layer to purge the object(s) */ (*cp->mc_slab_free)(cp->mc_private, top, (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt); } mcache_free(bkt->bkt_type->bt_cache, bkt); } /* * Update the bucket layer working set statistics. */ static void mcache_bkt_ws_update(mcache_t *cp) { MCACHE_LOCK(&cp->mc_bkt_lock); cp->mc_full.bl_reaplimit = cp->mc_full.bl_min; cp->mc_full.bl_min = cp->mc_full.bl_total; cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min; cp->mc_empty.bl_min = cp->mc_empty.bl_total; MCACHE_UNLOCK(&cp->mc_bkt_lock); } /* * Mark everything as eligible for reaping (working set is zero). */ static void mcache_bkt_ws_zero(mcache_t *cp) { MCACHE_LOCK(&cp->mc_bkt_lock); cp->mc_full.bl_reaplimit = cp->mc_full.bl_total; cp->mc_full.bl_min = cp->mc_full.bl_total; cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_total; cp->mc_empty.bl_min = cp->mc_empty.bl_total; MCACHE_UNLOCK(&cp->mc_bkt_lock); } /* * Reap all buckets that are beyond the working set. 
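 *
 * bl_min is the low-water mark of bl_total during the current update
 * interval and bl_reaplimit is the previous interval's low-water
 * mark, so MIN(bl_reaplimit, bl_min) buckets have sat unused across
 * roughly two intervals and can be given back. For instance, with
 * illustrative numbers: if the full list currently holds 10 buckets,
 * dipped to 4 during the previous interval and to 6 during this one,
 * then MIN(4, 6) == 4 buckets are destroyed below.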
*/ static void mcache_bkt_ws_reap(mcache_t *cp) { mcache_bkt_t *bkt, *nbkt; uint32_t reap; reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min); if (reap != 0) { (void) mcache_bkt_batch_alloc(cp, &cp->mc_full, &bkt, reap); while (bkt != NULL) { nbkt = bkt->bkt_next; bkt->bkt_next = NULL; mcache_bkt_destroy(cp, bkt, bkt->bkt_type->bt_bktsize); bkt = nbkt; } } reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min); if (reap != 0) { (void) mcache_bkt_batch_alloc(cp, &cp->mc_empty, &bkt, reap); while (bkt != NULL) { nbkt = bkt->bkt_next; bkt->bkt_next = NULL; mcache_bkt_destroy(cp, bkt, 0); bkt = nbkt; } } } static void mcache_reap_timeout(thread_call_param_t dummy __unused, thread_call_param_t arg) { volatile UInt32 *flag = arg; ASSERT(flag == &mcache_reaping); *flag = 0; } static void mcache_reap_done(void *flag) { uint64_t deadline, leeway; clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC, &deadline); clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway, NSEC_PER_SEC, &leeway); thread_call_enter_delayed_with_leeway(mcache_reap_tcall, flag, deadline, leeway, THREAD_CALL_DELAY_LEEWAY); } static void mcache_reap_start(void *arg) { UInt32 *flag = arg; ASSERT(flag == &mcache_reaping); mcache_applyall(mcache_cache_reap); mcache_dispatch(mcache_reap_done, flag); } __private_extern__ void mcache_reap(void) { UInt32 *flag = &mcache_reaping; if (mcache_llock_owner == current_thread() || !OSCompareAndSwap(0, 1, flag)) { return; } mcache_dispatch(mcache_reap_start, flag); } __private_extern__ void mcache_reap_now(mcache_t *cp, boolean_t purge) { if (purge) { mcache_bkt_purge(cp); mcache_cache_bkt_enable(cp); } else { mcache_bkt_ws_zero(cp); mcache_bkt_ws_reap(cp); } } static void mcache_cache_reap(mcache_t *cp) { mcache_bkt_ws_reap(cp); } /* * Performs periodic maintenance on a cache. */ static void mcache_cache_update(mcache_t *cp) { int need_bkt_resize = 0; int need_bkt_reenable = 0; lck_mtx_assert(&mcache_llock, LCK_MTX_ASSERT_OWNED); mcache_bkt_ws_update(cp); /* * Cache resize and post-purge reenable are mutually exclusive. * If the cache was previously purged, there is no point in * increasing the bucket size as there was an indication of * memory pressure on the system. */ lck_mtx_lock_spin(&cp->mc_sync_lock); if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt) { need_bkt_reenable = 1; } lck_mtx_unlock(&cp->mc_sync_lock); MCACHE_LOCK(&cp->mc_bkt_lock); /* * If the contention count is greater than the threshold, and if * we are not already at the maximum bucket size, increase it. * Otherwise, if this cache was previously purged by the user * then we simply reenable it. */ if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf && (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) > mcache_bkt_contention && !need_bkt_reenable) { need_bkt_resize = 1; } cp->mc_bkt_contention_prev = cp->mc_bkt_contention; MCACHE_UNLOCK(&cp->mc_bkt_lock); if (need_bkt_resize) { mcache_dispatch(mcache_cache_bkt_resize, cp); } else if (need_bkt_reenable) { mcache_dispatch(mcache_cache_enable, cp); } } /* * Recompute a cache's bucket size. This is an expensive operation * and should not be done frequently; larger buckets provide for a * higher transfer rate with the bucket while smaller buckets reduce * the memory consumption. 
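 *
 * Resizing advances cache_bkttype to the next entry of the
 * mcache_bkttype[] table; e.g. a cache using 31-object buckets moves
 * to 47-object buckets, provided mc_chunksize is still below the
 * current type's bt_maxbuf. The request originates from
 * mcache_cache_update() when more than mcache_bkt_contention
 * contended bucket-lock acquisitions were recorded since the
 * previous update.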
*/ static void mcache_cache_bkt_resize(void *arg) { mcache_t *cp = arg; mcache_bkttype_t *btp = cp->cache_bkttype; if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) { mcache_bkt_purge(cp); /* * Upgrade to the next bucket type with larger bucket size; * temporarily set the previous contention snapshot to a * negative number to prevent unnecessary resize request. */ MCACHE_LOCK(&cp->mc_bkt_lock); cp->cache_bkttype = ++btp; cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX; MCACHE_UNLOCK(&cp->mc_bkt_lock); mcache_cache_enable(cp); } } /* * Reenable a previously disabled cache due to purge. */ static void mcache_cache_enable(void *arg) { mcache_t *cp = arg; lck_mtx_lock_spin(&cp->mc_sync_lock); cp->mc_purge_cnt = 0; cp->mc_enable_cnt = 0; lck_mtx_unlock(&cp->mc_sync_lock); mcache_cache_bkt_enable(cp); } static void mcache_update_timeout(__unused void *arg) { uint64_t deadline, leeway; clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC, &deadline); clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway, NSEC_PER_SEC, &leeway); thread_call_enter_delayed_with_leeway(mcache_update_tcall, NULL, deadline, leeway, THREAD_CALL_DELAY_LEEWAY); } static void mcache_update(thread_call_param_t arg __unused, thread_call_param_t dummy __unused) { mcache_applyall(mcache_cache_update); mcache_update_timeout(NULL); } static void mcache_applyall(void (*func)(mcache_t *)) { mcache_t *cp; MCACHE_LIST_LOCK(); LIST_FOREACH(cp, &mcache_head, mc_list) { func(cp); } MCACHE_LIST_UNLOCK(); } static void mcache_dispatch(void (*func)(void *), void *arg) { ASSERT(func != NULL); timeout(func, arg, hz / 1000); } __private_extern__ void mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp, struct timeval *base_ts) { struct timeval now, base = { .tv_sec = 0, .tv_usec = 0 }; void *stack[MCACHE_STACK_DEPTH + 1]; struct mca_trn *transaction; transaction = &mca->mca_trns[mca->mca_next_trn]; mca->mca_addr = addr; mca->mca_cache = cp; transaction->mca_thread = current_thread(); bzero(stack, sizeof(stack)); transaction->mca_depth = (uint16_t)OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1; bcopy(&stack[1], transaction->mca_stack, sizeof(transaction->mca_stack)); microuptime(&now); if (base_ts != NULL) { base = *base_ts; } /* tstamp is in ms relative to base_ts */ transaction->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000); if ((now.tv_sec - base.tv_sec) > 0) { transaction->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000); } mca->mca_next_trn = (mca->mca_next_trn + 1) % mca_trn_max; } /* * N.B.: mcache_set_pattern(), mcache_verify_pattern() and * mcache_verify_set_pattern() are marked as noinline to prevent the * compiler from aliasing pointers when they are inlined inside the callers * (e.g. mcache_audit_free_verify_set()) which would be undefined behavior. 
*/ __private_extern__ OS_NOINLINE void mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size) { u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size)); u_int64_t *buf = (u_int64_t *)buf_arg; VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t))); VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t))); while (buf < buf_end) { *buf++ = pattern; } } __private_extern__ OS_NOINLINE void * mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size) { u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size)); u_int64_t *buf; VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t))); VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t))); for (buf = buf_arg; buf < buf_end; buf++) { if (*buf != pattern) { return buf; } } return NULL; } OS_NOINLINE static void * mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg, size_t size) { u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size)); u_int64_t *buf; VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t))); VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t))); for (buf = buf_arg; buf < buf_end; buf++) { if (*buf != old) { mcache_set_pattern(old, buf_arg, (uintptr_t)buf - (uintptr_t)buf_arg); return buf; } *buf = new; } return NULL; } __private_extern__ void mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset, size_t size) { void *addr; u_int64_t *oaddr64; mcache_obj_t *next; addr = (void *)((uintptr_t)base + offset); next = ((mcache_obj_t *)addr)->obj_next; /* For the "obj_next" pointer in the buffer */ oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof(u_int64_t)); *oaddr64 = MCACHE_FREE_PATTERN; if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN, (caddr_t)base, size)) != NULL) { mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base, (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64); /* NOTREACHED */ } ((mcache_obj_t *)addr)->obj_next = next; } __private_extern__ void mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset, size_t size) { void *addr; u_int64_t *oaddr64; mcache_obj_t *next; addr = (void *)((uintptr_t)base + offset); next = ((mcache_obj_t *)addr)->obj_next; /* For the "obj_next" pointer in the buffer */ oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof(u_int64_t)); *oaddr64 = MCACHE_FREE_PATTERN; if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN, MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) { mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base, (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64); /* NOTREACHED */ } ((mcache_obj_t *)addr)->obj_next = next; } #undef panic #define DUMP_TRN_FMT() \ "%s transaction thread %p saved PC stack (%d deep):\n" \ "\t%p, %p, %p, %p, %p, %p, %p, %p\n" \ "\t%p, %p, %p, %p, %p, %p, %p, %p\n" #define DUMP_TRN_FIELDS(s, x) \ s, \ mca->mca_trns[x].mca_thread, mca->mca_trns[x].mca_depth, \ mca->mca_trns[x].mca_stack[0], mca->mca_trns[x].mca_stack[1], \ mca->mca_trns[x].mca_stack[2], mca->mca_trns[x].mca_stack[3], \ mca->mca_trns[x].mca_stack[4], mca->mca_trns[x].mca_stack[5], \ mca->mca_trns[x].mca_stack[6], mca->mca_trns[x].mca_stack[7], \ mca->mca_trns[x].mca_stack[8], mca->mca_trns[x].mca_stack[9], \ mca->mca_trns[x].mca_stack[10], mca->mca_trns[x].mca_stack[11], \ mca->mca_trns[x].mca_stack[12], mca->mca_trns[x].mca_stack[13], \ mca->mca_trns[x].mca_stack[14], mca->mca_trns[x].mca_stack[15] #define MCA_TRN_LAST ((mca->mca_next_trn + mca_trn_max) % mca_trn_max) #define MCA_TRN_PREV ((mca->mca_next_trn + mca_trn_max - 1) % mca_trn_max) __private_extern__ char * mcache_dump_mca(char buf[static 
DUMP_MCA_BUF_SIZE], mcache_audit_t *mca) { snprintf(buf, DUMP_MCA_BUF_SIZE, "mca %p: addr %p, cache %p (%s) nxttrn %d\n" DUMP_TRN_FMT() DUMP_TRN_FMT(), mca, mca->mca_addr, mca->mca_cache, mca->mca_cache ? mca->mca_cache->mc_name : "?", mca->mca_next_trn, DUMP_TRN_FIELDS("last", MCA_TRN_LAST), DUMP_TRN_FIELDS("previous", MCA_TRN_PREV)); return buf; } __attribute__((noreturn)) static void mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset, int64_t expected, int64_t got) { char buf[DUMP_MCA_BUF_SIZE]; if (mca == NULL) { panic("mcache_audit: buffer %p modified after free at " "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr, offset, got, expected); /* NOTREACHED */ __builtin_unreachable(); } panic("mcache_audit: buffer %p modified after free at offset 0x%lx " "(0x%llx instead of 0x%llx)\n%s\n", addr, offset, got, expected, mcache_dump_mca(buf, mca)); /* NOTREACHED */ __builtin_unreachable(); }