/* gems-kernel/source/THIRDPARTY/xnu/bsd/kern/mcache.c */
/*
* Copyright (c) 2006-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* Memory allocator with per-CPU caching, derived from the kmem magazine
* concept and implementation as described in the following paper:
* http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
* That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
* reserved. Use is subject to license terms.
*
* There are several major differences between this and the original kmem
* magazine: this derivative implementation allows for multiple objects to
* be allocated and freed from/to the object cache in one call; in addition,
* it provides better flexibility by allowing the user to define a custom
* slab allocator (instead of the default zone allocator). Finally,
* no object construction/destruction takes place at the moment, although
* this could be added in the future to improve efficiency.
*/
#include <sys/param.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <kern/debug.h>
#include <kern/zalloc.h>
#include <kern/cpu_number.h>
#include <kern/locks.h>
#include <kern/thread_call.h>
#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>
#include <mach/vm_param.h>
#include <machine/limits.h>
#include <machine/machine_routines.h>
#include <os/atomic_private.h>
#include <string.h>
#include <sys/mcache.h>
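/* Size of an mcache_t that embeds n per-CPU cache structures */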
#define MCACHE_SIZE(n) \
__builtin_offsetof(mcache_t, mc_cpu[n])
/* Allocate extra in case we need to manually align the pointer */
#define MCACHE_ALLOC_SIZE \
(sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE)
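/* The calling CPU's per-CPU cache structure within cache c */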
#define MCACHE_CPU(c) \
(mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))
/*
* MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
* to serialize accesses to the global list of caches in the system.
* They also record the thread currently running in the critical
* section, so that we can avoid recursive requests to reap the
* caches when memory runs low.
*/
#define MCACHE_LIST_LOCK() { \
lck_mtx_lock(&mcache_llock); \
mcache_llock_owner = current_thread(); \
}
#define MCACHE_LIST_UNLOCK() { \
mcache_llock_owner = NULL; \
lck_mtx_unlock(&mcache_llock); \
}
#define MCACHE_LOCK(l) lck_mtx_lock(l)
#define MCACHE_UNLOCK(l) lck_mtx_unlock(l)
#define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l)
static unsigned int ncpu;
static unsigned int cache_line_size;
static struct thread *mcache_llock_owner;
static LCK_GRP_DECLARE(mcache_llock_grp, "mcache.list");
static LCK_MTX_DECLARE(mcache_llock, &mcache_llock_grp);
static struct zone *mcache_zone;
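/* Reap/update interval and its timer leeway, in seconds */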
static const uint32_t mcache_reap_interval = 15;
static const uint32_t mcache_reap_interval_leeway = 2;
static UInt32 mcache_reaping;
static int mcache_ready;
static int mcache_updating;
static int mcache_bkt_contention = 3;
#if DEBUG
static unsigned int mcache_flags = MCF_DEBUG;
#else
static unsigned int mcache_flags = 0;
#endif
int mca_trn_max = MCA_TRN_MAX;
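/*
* Bucket types, ordered by increasing bucket size. Each entry lists the
* bucket size in objects (bt_bktsize), the chunk-size threshold used to
* select a cache's bucket type (bt_minbuf, see mcache_create_common()),
* the chunk-size limit below which the cache may later be upgraded to the
* next larger bucket type (bt_maxbuf, see mcache_cache_bkt_resize()), and
* the backing bucket cache (bt_cache), created in mcache_init().
*/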
static mcache_bkttype_t mcache_bkttype[] = {
{ 1, 4096, 32768, NULL },
{ 3, 2048, 16384, NULL },
{ 7, 1024, 12288, NULL },
{ 15, 256, 8192, NULL },
{ 31, 64, 4096, NULL },
{ 47, 0, 2048, NULL },
{ 63, 0, 1024, NULL },
{ 95, 0, 512, NULL },
{ 143, 0, 256, NULL },
{ 165, 0, 0, NULL },
};
static mcache_t *mcache_create_common(const char *, size_t, size_t,
mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
mcache_notifyfn_t, void *, u_int32_t, int);
static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
unsigned int, int);
static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
static void mcache_cpu_batch_refill(mcache_cpu_t *, mcache_bkt_t *, int);
static uint32_t mcache_bkt_batch_alloc(mcache_t *, mcache_bktlist_t *,
mcache_bkt_t **, uint32_t);
static void mcache_bkt_batch_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
static void mcache_cache_bkt_enable(mcache_t *);
static void mcache_bkt_purge(mcache_t *);
static void mcache_bkt_destroy(mcache_t *, mcache_bkt_t *, int);
static void mcache_bkt_ws_update(mcache_t *);
static void mcache_bkt_ws_zero(mcache_t *);
static void mcache_bkt_ws_reap(mcache_t *);
static void mcache_dispatch(void (*)(void *), void *);
static void mcache_cache_reap(mcache_t *);
static void mcache_cache_update(mcache_t *);
static void mcache_cache_bkt_resize(void *);
static void mcache_cache_enable(void *);
static void mcache_update(thread_call_param_t __unused, thread_call_param_t __unused);
static void mcache_update_timeout(void *);
static void mcache_applyall(void (*)(mcache_t *));
static void mcache_reap_start(void *);
static void mcache_reap_done(void *);
static void mcache_reap_timeout(thread_call_param_t __unused, thread_call_param_t);
static void mcache_notify(mcache_t *, u_int32_t);
static void mcache_purge(void *);
__attribute__((noreturn))
static void mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
int64_t expected, int64_t got);
static LIST_HEAD(, mcache) mcache_head;
mcache_t *mcache_audit_cache;
static thread_call_t mcache_reap_tcall;
static thread_call_t mcache_update_tcall;
/*
* Initialize the framework; this is currently called as part of BSD init.
*/
__private_extern__ void
mcache_init(void)
{
mcache_bkttype_t *btp;
unsigned int i;
char name[32];
VERIFY(mca_trn_max >= 2);
ncpu = ml_wait_max_cpus();
(void) mcache_cache_line_size(); /* prime it */
mcache_reap_tcall = thread_call_allocate(mcache_reap_timeout, NULL);
mcache_update_tcall = thread_call_allocate(mcache_update, NULL);
if (mcache_reap_tcall == NULL || mcache_update_tcall == NULL) {
panic("mcache_init: thread_call_allocate failed");
/* NOTREACHED */
__builtin_unreachable();
}
mcache_zone = zone_create("mcache", MCACHE_ALLOC_SIZE,
ZC_PGZ_USE_GUARDS | ZC_DESTRUCTIBLE);
LIST_INIT(&mcache_head);
for (i = 0; i < sizeof(mcache_bkttype) / sizeof(*btp); i++) {
btp = &mcache_bkttype[i];
(void) snprintf(name, sizeof(name), "bkt_%d",
btp->bt_bktsize);
btp->bt_cache = mcache_create(name,
(btp->bt_bktsize + 1) * sizeof(void *), 0, 0, MCR_SLEEP);
}
PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof(mcache_flags));
mcache_flags &= MCF_FLAGS_MASK;
mcache_audit_cache = mcache_create("audit", sizeof(mcache_audit_t),
0, 0, MCR_SLEEP);
mcache_applyall(mcache_cache_bkt_enable);
mcache_ready = 1;
printf("mcache: %d CPU(s), %d bytes CPU cache line size\n",
ncpu, CPU_CACHE_LINE_SIZE);
}
/*
* Return the global mcache flags.
*/
__private_extern__ unsigned int
mcache_getflags(void)
{
return mcache_flags;
}
/*
* Return the CPU cache line size.
*/
__private_extern__ unsigned int
mcache_cache_line_size(void)
{
if (cache_line_size == 0) {
ml_cpu_info_t cpu_info;
ml_cpu_get_info(&cpu_info);
cache_line_size = (unsigned int)cpu_info.cache_line_size;
}
return cache_line_size;
}
/*
* Create a cache using the zone allocator as the backend slab allocator.
* The caller may specify any alignment for the object; if it specifies 0
* the default alignment (MCACHE_ALIGN) will be used.
*/
__private_extern__ mcache_t *
mcache_create(const char *name, size_t bufsize, size_t align,
u_int32_t flags, int wait __unused)
{
return mcache_create_common(name, bufsize, align, mcache_slab_alloc,
mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1);
}
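/*
* Example usage (illustrative sketch only; "mycache" and struct myobj
* are hypothetical names, not defined in this file):
*
*	mcache_t *mycache = mcache_create("myobj", sizeof (struct myobj),
*	    0, 0, MCR_SLEEP);
*	struct myobj *obj = mcache_alloc(mycache, MCR_SLEEP);
*	...
*	mcache_free(mycache, obj);
*
* Objects are recycled through the per-CPU and bucket layers; the backing
* zone is touched only when those layers run dry.
*/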
/*
* Create a cache using a custom backend slab allocator. Since the caller
* is responsible for allocation, no alignment guarantee will be provided
* by this framework.
*/
__private_extern__ mcache_t *
mcache_create_ext(const char *name, size_t bufsize,
mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
u_int32_t flags, int wait __unused)
{
return mcache_create_common(name, bufsize, 0, allocfn,
freefn, auditfn, logfn, notifyfn, arg, flags, 0);
}
/*
* Common cache creation routine.
*/
static mcache_t *
mcache_create_common(const char *name, size_t bufsize, size_t align,
mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
u_int32_t flags, int need_zone)
{
mcache_bkttype_t *btp;
mcache_t *cp = NULL;
size_t chunksize;
void *buf, **pbuf;
unsigned int c;
char lck_name[64];
buf = zalloc_flags(mcache_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
/*
* In case we didn't get cache-aligned memory, round it up
* accordingly. This is needed in order to get the rest of
* structure members aligned properly. It also means that
* the memory span gets shifted due to the round up, but it
* is okay since we've allocated extra space for this.
*/
cp = (mcache_t *)
P2ROUNDUP((intptr_t)buf + sizeof(void *), CPU_CACHE_LINE_SIZE);
pbuf = (void **)((intptr_t)cp - sizeof(void *));
*pbuf = buf;
/*
* Guaranteed alignment is valid only when we use the internal
* slab allocator (currently set to use the zone allocator).
*/
if (!need_zone) {
align = 1;
} else {
/* Enforce 64-bit minimum alignment for zone-based buffers */
if (align == 0) {
align = MCACHE_ALIGN;
}
align = P2ROUNDUP(align, MCACHE_ALIGN);
}
if ((align & (align - 1)) != 0) {
panic("mcache_create: bad alignment %lu", align);
/* NOTREACHED */
__builtin_unreachable();
}
cp->mc_align = align;
cp->mc_slab_alloc = allocfn;
cp->mc_slab_free = freefn;
cp->mc_slab_audit = auditfn;
cp->mc_slab_log = logfn;
cp->mc_slab_notify = notifyfn;
cp->mc_private = need_zone ? cp : arg;
cp->mc_bufsize = bufsize;
cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;
(void) snprintf(cp->mc_name, sizeof(cp->mc_name), "mcache.%s", name);
(void) snprintf(lck_name, sizeof(lck_name), "%s.cpu", cp->mc_name);
cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name, LCK_GRP_ATTR_NULL);
/*
* Allocation chunk size is the object's size plus any extra size
* needed to satisfy the object's alignment. It is enforced to be
* at least the size of an LP64 pointer to simplify auditing and to
* handle multiple-element allocation requests, where the elements
* returned are linked together in a list.
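*
* For example (illustrative numbers only): with bufsize 256 and align 64,
* the zone case below computes MAX(256, 8) + 8 + 64 = 328 and rounds it
* up to the next multiple of the alignment, 384.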
*/
chunksize = MAX(bufsize, sizeof(u_int64_t));
if (need_zone) {
VERIFY(align != 0 && (align % MCACHE_ALIGN) == 0);
chunksize += sizeof(uint64_t) + align;
chunksize = P2ROUNDUP(chunksize, align);
cp->mc_slab_zone = zone_create(cp->mc_name, chunksize,
ZC_PGZ_USE_GUARDS | ZC_DESTRUCTIBLE);
}
cp->mc_chunksize = chunksize;
/*
* Initialize the bucket layer.
*/
(void) snprintf(lck_name, sizeof(lck_name), "%s.bkt", cp->mc_name);
cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
LCK_GRP_ATTR_NULL);
lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp, LCK_ATTR_NULL);
(void) snprintf(lck_name, sizeof(lck_name), "%s.sync", cp->mc_name);
cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
LCK_GRP_ATTR_NULL);
lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp, LCK_ATTR_NULL);
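/*
* Select the bucket type for this chunk size: walk the table (ordered
* by decreasing bt_minbuf) and stop at the first entry whose bt_minbuf
* is below the chunk size, so that smaller objects get larger buckets.
*/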
for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++) {
continue;
}
cp->cache_bkttype = btp;
/*
* Initialize the CPU layer. Each per-CPU structure is aligned
* on the CPU cache line boundary to prevent false sharing.
*/
for (c = 0; c < ncpu; c++) {
mcache_cpu_t *ccp = &cp->mc_cpu[c];
VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE));
lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp, LCK_ATTR_NULL);
ccp->cc_objs = -1;
ccp->cc_pobjs = -1;
}
if (mcache_ready) {
mcache_cache_bkt_enable(cp);
}
/* TODO: dynamically create sysctl for stats */
MCACHE_LIST_LOCK();
LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
MCACHE_LIST_UNLOCK();
/*
* If cache buckets are enabled and this is the first cache
* created, start the periodic cache update.
*/
if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
mcache_updating = 1;
mcache_update_timeout(NULL);
}
if (cp->mc_flags & MCF_DEBUG) {
printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
"chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
}
return cp;
}
/*
* Allocate one or more objects from a cache. Returns the number of
* objects actually allocated, which may be fewer than requested for
* non-blocking requests.
*/
__private_extern__ unsigned int
mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
{
mcache_cpu_t *ccp;
mcache_obj_t **top = &(*list);
mcache_bkt_t *bkt;
unsigned int need = num;
boolean_t nwretry = FALSE;
/* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
VERIFY((wait & (MCR_NOSLEEP | MCR_FAILOK)) != (MCR_NOSLEEP | MCR_FAILOK));
ASSERT(list != NULL);
*list = NULL;
if (num == 0) {
return 0;
}
retry_alloc:
/* We may not always be running in the same CPU in case of retries */
ccp = MCACHE_CPU(cp);
MCACHE_LOCK(&ccp->cc_lock);
for (;;) {
/*
* If we have an object in the current CPU's filled bucket,
* chain the object to any previous objects and return if
* we've satisfied the number of requested objects.
*/
if (ccp->cc_objs > 0) {
mcache_obj_t *tail;
int objs;
/*
* Objects in the bucket are already linked together
* with the most recently freed object at the head of
* the list; grab as many objects as we can.
*/
objs = MIN((unsigned int)ccp->cc_objs, need);
*list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
ccp->cc_objs -= objs;
ccp->cc_alloc += objs;
tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
list = &tail->obj_next;
*list = NULL;
/* If we got them all, return to caller */
if ((need -= objs) == 0) {
MCACHE_UNLOCK(&ccp->cc_lock);
if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
cp->mc_slab_log != NULL) {
(*cp->mc_slab_log)(num, *top, TRUE);
}
if (cp->mc_flags & MCF_DEBUG) {
goto debug_alloc;
}
return num;
}
}
/*
* The CPU's filled bucket is empty. If the previous filled
* bucket was full, exchange and try again.
*/
if (ccp->cc_pobjs > 0) {
mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
continue;
}
/*
* If the bucket layer is disabled, allocate from slab. This
* can happen either because MCF_NOCPUCACHE is set, or because
* the bucket layer is currently being resized.
*/
if (ccp->cc_bktsize == 0) {
break;
}
/*
* Both of the CPU's buckets are empty; try to get full
* bucket(s) from the bucket layer. Upon success, refill
* this CPU and place any empty bucket into the empty list.
* To prevent potential thrashing, replace both empty buckets
* only if the requested count exceeds a bucket's worth of
* objects.
*/
(void) mcache_bkt_batch_alloc(cp, &cp->mc_full,
&bkt, (need <= ccp->cc_bktsize) ? 1 : 2);
if (bkt != NULL) {
mcache_bkt_t *bkt_list = NULL;
if (ccp->cc_pfilled != NULL) {
ccp->cc_pfilled->bkt_next = bkt_list;
bkt_list = ccp->cc_pfilled;
}
if (bkt->bkt_next == NULL) {
/*
* Bucket layer allocation returns only 1
* bucket; retain current empty bucket.
*/
mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
} else {
/*
* We got 2 full buckets from the bucket
* layer; release the current empty bucket
* back to the bucket layer.
*/
if (ccp->cc_filled != NULL) {
ccp->cc_filled->bkt_next = bkt_list;
bkt_list = ccp->cc_filled;
}
mcache_cpu_batch_refill(ccp, bkt,
ccp->cc_bktsize);
}
mcache_bkt_batch_free(cp, &cp->mc_empty, bkt_list);
continue;
}
/*
* The bucket layer has no full buckets; allocate the
* object(s) directly from the slab layer.
*/
break;
}
MCACHE_UNLOCK(&ccp->cc_lock);
need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);
/*
* If this is a blocking allocation, or if it is non-blocking and
* the cache's list of full buckets is non-empty, then retry the allocation.
*/
if (need > 0) {
if (!(wait & MCR_NONBLOCKING)) {
os_atomic_inc(&cp->mc_wretry_cnt, relaxed);
goto retry_alloc;
} else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
!mcache_bkt_isempty(cp)) {
if (!nwretry) {
nwretry = TRUE;
}
os_atomic_inc(&cp->mc_nwretry_cnt, relaxed);
goto retry_alloc;
} else if (nwretry) {
os_atomic_inc(&cp->mc_nwfail_cnt, relaxed);
}
}
if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) {
(*cp->mc_slab_log)((num - need), *top, TRUE);
}
if (!(cp->mc_flags & MCF_DEBUG)) {
return num - need;
}
debug_alloc:
if (cp->mc_flags & MCF_DEBUG) {
mcache_obj_t **o = top;
unsigned int n;
n = 0;
/*
* Verify that the chain of objects have the same count as
* what we are about to report to the caller. Any mismatch
* here means that the object list is insanely broken and
* therefore we must panic.
*/
while (*o != NULL) {
o = &(*o)->obj_next;
++n;
}
if (n != (num - need)) {
panic("mcache_alloc_ext: %s cp %p corrupted list "
"(got %d actual %d)\n", cp->mc_name,
(void *)cp, num - need, n);
/* NOTREACHED */
__builtin_unreachable();
}
}
/* Invoke the slab layer audit callback if auditing is enabled */
if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) {
(*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);
}
return num - need;
}
/*
* Allocate a single object from a cache.
*/
__private_extern__ void *
mcache_alloc(mcache_t *cp, int wait)
{
mcache_obj_t *buf;
(void) mcache_alloc_ext(cp, &buf, 1, wait);
return buf;
}
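/*
* Track threads waiting for objects from this cache; while the waiter
* count is nonzero, frees will post MCN_RETRYALLOC notifications to the
* slab layer (see mcache_free_ext()).
*/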
__private_extern__ void
mcache_waiter_inc(mcache_t *cp)
{
os_atomic_inc(&cp->mc_waiter_cnt, relaxed);
}
__private_extern__ void
mcache_waiter_dec(mcache_t *cp)
{
os_atomic_dec(&cp->mc_waiter_cnt, relaxed);
}
__private_extern__ boolean_t
mcache_bkt_isempty(mcache_t *cp)
{
/*
* This isn't meant to accurately tell whether there are
* any full buckets in the cache; it is simply a way to
* obtain "hints" about the state of the cache.
*/
return cp->mc_full.bl_total == 0;
}
/*
* Notify the slab layer about an event.
*/
static void
mcache_notify(mcache_t *cp, u_int32_t event)
{
if (cp->mc_slab_notify != NULL) {
(*cp->mc_slab_notify)(cp->mc_private, event);
}
}
/*
* Purge the cache and disable its buckets.
*/
static void
mcache_purge(void *arg)
{
mcache_t *cp = arg;
mcache_bkt_purge(cp);
/*
* We cannot simply call mcache_cache_bkt_enable() from here as
* a bucket resize may be in flight and we would cause the CPU
* layers of the cache to point to different sizes. Therefore,
* we simply increment the enable count so that during the next
* periodic cache update the buckets can be reenabled.
*/
lck_mtx_lock_spin(&cp->mc_sync_lock);
cp->mc_enable_cnt++;
lck_mtx_unlock(&cp->mc_sync_lock);
}
__private_extern__ boolean_t
mcache_purge_cache(mcache_t *cp, boolean_t async)
{
/*
* Purging a cache that has no per-CPU caches or is already
* in the process of being purged is rather pointless.
*/
if (cp->mc_flags & MCF_NOCPUCACHE) {
return FALSE;
}
lck_mtx_lock_spin(&cp->mc_sync_lock);
if (cp->mc_purge_cnt > 0) {
lck_mtx_unlock(&cp->mc_sync_lock);
return FALSE;
}
cp->mc_purge_cnt++;
lck_mtx_unlock(&cp->mc_sync_lock);
if (async) {
mcache_dispatch(mcache_purge, cp);
} else {
mcache_purge(cp);
}
return TRUE;
}
/*
* Free a single object to a cache.
*/
__private_extern__ void
mcache_free(mcache_t *cp, void *buf)
{
((mcache_obj_t *)buf)->obj_next = NULL;
mcache_free_ext(cp, (mcache_obj_t *)buf);
}
/*
* Free one or more objects to a cache.
*/
__private_extern__ void
mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
{
mcache_cpu_t *ccp = MCACHE_CPU(cp);
mcache_bkttype_t *btp;
mcache_obj_t *nlist;
mcache_bkt_t *bkt;
if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL) {
(*cp->mc_slab_log)(0, list, FALSE);
}
/* Invoke the slab layer audit callback if auditing is enabled */
if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL) {
(*cp->mc_slab_audit)(cp->mc_private, list, FALSE);
}
MCACHE_LOCK(&ccp->cc_lock);
for (;;) {
/*
* If there is space in the current CPU's filled bucket, put
* the object there and return once all objects are freed.
* Note the cast to unsigned integer takes care of the case
* where the bucket layer is disabled (when cc_objs is -1).
*/
if ((unsigned int)ccp->cc_objs <
(unsigned int)ccp->cc_bktsize) {
/*
* Reverse the list while we place the object into the
* bucket; this effectively causes the most recently
* freed object(s) to be reused during allocation.
*/
nlist = list->obj_next;
list->obj_next = (ccp->cc_objs == 0) ? NULL :
ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
ccp->cc_free++;
if ((list = nlist) != NULL) {
continue;
}
/* We are done; return to caller */
MCACHE_UNLOCK(&ccp->cc_lock);
/* If there is a waiter below, notify it */
if (cp->mc_waiter_cnt > 0) {
mcache_notify(cp, MCN_RETRYALLOC);
}
return;
}
/*
* The CPU's filled bucket is full. If the previous filled
* bucket was empty, exchange and try again.
*/
if (ccp->cc_pobjs == 0) {
mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
continue;
}
/*
* If the bucket layer is disabled, free to slab. This can
* happen either because MCF_NOCPUCACHE is set, or because
* the bucket layer is currently being resized.
*/
if (ccp->cc_bktsize == 0) {
break;
}
/*
* Both of the CPU's buckets are full; try to get empty
* buckets from the bucket layer. Upon success, empty this
* CPU and place any full bucket into the full list.
*
* TODO: Because the caller currently doesn't indicate
* the number of objects in the list, we choose the more
* conservative approach of allocating only 1 empty
* bucket (to prevent potential thrashing). Once we
* have the object count, we can replace 1 with similar
* logic as used in mcache_alloc_ext().
*/
(void) mcache_bkt_batch_alloc(cp, &cp->mc_empty, &bkt, 1);
if (bkt != NULL) {
mcache_bkt_t *bkt_list = NULL;
if (ccp->cc_pfilled != NULL) {
ccp->cc_pfilled->bkt_next = bkt_list;
bkt_list = ccp->cc_pfilled;
}
if (bkt->bkt_next == NULL) {
/*
* Bucket layer allocation returns only 1
* bucket; retain current full bucket.
*/
mcache_cpu_refill(ccp, bkt, 0);
} else {
/*
* We got 2 empty buckets from the bucket
* layer; release the current full bucket
* back to the bucket layer.
*/
if (ccp->cc_filled != NULL) {
ccp->cc_filled->bkt_next = bkt_list;
bkt_list = ccp->cc_filled;
}
mcache_cpu_batch_refill(ccp, bkt, 0);
}
mcache_bkt_batch_free(cp, &cp->mc_full, bkt_list);
continue;
}
btp = cp->cache_bkttype;
/*
* We need an empty bucket to put our freed objects into
* but couldn't get an empty bucket from the bucket layer;
* attempt to allocate one. We do not want to block for
* allocation here, and if the bucket allocation fails
* we will simply fall through to the slab layer.
*/
MCACHE_UNLOCK(&ccp->cc_lock);
bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
MCACHE_LOCK(&ccp->cc_lock);
if (bkt != NULL) {
/*
* We have an empty bucket, but since we drop the
* CPU lock above, the cache's bucket size may have
* changed. If so, free the bucket and try again.
*/
if (ccp->cc_bktsize != btp->bt_bktsize) {
MCACHE_UNLOCK(&ccp->cc_lock);
mcache_free(btp->bt_cache, bkt);
MCACHE_LOCK(&ccp->cc_lock);
continue;
}
/*
* Store it in the bucket object since we'll
* need to refer to it during bucket destroy;
* we can't safely refer to cache_bkttype as
* the bucket lock may not be acquired then.
*/
bkt->bkt_type = btp;
/*
* We have an empty bucket of the right size;
* add it to the bucket layer and try again.
*/
ASSERT(bkt->bkt_next == NULL);
mcache_bkt_batch_free(cp, &cp->mc_empty, bkt);
continue;
}
/*
* The bucket layer has no empty buckets; free the
* object(s) directly to the slab layer.
*/
break;
}
MCACHE_UNLOCK(&ccp->cc_lock);
/* If there is a waiter below, notify it */
if (cp->mc_waiter_cnt > 0) {
mcache_notify(cp, MCN_RETRYALLOC);
}
/* Advise the slab layer to purge the object(s) */
(*cp->mc_slab_free)(cp->mc_private, list,
(cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
}
/*
* Cache destruction routine.
*/
__private_extern__ void
mcache_destroy(mcache_t *cp)
{
void **pbuf;
MCACHE_LIST_LOCK();
LIST_REMOVE(cp, mc_list);
MCACHE_LIST_UNLOCK();
mcache_bkt_purge(cp);
/*
* This cache is dead; there should be no further transactions.
* If it's still invoked, make sure that it induces a fault.
*/
cp->mc_slab_alloc = NULL;
cp->mc_slab_free = NULL;
cp->mc_slab_audit = NULL;
lck_grp_free(cp->mc_bkt_lock_grp);
lck_grp_free(cp->mc_cpu_lock_grp);
lck_grp_free(cp->mc_sync_lock_grp);
/*
* TODO: We need to destroy the zone here, but cannot do it
* because there is no such way to achieve that. Until then
* the memory allocated for the zone structure is leaked.
* Once it is achievable, uncomment these lines:
*
* if (cp->mc_slab_zone != NULL) {
* zdestroy(cp->mc_slab_zone);
* cp->mc_slab_zone = NULL;
* }
*/
/* Get the original address since we're about to free it */
pbuf = (void **)((intptr_t)cp - sizeof(void *));
zfree(mcache_zone, *pbuf);
}
/*
* Internal slab allocator used as a backend for simple caches. The current
* implementation uses the zone allocator for simplicity reasons.
*/
static unsigned int
mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num,
int wait)
{
#pragma unused(wait)
mcache_t *cp = arg;
unsigned int need = num;
size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
u_int32_t flags = cp->mc_flags;
void *buf, *base, **pbuf;
mcache_obj_t **list = *plist;
*list = NULL;
for (;;) {
buf = zalloc_flags(cp->mc_slab_zone, Z_WAITOK | Z_NOFAIL);
/* Get the aligned base address for this object */
base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
cp->mc_align);
/*
* Wind back a pointer size from the aligned base and
* save the original address so we can free it later.
*/
pbuf = (void **)((intptr_t)base - sizeof(void *));
*pbuf = buf;
VERIFY(((intptr_t)base + cp->mc_bufsize) <=
((intptr_t)buf + cp->mc_chunksize));
/*
* If auditing is enabled, patternize the contents of
* the buffer starting from the 64-bit aligned base to
* the end of the buffer; the length is rounded up to
* the nearest 64-bit multiple; this is because we use
* 64-bit memory access to set/check the pattern.
*/
if (flags & MCF_DEBUG) {
VERIFY(((intptr_t)base + rsize) <=
((intptr_t)buf + cp->mc_chunksize));
mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
}
VERIFY(IS_P2ALIGNED(base, cp->mc_align));
*list = (mcache_obj_t *)base;
(*list)->obj_next = NULL;
list = *plist = &(*list)->obj_next;
/* If we got them all, return to mcache */
if (--need == 0) {
break;
}
}
return num - need;
}
/*
* Internal slab deallocator used as a backend for simple caches.
*/
static void
mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
{
mcache_t *cp = arg;
mcache_obj_t *nlist;
size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
u_int32_t flags = cp->mc_flags;
void *base;
void **pbuf;
for (;;) {
nlist = list->obj_next;
list->obj_next = NULL;
base = list;
VERIFY(IS_P2ALIGNED(base, cp->mc_align));
/* Get the original address since we're about to free it */
pbuf = (void **)((intptr_t)base - sizeof(void *));
VERIFY(((intptr_t)base + cp->mc_bufsize) <=
((intptr_t)*pbuf + cp->mc_chunksize));
if (flags & MCF_DEBUG) {
VERIFY(((intptr_t)base + rsize) <=
((intptr_t)*pbuf + cp->mc_chunksize));
mcache_audit_free_verify(NULL, base, 0, rsize);
}
/* Free it to zone */
zfree(cp->mc_slab_zone, *pbuf);
/* No more objects to free; return to mcache */
if ((list = nlist) == NULL) {
break;
}
}
}
/*
* Internal slab auditor for simple caches.
*/
static void
mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
{
mcache_t *cp = arg;
size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof(u_int64_t));
void *base, **pbuf;
while (list != NULL) {
mcache_obj_t *next = list->obj_next;
base = list;
VERIFY(IS_P2ALIGNED(base, cp->mc_align));
/* Get the original address */
pbuf = (void **)((intptr_t)base - sizeof(void *));
VERIFY(((intptr_t)base + rsize) <=
((intptr_t)*pbuf + cp->mc_chunksize));
if (!alloc) {
mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
} else {
mcache_audit_free_verify_set(NULL, base, 0, rsize);
}
list = list->obj_next = next;
}
}
/*
* Refill the CPU's buckets with bkt and its follower (if any).
*/
static void
mcache_cpu_batch_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
{
ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
(ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
ASSERT(ccp->cc_bktsize > 0);
ccp->cc_filled = bkt;
ccp->cc_objs = objs;
if (__probable(bkt->bkt_next != NULL)) {
ccp->cc_pfilled = bkt->bkt_next;
ccp->cc_pobjs = objs;
bkt->bkt_next = NULL;
} else {
ASSERT(bkt->bkt_next == NULL);
ccp->cc_pfilled = NULL;
ccp->cc_pobjs = -1;
}
}
/*
* Refill the CPU's filled bucket with bkt and save the previous one.
*/
static void
mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
{
ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
(ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
ASSERT(ccp->cc_bktsize > 0);
ccp->cc_pfilled = ccp->cc_filled;
ccp->cc_pobjs = ccp->cc_objs;
ccp->cc_filled = bkt;
ccp->cc_objs = objs;
}
/*
* Get one or more buckets from the bucket layer.
*/
static uint32_t
mcache_bkt_batch_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t **list,
uint32_t num)
{
mcache_bkt_t *bkt_list = NULL;
mcache_bkt_t *bkt;
uint32_t need = num;
ASSERT(list != NULL && need > 0);
if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
/*
* The bucket layer lock is held by another CPU; increase
* the contention count so that we can later grow the
* bucket size accordingly.
*/
MCACHE_LOCK(&cp->mc_bkt_lock);
cp->mc_bkt_contention++;
}
while ((bkt = blp->bl_list) != NULL) {
blp->bl_list = bkt->bkt_next;
bkt->bkt_next = bkt_list;
bkt_list = bkt;
if (--blp->bl_total < blp->bl_min) {
blp->bl_min = blp->bl_total;
}
blp->bl_alloc++;
if (--need == 0) {
break;
}
}
MCACHE_UNLOCK(&cp->mc_bkt_lock);
*list = bkt_list;
return num - need;
}
/*
* Return one or more buckets to the bucket layer.
*/
static void
mcache_bkt_batch_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
{
mcache_bkt_t *nbkt;
MCACHE_LOCK(&cp->mc_bkt_lock);
while (bkt != NULL) {
nbkt = bkt->bkt_next;
bkt->bkt_next = blp->bl_list;
blp->bl_list = bkt;
blp->bl_total++;
bkt = nbkt;
}
MCACHE_UNLOCK(&cp->mc_bkt_lock);
}
/*
* Enable the bucket layer of a cache.
*/
static void
mcache_cache_bkt_enable(mcache_t *cp)
{
mcache_cpu_t *ccp;
unsigned int cpu;
if (cp->mc_flags & MCF_NOCPUCACHE) {
return;
}
for (cpu = 0; cpu < ncpu; cpu++) {
ccp = &cp->mc_cpu[cpu];
MCACHE_LOCK(&ccp->cc_lock);
ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
MCACHE_UNLOCK(&ccp->cc_lock);
}
}
/*
* Purge all buckets from a cache and disable its bucket layer.
*/
static void
mcache_bkt_purge(mcache_t *cp)
{
mcache_cpu_t *ccp;
mcache_bkt_t *bp, *pbp;
int objs, pobjs;
unsigned int cpu;
for (cpu = 0; cpu < ncpu; cpu++) {
ccp = &cp->mc_cpu[cpu];
MCACHE_LOCK(&ccp->cc_lock);
bp = ccp->cc_filled;
pbp = ccp->cc_pfilled;
objs = ccp->cc_objs;
pobjs = ccp->cc_pobjs;
ccp->cc_filled = NULL;
ccp->cc_pfilled = NULL;
ccp->cc_objs = -1;
ccp->cc_pobjs = -1;
ccp->cc_bktsize = 0;
MCACHE_UNLOCK(&ccp->cc_lock);
if (bp != NULL) {
mcache_bkt_destroy(cp, bp, objs);
}
if (pbp != NULL) {
mcache_bkt_destroy(cp, pbp, pobjs);
}
}
mcache_bkt_ws_zero(cp);
mcache_bkt_ws_reap(cp);
}
/*
* Free one or more objects in the bucket to the slab layer,
* and also free the bucket itself.
*/
static void
mcache_bkt_destroy(mcache_t *cp, mcache_bkt_t *bkt, int nobjs)
{
if (nobjs > 0) {
mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];
if (cp->mc_flags & MCF_DEBUG) {
mcache_obj_t *o = top;
int cnt = 0;
/*
* Verify that the chain of objects in the bucket is
* valid. Any mismatch here means a mistake when the
* object(s) were freed to the CPU layer, so we panic.
*/
while (o != NULL) {
o = o->obj_next;
++cnt;
}
if (cnt != nobjs) {
panic("mcache_bkt_destroy: %s cp %p corrupted "
"list in bkt %p (nobjs %d actual %d)\n",
cp->mc_name, (void *)cp, (void *)bkt,
nobjs, cnt);
/* NOTREACHED */
__builtin_unreachable();
}
}
/* Advise the slab layer to purge the object(s) */
(*cp->mc_slab_free)(cp->mc_private, top,
(cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
}
mcache_free(bkt->bkt_type->bt_cache, bkt);
}
/*
* Update the bucket layer working set statistics.
*/
static void
mcache_bkt_ws_update(mcache_t *cp)
{
MCACHE_LOCK(&cp->mc_bkt_lock);
cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
cp->mc_full.bl_min = cp->mc_full.bl_total;
cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
cp->mc_empty.bl_min = cp->mc_empty.bl_total;
MCACHE_UNLOCK(&cp->mc_bkt_lock);
}
/*
* Mark everything as eligible for reaping (working set is zero).
*/
static void
mcache_bkt_ws_zero(mcache_t *cp)
{
MCACHE_LOCK(&cp->mc_bkt_lock);
cp->mc_full.bl_reaplimit = cp->mc_full.bl_total;
cp->mc_full.bl_min = cp->mc_full.bl_total;
cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_total;
cp->mc_empty.bl_min = cp->mc_empty.bl_total;
MCACHE_UNLOCK(&cp->mc_bkt_lock);
}
/*
* Reap all buckets that are beyond the working set.
*/
static void
mcache_bkt_ws_reap(mcache_t *cp)
{
mcache_bkt_t *bkt, *nbkt;
uint32_t reap;
reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
if (reap != 0) {
(void) mcache_bkt_batch_alloc(cp, &cp->mc_full, &bkt, reap);
while (bkt != NULL) {
nbkt = bkt->bkt_next;
bkt->bkt_next = NULL;
mcache_bkt_destroy(cp, bkt, bkt->bkt_type->bt_bktsize);
bkt = nbkt;
}
}
reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
if (reap != 0) {
(void) mcache_bkt_batch_alloc(cp, &cp->mc_empty, &bkt, reap);
while (bkt != NULL) {
nbkt = bkt->bkt_next;
bkt->bkt_next = NULL;
mcache_bkt_destroy(cp, bkt, 0);
bkt = nbkt;
}
}
}
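/*
* Delayed thread call handler: clear the reap-in-progress flag so that
* a subsequent mcache_reap() may start a new reap cycle.
*/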
static void
mcache_reap_timeout(thread_call_param_t dummy __unused,
thread_call_param_t arg)
{
volatile UInt32 *flag = arg;
ASSERT(flag == &mcache_reaping);
*flag = 0;
}
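/*
* A reap pass has completed; arm the reap thread call so that the
* in-progress flag is cleared once the reap interval (plus leeway)
* has elapsed.
*/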
static void
mcache_reap_done(void *flag)
{
uint64_t deadline, leeway;
clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC,
&deadline);
clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway,
NSEC_PER_SEC, &leeway);
thread_call_enter_delayed_with_leeway(mcache_reap_tcall, flag,
deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
}
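/*
* Reap the working set of every cache, then dispatch mcache_reap_done()
* to rearm the timer that clears the reap-in-progress flag.
*/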
static void
mcache_reap_start(void *arg)
{
UInt32 *flag = arg;
ASSERT(flag == &mcache_reaping);
mcache_applyall(mcache_cache_reap);
mcache_dispatch(mcache_reap_done, flag);
}
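/*
* Kick off an asynchronous reap of all caches, unless one is already in
* progress or the caller is inside the cache-list critical section (in
* which case reaping could recurse).
*/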
__private_extern__ void
mcache_reap(void)
{
UInt32 *flag = &mcache_reaping;
if (mcache_llock_owner == current_thread() ||
!OSCompareAndSwap(0, 1, flag)) {
return;
}
mcache_dispatch(mcache_reap_start, flag);
}
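/*
* Synchronously reap a single cache: either purge its buckets and
* immediately reenable the CPU layer, or zero its working set and reap
* everything beyond it.
*/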
__private_extern__ void
mcache_reap_now(mcache_t *cp, boolean_t purge)
{
if (purge) {
mcache_bkt_purge(cp);
mcache_cache_bkt_enable(cp);
} else {
mcache_bkt_ws_zero(cp);
mcache_bkt_ws_reap(cp);
}
}
static void
mcache_cache_reap(mcache_t *cp)
{
mcache_bkt_ws_reap(cp);
}
/*
* Performs periodic maintenance on a cache.
*/
static void
mcache_cache_update(mcache_t *cp)
{
int need_bkt_resize = 0;
int need_bkt_reenable = 0;
lck_mtx_assert(&mcache_llock, LCK_MTX_ASSERT_OWNED);
mcache_bkt_ws_update(cp);
/*
* Cache resize and post-purge reenable are mutually exclusive.
* If the cache was previously purged, there is no point in
* increasing the bucket size as there was an indication of
* memory pressure on the system.
*/
lck_mtx_lock_spin(&cp->mc_sync_lock);
if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt) {
need_bkt_reenable = 1;
}
lck_mtx_unlock(&cp->mc_sync_lock);
MCACHE_LOCK(&cp->mc_bkt_lock);
/*
* If the contention count is greater than the threshold, and if
* we are not already at the maximum bucket size, increase it.
* Otherwise, if this cache was previously purged by the user
* then we simply reenable it.
*/
if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
(int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
mcache_bkt_contention && !need_bkt_reenable) {
need_bkt_resize = 1;
}
cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
MCACHE_UNLOCK(&cp->mc_bkt_lock);
if (need_bkt_resize) {
mcache_dispatch(mcache_cache_bkt_resize, cp);
} else if (need_bkt_reenable) {
mcache_dispatch(mcache_cache_enable, cp);
}
}
/*
* Recompute a cache's bucket size. This is an expensive operation
* and should not be done frequently; larger buckets allow more objects
* to be transferred per bucket operation, while smaller buckets reduce
* memory consumption.
*/
static void
mcache_cache_bkt_resize(void *arg)
{
mcache_t *cp = arg;
mcache_bkttype_t *btp = cp->cache_bkttype;
if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
mcache_bkt_purge(cp);
/*
* Upgrade to the next bucket type with larger bucket size;
* temporarily inflate the previous contention snapshot so that the
* computed delta goes negative and no further resize is requested.
*/
MCACHE_LOCK(&cp->mc_bkt_lock);
cp->cache_bkttype = ++btp;
cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
MCACHE_UNLOCK(&cp->mc_bkt_lock);
mcache_cache_enable(cp);
}
}
/*
* Reenable a previously disabled cache due to purge.
*/
static void
mcache_cache_enable(void *arg)
{
mcache_t *cp = arg;
lck_mtx_lock_spin(&cp->mc_sync_lock);
cp->mc_purge_cnt = 0;
cp->mc_enable_cnt = 0;
lck_mtx_unlock(&cp->mc_sync_lock);
mcache_cache_bkt_enable(cp);
}
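/*
* Arm the periodic cache-update thread call.
*/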
static void
mcache_update_timeout(__unused void *arg)
{
uint64_t deadline, leeway;
clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC,
&deadline);
clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway,
NSEC_PER_SEC, &leeway);
thread_call_enter_delayed_with_leeway(mcache_update_tcall, NULL,
deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
}
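/*
* Periodic thread call handler: perform maintenance on every cache and
* rearm the timer.
*/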
static void
mcache_update(thread_call_param_t arg __unused,
thread_call_param_t dummy __unused)
{
mcache_applyall(mcache_cache_update);
mcache_update_timeout(NULL);
}
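/*
* Apply a function to every cache on the global list, holding the
* list lock across the walk.
*/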
static void
mcache_applyall(void (*func)(mcache_t *))
{
mcache_t *cp;
MCACHE_LIST_LOCK();
LIST_FOREACH(cp, &mcache_head, mc_list) {
func(cp);
}
MCACHE_LIST_UNLOCK();
}
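/*
* Run a function asynchronously from timeout (callout) context.
*/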
static void
mcache_dispatch(void (*func)(void *), void *arg)
{
ASSERT(func != NULL);
timeout(func, arg, hz / 1000);
}
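/*
* Record an allocation/free transaction in the audit structure: the
* buffer address, owning cache, calling thread, stack trace, and a
* timestamp in milliseconds relative to base_ts.
*/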
__private_extern__ void
mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp,
struct timeval *base_ts)
{
struct timeval now, base = { .tv_sec = 0, .tv_usec = 0 };
void *stack[MCACHE_STACK_DEPTH + 1];
struct mca_trn *transaction;
transaction = &mca->mca_trns[mca->mca_next_trn];
mca->mca_addr = addr;
mca->mca_cache = cp;
transaction->mca_thread = current_thread();
bzero(stack, sizeof(stack));
transaction->mca_depth = (uint16_t)OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1;
bcopy(&stack[1], transaction->mca_stack,
sizeof(transaction->mca_stack));
microuptime(&now);
if (base_ts != NULL) {
base = *base_ts;
}
/* tstamp is in ms relative to base_ts */
transaction->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000);
if ((now.tv_sec - base.tv_sec) > 0) {
transaction->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000);
}
mca->mca_next_trn =
(mca->mca_next_trn + 1) % mca_trn_max;
}
/*
* N.B.: mcache_set_pattern(), mcache_verify_pattern() and
* mcache_verify_set_pattern() are marked as noinline to prevent the
* compiler from aliasing pointers when they are inlined inside the callers
* (e.g. mcache_audit_free_verify_set()) which would be undefined behavior.
*/
__private_extern__ OS_NOINLINE void
mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
u_int64_t *buf = (u_int64_t *)buf_arg;
VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));
while (buf < buf_end) {
*buf++ = pattern;
}
}
__private_extern__ OS_NOINLINE void *
mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
u_int64_t *buf;
VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));
for (buf = buf_arg; buf < buf_end; buf++) {
if (*buf != pattern) {
return buf;
}
}
return NULL;
}
OS_NOINLINE static void *
mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
size_t size)
{
u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
u_int64_t *buf;
VERIFY(IS_P2ALIGNED(buf_arg, sizeof(u_int64_t)));
VERIFY(IS_P2ALIGNED(size, sizeof(u_int64_t)));
for (buf = buf_arg; buf < buf_end; buf++) {
if (*buf != old) {
mcache_set_pattern(old, buf_arg,
(uintptr_t)buf - (uintptr_t)buf_arg);
return buf;
}
*buf = new;
}
return NULL;
}
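/*
* Verify that a freed buffer still contains the free pattern (temporarily
* patching the word that holds the obj_next linkage); panic if the buffer
* was modified after free.
*/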
__private_extern__ void
mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
size_t size)
{
void *addr;
u_int64_t *oaddr64;
mcache_obj_t *next;
addr = (void *)((uintptr_t)base + offset);
next = ((mcache_obj_t *)addr)->obj_next;
/* For the "obj_next" pointer in the buffer */
oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof(u_int64_t));
*oaddr64 = MCACHE_FREE_PATTERN;
if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
(caddr_t)base, size)) != NULL) {
mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
(int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
/* NOTREACHED */
}
((mcache_obj_t *)addr)->obj_next = next;
}
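/*
* Same as mcache_audit_free_verify(), except that the free pattern is
* replaced with the uninitialized pattern as the buffer is verified.
*/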
__private_extern__ void
mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
size_t size)
{
void *addr;
u_int64_t *oaddr64;
mcache_obj_t *next;
addr = (void *)((uintptr_t)base + offset);
next = ((mcache_obj_t *)addr)->obj_next;
/* For the "obj_next" pointer in the buffer */
oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof(u_int64_t));
*oaddr64 = MCACHE_FREE_PATTERN;
if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
(int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
/* NOTREACHED */
}
((mcache_obj_t *)addr)->obj_next = next;
}
#undef panic
#define DUMP_TRN_FMT() \
"%s transaction thread %p saved PC stack (%d deep):\n" \
"\t%p, %p, %p, %p, %p, %p, %p, %p\n" \
"\t%p, %p, %p, %p, %p, %p, %p, %p\n"
#define DUMP_TRN_FIELDS(s, x) \
s, \
mca->mca_trns[x].mca_thread, mca->mca_trns[x].mca_depth, \
mca->mca_trns[x].mca_stack[0], mca->mca_trns[x].mca_stack[1], \
mca->mca_trns[x].mca_stack[2], mca->mca_trns[x].mca_stack[3], \
mca->mca_trns[x].mca_stack[4], mca->mca_trns[x].mca_stack[5], \
mca->mca_trns[x].mca_stack[6], mca->mca_trns[x].mca_stack[7], \
mca->mca_trns[x].mca_stack[8], mca->mca_trns[x].mca_stack[9], \
mca->mca_trns[x].mca_stack[10], mca->mca_trns[x].mca_stack[11], \
mca->mca_trns[x].mca_stack[12], mca->mca_trns[x].mca_stack[13], \
mca->mca_trns[x].mca_stack[14], mca->mca_trns[x].mca_stack[15]
#define MCA_TRN_LAST ((mca->mca_next_trn + mca_trn_max) % mca_trn_max)
#define MCA_TRN_PREV ((mca->mca_next_trn + mca_trn_max - 1) % mca_trn_max)
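/*
* Format an audit record (buffer address, owning cache, and its logged
* transactions) into the caller-supplied buffer.
*/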
__private_extern__ char *
mcache_dump_mca(char buf[static DUMP_MCA_BUF_SIZE], mcache_audit_t *mca)
{
snprintf(buf, DUMP_MCA_BUF_SIZE,
"mca %p: addr %p, cache %p (%s) nxttrn %d\n"
DUMP_TRN_FMT()
DUMP_TRN_FMT(),
mca, mca->mca_addr, mca->mca_cache,
mca->mca_cache ? mca->mca_cache->mc_name : "?",
mca->mca_next_trn,
DUMP_TRN_FIELDS("last", MCA_TRN_LAST),
DUMP_TRN_FIELDS("previous", MCA_TRN_PREV));
return buf;
}
__attribute__((noreturn))
static void
mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
int64_t expected, int64_t got)
{
char buf[DUMP_MCA_BUF_SIZE];
if (mca == NULL) {
panic("mcache_audit: buffer %p modified after free at "
"offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
offset, got, expected);
/* NOTREACHED */
__builtin_unreachable();
}
panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
"(0x%llx instead of 0x%llx)\n%s\n",
addr, offset, got, expected, mcache_dump_mca(buf, mca));
/* NOTREACHED */
__builtin_unreachable();
}