/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _SKYWALK_MEM_SKMEMCACHEVAR_H
#define _SKYWALK_MEM_SKMEMCACHEVAR_H

#ifdef BSD_KERNEL_PRIVATE
#include
#include
#include

/*
 * Buffer control.
 */
struct skmem_bufctl {
	SLIST_ENTRY(skmem_bufctl) bc_link;   /* bufctl linkage */
	void *bc_addr;                       /* buffer obj address */
	void *bc_addrm;                      /* mirrored buffer obj addr */
	struct skmem_slab *bc_slab;          /* controlling slab */
	uint32_t bc_lim;                     /* buffer obj limit */
	uint32_t bc_flags;                   /* SKMEM_BUFCTL_* flags */
	uint32_t bc_idx;                     /* buffer index within slab */
	volatile uint32_t bc_usecnt;         /* outstanding use */
};

#define SKMEM_BUFCTL_SHAREOK    0x1      /* supports sharing */

#define SKMEM_STACK_DEPTH       16       /* maximum audit stack depth */
#define SKMEM_CACHE_ALIGN       8        /* min guaranteed alignment */

/*
 * Alternative buffer control if SKM_MODE_AUDIT is set.
 */
struct skmem_bufctl_audit {
	SLIST_ENTRY(skmem_bufctl) bc_link;   /* bufctl linkage */
	void *bc_addr;                       /* buffer address */
	void *bc_addrm;                      /* mirrored buffer address */
	struct skmem_slab *bc_slab;          /* controlling slab */
	uint32_t bc_flags;                   /* SKMEM_BUFCTL_* flags */
	uint32_t bc_idx;                     /* buffer index within slab */
	volatile uint32_t bc_usecnt;         /* outstanding use */
	struct thread *bc_thread;            /* thread doing transaction */
	uint32_t bc_timestamp;               /* transaction time */
	uint32_t bc_depth;                   /* stack depth */
	void *bc_stack[SKMEM_STACK_DEPTH];   /* stack */
};

/*
 * Buffer control hash bucket.
 */
struct skmem_bufctl_bkt {
	SLIST_HEAD(, skmem_bufctl) bcb_head; /* bufctl allocated list */
};

/*
 * Slab.
 */
struct skmem_slab {
	TAILQ_ENTRY(skmem_slab) sl_link;     /* slab freelist linkage */
	struct skmem_cache *sl_cache;        /* controlling cache */
	void *sl_base;                       /* base of allocated memory */
	void *sl_basem;                      /* base of mirrored memory */
	struct sksegment *sl_seg;            /* backing segment */
	struct sksegment *sl_segm;           /* backing mirrored segment */
	SLIST_HEAD(, skmem_bufctl) sl_head;  /* bufctl free list */
	uint32_t sl_refcnt;                  /* outstanding allocations */
	uint32_t sl_chunks;                  /* # of buffers in slab */
};

#define SKMEM_SLAB_IS_PARTIAL(sl)       \
	((sl)->sl_refcnt > 0 && (sl)->sl_refcnt < (sl)->sl_chunks)

#define SKMEM_SLAB_MEMBER(sl, buf)      \
	(((size_t)(buf) - (size_t)(sl)->sl_base) < (sl)->sl_cache->skm_slabsize)
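
/*
 * Illustrative sketch (not part of the original header; compiled out):
 * how the slab free list and reference count above interact on the
 * allocation path.  The function name and the caller-held slab lock
 * are assumptions; only the field and macro names come from the
 * definitions above.
 */
#if 0
static struct skmem_bufctl *
skmem_slab_bufctl_get(struct skmem_slab *sl)
{
	struct skmem_bufctl *bc;

	/* pop a free buffer control off the slab's free list */
	bc = SLIST_FIRST(&sl->sl_head);
	if (bc != NULL) {
		SLIST_REMOVE_HEAD(&sl->sl_head, bc_link);
		sl->sl_refcnt++;        /* one more outstanding buffer */
		/* the buffer must lie within this slab's span */
		ASSERT(SKMEM_SLAB_MEMBER(sl, bc->bc_addr));
		/* SKMEM_SLAB_IS_PARTIAL(sl) now reflects the new state */
	}
	return bc;
}
#endif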
/*
 * Magazine type.
 */
struct skmem_magtype {
	int mt_magsize;                      /* magazine size (# of objs) */
	int mt_align;                        /* magazine alignment */
	size_t mt_minbuf;                    /* all smaller bufs qualify */
	size_t mt_maxbuf;                    /* no larger bufs qualify */
	struct skmem_cache *mt_cache;        /* magazine cache */
	char mt_cname[64];                   /* magazine cache name */
};

/*
 * Magazine.
 */
struct skmem_mag {
	SLIST_ENTRY(skmem_mag) mg_link;      /* magazine linkage */
	struct skmem_magtype *mg_magtype;    /* magazine type */
	void *mg_round[1];                   /* one or more objs */
};

#define SKMEM_MAG_SIZE(n)       \
	offsetof(struct skmem_mag, mg_round[n])

/*
 * Magazine depot.
 */
struct skmem_maglist {
	SLIST_HEAD(, skmem_mag) ml_list;     /* magazine list */
	uint32_t ml_total;                   /* number of magazines */
	uint32_t ml_min;                     /* min since last update */
	uint32_t ml_reaplimit;               /* max reapable magazines */
	uint64_t ml_alloc;                   /* allocations from this list */
};

/*
 * Per-CPU cache structure.
 */
struct skmem_cpu_cache {
	decl_lck_mtx_data(, cp_lock);
	struct skmem_mag *cp_loaded;         /* currently filled magazine */
	struct skmem_mag *cp_ploaded;        /* previously filled magazine */
	uint64_t cp_alloc;                   /* allocations from this cpu */
	uint64_t cp_free;                    /* frees to this cpu */
	int cp_rounds;                       /* # of objs in filled mag */
	int cp_prounds;                      /* # of objs in previous mag */
	int cp_magsize;                      /* # of objs in a full mag */
} __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
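
/*
 * Illustrative sketch (not part of the original header; compiled out):
 * the magazine-layer fast path implied by the fields above.  An
 * allocation pops the most recently freed object off the loaded
 * magazine.  The function name and the plain lck_mtx_lock() usage
 * are assumptions for illustration.
 */
#if 0
static void *
skmem_cpu_alloc_fast(struct skmem_cpu_cache *ccp)
{
	void *obj = NULL;

	lck_mtx_lock(&ccp->cp_lock);
	if (ccp->cp_rounds > 0) {
		/* LIFO: reuse the cache-warm object freed most recently */
		obj = ccp->cp_loaded->mg_round[--ccp->cp_rounds];
		ccp->cp_alloc++;
	}
	lck_mtx_unlock(&ccp->cp_lock);
	/* NULL means the caller falls back to the depot/slab layers */
	return obj;
}
#endif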
/*
 * Object's region information.
 *
 * This information is provided to skmem_ctor_fn_t() to assist in
 * constructing the master and slave objects.  It is also returned
 * separately by skmem_cache_get_obj_info() when called on an object
 * that has been allocated from a skmem_cache.  Information about the
 * slave object is available only at constructor time.
 */
struct skmem_obj_info {
	void *oi_addr;                       /* object address */
	struct skmem_bufctl *oi_bc;          /* buffer control (master) */
	uint32_t oi_size;                    /* actual object size */
	obj_idx_t oi_idx_reg;                /* object idx within region */
	obj_idx_t oi_idx_seg;                /* object idx within segment */
} __attribute__((__packed__));

/*
 * Generic one-way linked list element structure.  This is used to
 * handle skmem_cache_batch_alloc() requests in order to chain the
 * allocated objects together before returning them to the caller.
 * It is also used when freeing a batch of packets by the caller of
 * skmem_cache_batch_free().  Note that this requires the region's
 * object to be at least the size of struct skmem_obj, as we store
 * this information at the beginning of each object in the chain.
 */
struct skmem_obj {
	/*
	 * Given that we overlay this structure on top of whatever
	 * structure the object represents, the constructor must
	 * ensure that it reserves at least the size of a pointer
	 * at the top for the linkage.
	 */
	struct skmem_obj *mo_next;           /* next object in the list */
	/*
	 * The following are used only for raw (unconstructed) objects
	 * coming out of the slab layer during allocations.  They are
	 * not touched otherwise by skmem_cache when the object resides
	 * in the magazine.  By utilizing this space, we avoid having
	 * to allocate temporary storage elsewhere.
	 */
	struct skmem_obj_info mo_info;       /* object's info */
	struct skmem_obj_info mo_minfo;      /* mirrored object's info */
};

#define SKMEM_OBJ_ADDR(_oi)     (_oi)->oi_addr
#define SKMEM_OBJ_BUFCTL(_oi)   (_oi)->oi_bc
#define SKMEM_OBJ_SIZE(_oi)     (_oi)->oi_size
#define SKMEM_OBJ_IDX_REG(_oi)  (_oi)->oi_idx_reg
#define SKMEM_OBJ_IDX_SEG(_oi)  (_oi)->oi_idx_seg
/* segment the object belongs to (only for master) */
#define SKMEM_OBJ_SEG(_oi)      (_oi)->oi_bc->bc_slab->sl_seg
/* offset of object relative to the object's own region */
#define SKMEM_OBJ_ROFF(_oi)     \
	((mach_vm_offset_t)(SKMEM_OBJ_SIZE(_oi) * SKMEM_OBJ_IDX_REG(_oi)))

typedef int (*skmem_ctor_fn_t)(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
typedef void (*skmem_dtor_fn_t)(void *, void *);
typedef void (*skmem_reclaim_fn_t)(void *);
typedef int (*skmem_slab_alloc_fn_t)(struct skmem_cache *,
    struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
typedef void (*skmem_slab_free_fn_t)(struct skmem_cache *, void *);
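
/*
 * Hypothetical constructor matching skmem_ctor_fn_t (illustration
 * only; compiled out).  "struct my_obj" and "my_obj_ctor" are not
 * part of Skywalk.  The first argument describes the master object,
 * the second the mirrored (slave) object, if any; the opaque pointer
 * and flags are assumed here to be the creator's private argument
 * and the SKMEM_{SLEEP,NOSLEEP} flags.
 */
#if 0
struct my_obj {
	struct my_obj *mo_next;  /* pointer-sized head, as batching requires */
	obj_idx_t mo_idx;        /* hypothetical: cached region index */
};

static int
my_obj_ctor(struct skmem_obj_info *oi, struct skmem_obj_info *oim,
    void *arg, uint32_t skmflag)
{
#pragma unused(oim, arg, skmflag)
	struct my_obj *mo = SKMEM_OBJ_ADDR(oi);

	mo->mo_idx = SKMEM_OBJ_IDX_REG(oi);
	return 0;       /* nonzero would fail the allocation */
}
#endif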
/*
 * Cache.
 */
struct skmem_cache {
	/*
	 * Commonly-accessed elements during alloc and free.
	 */
	uint32_t skm_mode;                   /* cache mode flags */
	skmem_ctor_fn_t skm_ctor;            /* object constructor */
	skmem_dtor_fn_t skm_dtor;            /* object destructor */
	skmem_reclaim_fn_t skm_reclaim;      /* cache reclaim */
	void *skm_private;                   /* opaque arg to callbacks */

	/*
	 * Depot.
	 */
	decl_lck_mtx_data(, skm_dp_lock);    /* protects depot layer */
	struct skmem_magtype *skm_magtype;   /* magazine type */
	struct skmem_maglist skm_full;       /* full magazines */
	struct skmem_maglist skm_empty;      /* empty magazines */

	/*
	 * Slab.
	 */
	decl_lck_mtx_data(, skm_sl_lock);    /* protects slab layer */
	skmem_slab_alloc_fn_t skm_slab_alloc; /* slab allocate */
	skmem_slab_free_fn_t skm_slab_free;  /* slab free */
	size_t skm_chunksize;                /* bufsize + alignment */
	size_t skm_objsize;                  /* actual obj size in slab */
	size_t skm_slabsize;                 /* size of a slab */
	size_t skm_hash_initial;             /* initial hash table size */
	size_t skm_hash_limit;               /* hash table size limit */
	size_t skm_hash_shift;               /* get to interesting bits */
	size_t skm_hash_mask;                /* hash table mask */
	struct skmem_bufctl_bkt *skm_hash_table; /* alloc'd buffer htable */
	TAILQ_HEAD(, skmem_slab) skm_sl_partial_list; /* partially-allocated */
	TAILQ_HEAD(, skmem_slab) skm_sl_empty_list; /* fully-allocated */
	struct skmem_region *skm_region;     /* region source for slabs */

	/*
	 * Statistics.
	 */
	uint32_t skm_cpu_mag_size;           /* current magazine size */
	uint32_t skm_cpu_mag_resize;         /* # of magazine resizes */
	uint32_t skm_cpu_mag_purge;          /* # of magazine purges */
	uint32_t skm_cpu_mag_reap;           /* # of magazine reaps */
	uint64_t skm_depot_contention;       /* mutex contention count */
	uint64_t skm_depot_contention_prev;  /* previous snapshot */
	uint32_t skm_depot_full;             /* # of full magazines */
	uint32_t skm_depot_empty;            /* # of empty magazines */
	uint32_t skm_depot_ws_zero;          /* # of working set flushes */
	uint32_t skm_sl_rescale;             /* # of hash table rescales */
	uint32_t skm_sl_create;              /* slab creates */
	uint32_t skm_sl_destroy;             /* slab destroys */
	uint32_t skm_sl_alloc;               /* slab layer allocations */
	uint32_t skm_sl_free;                /* slab layer frees */
	uint32_t skm_sl_partial;             /* # of partial slabs */
	uint32_t skm_sl_empty;               /* # of empty slabs */
	uint64_t skm_sl_alloc_fail;          /* total failed allocations */
	uint64_t skm_sl_bufinuse;            /* total unfreed buffers */
	uint64_t skm_sl_bufmax;              /* max buffers ever */

	/*
	 * Cache properties.
	 */
	TAILQ_ENTRY(skmem_cache) skm_link;   /* cache linkage */
	char skm_name[64];                   /* cache name */
	uuid_t skm_uuid;                     /* cache uuid */
	size_t skm_bufsize;                  /* buffer size */
	size_t skm_bufalign;                 /* buffer alignment */
	size_t skm_objalign;                 /* object alignment */

	/*
	 * CPU layer, aligned at (maximum) cache line boundary.
	 */
	decl_lck_mtx_data(, skm_rs_lock);    /* protects resizing */
	struct thread *skm_rs_owner;         /* resize owner */
	uint32_t skm_rs_busy;                /* prevent resizing */
	uint32_t skm_rs_want;                /* # of threads blocked */
	struct skmem_cpu_cache skm_cpu_cache[1]
	    __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
};

#define SKMEM_CACHE_SIZE(n)     \
	offsetof(struct skmem_cache, skm_cpu_cache[n])

#define SKMEM_CPU_CACHE(c)      \
	((struct skmem_cpu_cache *)((void *)((char *)(c) + \
	SKMEM_CACHE_SIZE(cpu_number()))))

/* valid values for skm_mode, set only by skmem_cache_create() */
#define SKM_MODE_NOMAGAZINES    0x00000001 /* disable magazines layer */
#define SKM_MODE_AUDIT          0x00000002 /* audit transactions */
#define SKM_MODE_NOREDIRECT     0x00000004 /* unaffected by defunct */
#define SKM_MODE_BATCH          0x00000008 /* supports batch alloc/free */
#define SKM_MODE_DYNAMIC        0x00000010 /* enable magazine resizing */
#define SKM_MODE_CLEARONFREE    0x00000020 /* zero-out upon slab free */
#define SKM_MODE_PSEUDO         0x00000040 /* external backing store */
#define SKM_MODE_RECLAIM        0x00000080 /* aggressive memory reclaim */

#define SKM_MODE_BITS           \
	"\020\01NOMAGAZINES\02AUDIT\03NOREDIRECT\04BATCH\05DYNAMIC" \
	"\06CLEARONFREE\07PSEUDO\10RECLAIM"

/*
 * Valid flags for sk{mem,region}_alloc().  SKMEM_FAILOK is valid only if
 * SKMEM_SLEEP is set, i.e. SKMEM_{NOSLEEP,FAILOK} are mutually exclusive.
 * If set, SKMEM_FAILOK indicates that the segment allocation may fail,
 * and that the cache layer would handle the retries rather than blocking
 * inside the region allocator.
 */
#define SKMEM_SLEEP             0x0 /* can block for memory; won't fail */
#define SKMEM_NOSLEEP           0x1 /* cannot block for memory; may fail */
#define SKMEM_PANIC             0x2 /* panic upon allocation failure */
#define SKMEM_FAILOK            0x4 /* can fail for blocking alloc */

/* valid flag values for skmem_cache_create() */
#define SKMEM_CR_NOMAGAZINES    0x1  /* disable magazines layer */
#define SKMEM_CR_BATCH          0x2  /* support batch alloc/free */
#define SKMEM_CR_DYNAMIC        0x4  /* enable magazine resizing */
#define SKMEM_CR_CLEARONFREE    0x8  /* zero-out upon slab free */
#define SKMEM_CR_RECLAIM        0x10 /* aggressive memory reclaim */

__BEGIN_DECLS
/*
 * Given a buffer control, add a use count to it.
 */
__attribute__((always_inline))
static inline void
skmem_bufctl_use(struct skmem_bufctl *bc)
{
	uint32_t old, new;

	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
		new = old + 1;
		VERIFY(new != 0);
		ASSERT(new == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
	});
}

/*
 * Given a buffer control, remove a use count from it (returns new value).
 */
__attribute__((always_inline))
static inline uint32_t
skmem_bufctl_unuse(struct skmem_bufctl *bc)
{
	uint32_t old, new;

	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
		new = old - 1;
		VERIFY(old != 0);
		ASSERT(old == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
	});
	return new;
}

extern void skmem_cache_pre_init(void);
extern void skmem_cache_init(void);
extern void skmem_cache_fini(void);
extern struct skmem_cache *skmem_cache_create(const char *, size_t, size_t,
    skmem_ctor_fn_t, skmem_dtor_fn_t, skmem_reclaim_fn_t, void *,
    struct skmem_region *, uint32_t);
extern void skmem_cache_destroy(struct skmem_cache *);
extern void *skmem_cache_alloc(struct skmem_cache *, uint32_t);
extern uint32_t skmem_cache_batch_alloc(struct skmem_cache *,
    struct skmem_obj **list, uint32_t, uint32_t);
extern void skmem_cache_free(struct skmem_cache *, void *);
extern void skmem_cache_batch_free(struct skmem_cache *, struct skmem_obj *);
extern void skmem_cache_reap_now(struct skmem_cache *, boolean_t);
extern void skmem_cache_reap(void);
extern void skmem_reap_caches(boolean_t);
extern void skmem_cache_get_obj_info(struct skmem_cache *, void *,
    struct skmem_obj_info *, struct skmem_obj_info *);
extern uint32_t skmem_cache_magazine_max(uint32_t);
extern boolean_t skmem_allow_magazines(void);
#if (DEVELOPMENT || DEBUG)
extern void skmem_cache_test_start(uint32_t);
extern void skmem_cache_test_stop(void);
#endif /* (DEVELOPMENT || DEBUG) */
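
/*
 * Usage sketch (illustration only; compiled out).  "my_cache", the
 * 128-byte element size, the object count and the region argument
 * are all assumptions, not part of this header.  Batch operation
 * requires SKMEM_CR_BATCH and elements at least the size of struct
 * skmem_obj, per the comment above that structure.
 */
#if 0
static void
skmem_cache_example(struct skmem_region *skr)
{
	struct skmem_cache *skm;
	struct skmem_obj *list = NULL;
	void *obj;

	skm = skmem_cache_create("my_cache", 128, 0, NULL, NULL, NULL,
	    NULL, skr, SKMEM_CR_BATCH);

	/* blocking alloc; plain SKMEM_SLEEP does not fail */
	obj = skmem_cache_alloc(skm, SKMEM_SLEEP);
	skmem_cache_free(skm, obj);

	/* non-blocking batch alloc; may return fewer than requested */
	if (skmem_cache_batch_alloc(skm, &list, 32, SKMEM_NOSLEEP) != 0) {
		skmem_cache_batch_free(skm, list);
	}

	skmem_cache_destroy(skm);
}
#endif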
__END_DECLS
#endif /* BSD_KERNEL_PRIVATE */
#endif /* _SKYWALK_MEM_SKMEMCACHEVAR_H */