/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @Apple_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/kdebug_common.h>

LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");

int kdbg_debug = 0;

extern struct kd_control kd_control_trace, kd_control_triage;

int
kdebug_storage_lock(struct kd_control *kd_ctrl_page)
{
	int intrs_en = ml_set_interrupts_enabled(false);
	lck_spin_lock_grp(&kd_ctrl_page->kdc_storage_lock, &kdebug_lck_grp);
	return intrs_en;
}

void
kdebug_storage_unlock(struct kd_control *kd_ctrl_page, int intrs_en)
{
	lck_spin_unlock(&kd_ctrl_page->kdc_storage_lock);
	ml_set_interrupts_enabled(intrs_en);
}

// Turn on boot tracing and set the number of events.
static TUNABLE(unsigned int, new_nkdbufs, "trace", 0);
// Enable wrapping during boot tracing.
TUNABLE(unsigned int, trace_wrap, "trace_wrap", 0);
// The filter description to apply to boot tracing.
static TUNABLE_STR(trace_typefilter, 256, "trace_typefilter", "");
// Turn on wake tracing and set the number of events.
TUNABLE(unsigned int, wake_nkdbufs, "trace_wake", 0);
// Write trace events to a file in the event of a panic.
TUNABLE(unsigned int, write_trace_on_panic, "trace_panic", 0);
// Obsolete leak logging system.
TUNABLE(int, log_leaks, "-l", 0);

void
kdebug_startup(void)
{
	lck_spin_init(&kd_control_trace.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	lck_spin_init(&kd_control_triage.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	kdebug_init(new_nkdbufs, trace_typefilter,
	    (trace_wrap ? KDOPT_WRAPPING : 0) | KDOPT_ATBOOT);
	create_buffers_triage();
}
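/*
 * Illustrative note (not part of the original source): a minimal sketch of
 * how the tunables above are typically exercised from the boot command line,
 * assuming the standard boot-args/nvram mechanism.  The typefilter string is
 * parsed elsewhere (by kdebug_init() and the typefilter code), so treat the
 * values below as placeholders rather than a syntax reference:
 *
 *   # Capture 1000000 events from boot, allow the buffers to wrap, and
 *   # restrict collection with a class/subclass filter.
 *   nvram boot-args="trace=1000000 trace_wrap=1 trace_typefilter=C1,S0x0130"
 *
 *   # Reserve 150000 events for wake tracing and dump the trace buffers
 *   # to a file if the system panics.
 *   nvram boot-args="trace_wake=150000 trace_panic=1"
 */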
uint32_t
kdbg_cpu_count(void)
{
#if defined(__x86_64__)
	return ml_early_cpu_max_number() + 1;
#else // defined(__x86_64__)
	return ml_get_cpu_count();
#endif // !defined(__x86_64__)
}

/*
 * Both kdebug_timestamp and kdebug_using_continuous_time are known to kexts,
 * and going forward we always want to use mach_continuous_time().  So keep
 * these two routines as-is to leave TRACE mode usage outside the kernel
 * intact.  TRIAGE mode explicitly uses only mach_continuous_time() for its
 * timestamps.
 */
bool
kdebug_using_continuous_time(void)
{
	return kd_control_trace.kdc_flags & KDBG_CONTINUOUS_TIME;
}

uint64_t
kdebug_timestamp(void)
{
	if (kdebug_using_continuous_time()) {
		return mach_continuous_time();
	} else {
		return mach_absolute_time();
	}
}

int
create_buffers(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page,
    vm_tag_t tag)
{
	unsigned int i;
	unsigned int p_buffer_size;
	unsigned int f_buffer_size;
	unsigned int f_buffers;
	int error = 0;
	int ncpus, count_storage_units = 0;

	struct kd_bufinfo *kdbip = NULL;
	struct kd_region *kd_bufs = NULL;

	int kdb_storage_count = kd_data_page->kdb_storage_count;

	ncpus = kd_ctrl_page->alloc_cpus;

	kdbip = kalloc_type_tag(struct kd_bufinfo, ncpus, Z_WAITOK | Z_ZERO, tag);
	if (kdbip == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kdb_info = kdbip;

	f_buffers = kdb_storage_count / N_STORAGE_UNITS_PER_BUFFER;
	kd_data_page->kdb_region_count = f_buffers;

	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
	p_buffer_size = (kdb_storage_count % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);

	if (p_buffer_size) {
		kd_data_page->kdb_region_count++;
	}

	if (kd_data_page->kdcopybuf == 0) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdcopybuf,
		    (vm_size_t)kd_ctrl_page->kdebug_kdcopybuf_size,
		    KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
	}

	kd_bufs = kalloc_type_tag(struct kd_region, kd_data_page->kdb_region_count,
	    Z_WAITOK | Z_ZERO, tag);
	if (kd_bufs == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kd_bufs = kd_bufs;

	for (i = 0; i < f_buffers; i++) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)f_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
		kd_bufs[i].kdr_size = f_buffer_size;
	}
	if (p_buffer_size) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)p_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
		kd_bufs[i].kdr_size = p_buffer_size;
	}

	count_storage_units = 0;
	for (i = 0; i < kd_data_page->kdb_region_count; i++) {
		struct kd_storage *kds;
		uint16_t n_elements;
		static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX);
		assert(kd_bufs[i].kdr_size <= N_STORAGE_UNITS_PER_BUFFER *
		    sizeof(struct kd_storage));

		n_elements = kd_bufs[i].kdr_size / sizeof(struct kd_storage);
		kds = kd_bufs[i].kdr_addr;

		for (uint16_t n = 0; n < n_elements; n++) {
			kds[n].kds_next.buffer_index = kd_ctrl_page->kds_free_list.buffer_index;
			kds[n].kds_next.offset = kd_ctrl_page->kds_free_list.offset;

			kd_ctrl_page->kds_free_list.buffer_index = i;
			kd_ctrl_page->kds_free_list.offset = n;
		}
		count_storage_units += n_elements;
	}

	kd_data_page->kdb_storage_count = count_storage_units;

	for (i = 0; i < ncpus; i++) {
		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
		kdbip[i].kd_lostevents = false;
		kdbip[i].num_bufs = 0;
	}

	kd_ctrl_page->kdc_flags |= KDBG_BUFINIT;

	kd_ctrl_page->kdc_storage_used = 0;
out:
	if (error) {
		delete_buffers(kd_ctrl_page, kd_data_page);
	}
	return error;
}
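#if 0
/*
 * Illustrative sketch (not part of the original source): how the compressed
 * {buffer_index, offset} form of a union kds_ptr is assumed to resolve to a
 * struct kd_storage within one of the regions allocated by create_buffers()
 * above.  The real resolution is done by the POINTER_FROM_KDS_PTR() macro;
 * this stand-in only exists to make the free-list linkage easier to read.
 */
static struct kd_storage *
kds_ptr_resolve_example(struct kd_region *kd_bufs, union kds_ptr kdsp)
{
	/* buffer_index selects the region; offset selects the unit within it. */
	return &kd_bufs[kdsp.buffer_index].kdr_addr[kdsp.offset];
}
#endif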
void
delete_buffers(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page)
{
	unsigned int i;
	int kdb_region_count = kd_data_page->kdb_region_count;

	struct kd_bufinfo *kdbip = kd_data_page->kdb_info;
	struct kd_region *kd_bufs = kd_data_page->kd_bufs;

	if (kd_bufs) {
		for (i = 0; i < kdb_region_count; i++) {
			if (kd_bufs[i].kdr_addr) {
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdr_addr,
				    (vm_size_t)kd_bufs[i].kdr_size);
			}
		}
		kfree_type(struct kd_region, kdb_region_count, kd_bufs);

		kd_data_page->kd_bufs = NULL;
		kd_data_page->kdb_region_count = 0;
	}
	if (kd_data_page->kdcopybuf) {
		kmem_free(kernel_map, (vm_offset_t)kd_data_page->kdcopybuf,
		    kd_ctrl_page->kdebug_kdcopybuf_size);

		kd_data_page->kdcopybuf = NULL;
	}
	kd_ctrl_page->kds_free_list.raw = KDS_PTR_NULL;

	if (kdbip) {
		kfree_type(struct kd_bufinfo, kd_ctrl_page->alloc_cpus, kdbip);
		kd_data_page->kdb_info = NULL;
	}
	kd_ctrl_page->kdc_coprocs = NULL;
	kd_ctrl_page->kdebug_cpus = 0;
	kd_ctrl_page->alloc_cpus = 0;
	kd_ctrl_page->kdc_flags &= ~KDBG_BUFINIT;
}

static bool
allocate_storage_unit(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page, int cpu)
{
	union kds_ptr kdsp;
	struct kd_storage *kdsp_actual, *kdsp_next_actual;
	struct kd_bufinfo *kdbip, *kdbp, *kdbp_vict, *kdbp_try;
	uint64_t oldest_ts, ts;
	bool retval = true;
	struct kd_region *kd_bufs;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	kdbp = &kd_data_page->kdb_info[cpu];
	kd_bufs = kd_data_page->kd_bufs;
	kdbip = kd_data_page->kdb_info;

	/* If someone beat us to the allocation, return success. */
	if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail);

		if (kdsp_actual->kds_bufindx < kd_ctrl_page->kdebug_events_per_storage_unit) {
			goto out;
		}
	}

	if ((kdsp = kd_ctrl_page->kds_free_list).raw != KDS_PTR_NULL) {
		/*
		 * If there's a free page, grab it from the free list.
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
		kd_ctrl_page->kds_free_list = kdsp_actual->kds_next;

		kd_ctrl_page->kdc_storage_used++;
	} else {
		/*
		 * Otherwise, we're going to lose events and repurpose the oldest
		 * storage unit we can find.
		 */
		if (kd_ctrl_page->kdc_live_flags & KDBG_NOWRAP) {
			kd_ctrl_page->kdc_emit = KDEMIT_DISABLE;
			kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED;
			kdebug_enable = 0;
			kd_ctrl_page->enabled = 0;
			commpage_update_kdebug_state();
			kdbp->kd_lostevents = true;
			retval = false;
			goto out;
		}
		kdbp_vict = NULL;
		oldest_ts = UINT64_MAX;

		for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page->kdebug_cpus]; kdbp_try++) {
			if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
				/*
				 * No storage unit to steal.
				 */
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_try->kd_list_head);

			if (kdsp_actual->kds_bufcnt < kd_ctrl_page->kdebug_events_per_storage_unit) {
				/*
				 * Make sure we don't steal the storage unit that's being
				 * actively recorded to...  move on, because we don't want an
				 * out-of-order set of events showing up later.
				 */
				continue;
			}

			/*
			 * When wrapping, steal the storage unit with the earliest
			 * timestamp on its last event, instead of the earliest
			 * timestamp on the first event.  This allows a storage unit
			 * with more recent events to be preserved, even if the
			 * storage unit contains events that are older than those
			 * found in other CPUs.
			 */
			ts = kdbg_get_timestamp(&kdsp_actual->kds_records[kd_ctrl_page->kdebug_events_per_storage_unit - 1]);

			if (ts < oldest_ts) {
				oldest_ts = ts;
				kdbp_vict = kdbp_try;
			}
		}
		if (kdbp_vict == NULL && kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			kd_ctrl_page->kdc_emit = KDEMIT_DISABLE;
			kdebug_enable = 0;
			kd_ctrl_page->enabled = 0;
			commpage_update_kdebug_state();
			retval = false;
			goto out;
		}
		kdsp = kdbp_vict->kd_list_head;
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
		kdbp_vict->kd_list_head = kdsp_actual->kds_next;

		if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
			kdsp_next_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_vict->kd_list_head);
			kdsp_next_actual->kds_lostevents = true;
		} else {
			kdbp_vict->kd_lostevents = true;
		}

		if (kd_ctrl_page->kdc_oldest_time < oldest_ts) {
			kd_ctrl_page->kdc_oldest_time = oldest_ts;
		}
		kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		kdsp_actual->kds_timestamp = kdebug_timestamp();
	} else {
		kdsp_actual->kds_timestamp = mach_continuous_time();
	}

	kdsp_actual->kds_next.raw = KDS_PTR_NULL;
	kdsp_actual->kds_bufcnt = 0;
	kdsp_actual->kds_readlast = 0;

	kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
	kdbp->kd_lostevents = false;
	kdsp_actual->kds_bufindx = 0;

	if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
		kdbp->kd_list_head = kdsp;
	} else {
		POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail)->kds_next = kdsp;
	}
	kdbp->kd_list_tail = kdsp;
out:
	kdebug_storage_unlock(kd_ctrl_page, intrs_en);

	return retval;
}
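#if 0
/*
 * Illustrative sketch (not part of the original source): walking one CPU's
 * chain of storage units from head to tail via kds_next -- the same linkage
 * that allocate_storage_unit() appends to and release_storage_unit() pops
 * from.  It assumes the POINTER_FROM_KDS_PTR() resolution sketched earlier;
 * no such counting helper exists in this file.
 */
static unsigned int
kdebug_count_storage_units_example(struct kd_buffer *kd_data_page, int cpu)
{
	unsigned int n = 0;
	union kds_ptr kdsp = kd_data_page->kdb_info[cpu].kd_list_head;

	while (kdsp.raw != KDS_PTR_NULL) {
		struct kd_storage *kds = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp);
		n += 1;
		kdsp = kds->kds_next;
	}
	return n;
}
#endif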
static void
release_storage_unit(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page,
    int cpu, uint32_t kdsp_raw)
{
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	union kds_ptr kdsp;

	kdbp = &kd_data_page->kdb_info[cpu];

	kdsp.raw = kdsp_raw;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	if (kdsp.raw == kdbp->kd_list_head.raw) {
		/*
		 * It's possible for the storage unit pointed to by kdsp to have
		 * already been stolen, so check that it's still the head of the
		 * list now that we're behind the lock that protects adding and
		 * removing from the queue.  Since we only ever release and steal
		 * units from that position, if it's no longer the head we have
		 * nothing to do in this context.
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp);
		kdbp->kd_list_head = kdsp_actual->kds_next;

		kdsp_actual->kds_next = kd_ctrl_page->kds_free_list;
		kd_ctrl_page->kds_free_list = kdsp;

		kd_ctrl_page->kdc_storage_used--;
	}
	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}

bool
kdebug_disable_wrap(struct kd_control *ctl, kdebug_emit_filter_t *old_emit,
    kdebug_live_flags_t *old_live)
{
	int intrs_en = kdebug_storage_lock(ctl);

	*old_emit = ctl->kdc_emit;
	*old_live = ctl->kdc_live_flags;

	bool wrapped = ctl->kdc_live_flags & KDBG_WRAPPED;
	ctl->kdc_live_flags &= ~KDBG_WRAPPED;
	ctl->kdc_live_flags |= KDBG_NOWRAP;

	kdebug_storage_unlock(ctl, intrs_en);

	return wrapped;
}

static void
_enable_wrap(struct kd_control *kd_ctrl_page, kdebug_emit_filter_t emit)
{
	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	kd_ctrl_page->kdc_live_flags &= ~KDBG_NOWRAP;
	if (emit) {
		kd_ctrl_page->kdc_emit = emit;
	}

	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}
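#if 0
/*
 * Illustrative sketch (not part of the original source): how a caller might
 * fill in a struct kd_record before handing it to kernel_debug_write() below.
 * Field names and the -1 sentinels follow their use in kernel_debug_write();
 * the kd_buf_argtype parameter types and the use of thread_tid() for arg5 are
 * assumptions, and the real entry points (the kernel_debug*() wrappers) live
 * elsewhere, so this is a sketch of the calling convention only.
 */
static void
kdebug_write_example(struct kd_control *ctl, struct kd_buffer *buf,
    uint32_t debugid, kd_buf_argtype a1, kd_buf_argtype a2,
    kd_buf_argtype a3, kd_buf_argtype a4)
{
	struct kd_record kd_rec = {
		.cpu = -1,              /* let kernel_debug_write() use cpu_number() */
		.timestamp = -1,        /* let kernel_debug_write() read the clock */
		.debugid = debugid,
		.arg1 = a1,
		.arg2 = a2,
		.arg3 = a3,
		.arg4 = a4,
		.arg5 = (kd_buf_argtype)thread_tid(current_thread()),
	};
	kernel_debug_write(ctl, buf, kd_rec);
}
#endif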
__attribute__((always_inline))
void
kernel_debug_write(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page,
    struct kd_record kd_rec)
{
	uint64_t now = 0;
	uint32_t bindx;
	kd_buf *kd;
	int cpu;
	struct kd_bufinfo *kdbp;
	struct kd_storage *kdsp_actual;
	union kds_ptr kds_raw;

	disable_preemption();

	if (kd_ctrl_page->enabled == 0) {
		goto out;
	}

	if (kd_rec.cpu == -1) {
		cpu = cpu_number();
	} else {
		cpu = kd_rec.cpu;
	}

	kdbp = &kd_data_page->kdb_info[cpu];

	bool timestamp_is_continuous = kdbp->continuous_timestamps;

	if (kd_rec.timestamp != -1) {
		if (kdebug_using_continuous_time()) {
			if (!timestamp_is_continuous) {
				kd_rec.timestamp = absolutetime_to_continuoustime(kd_rec.timestamp);
			}
		} else {
			if (timestamp_is_continuous) {
				kd_rec.timestamp = continuoustime_to_absolutetime(kd_rec.timestamp);
			}
		}
		kd_rec.timestamp &= KDBG_TIMESTAMP_MASK;
		if (kd_rec.timestamp < kd_ctrl_page->kdc_oldest_time) {
			if (kdbp->latest_past_event_timestamp < kd_rec.timestamp) {
				kdbp->latest_past_event_timestamp = kd_rec.timestamp;
			}
			goto out;
		}
	}

retry_q:
	kds_raw = kdbp->kd_list_tail;

	if (kds_raw.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kds_raw);
		bindx = kdsp_actual->kds_bufindx;
	} else {
		kdsp_actual = NULL;
		bindx = kd_ctrl_page->kdebug_events_per_storage_unit;
	}

	if (kdsp_actual == NULL || bindx >= kd_ctrl_page->kdebug_events_per_storage_unit) {
		if (allocate_storage_unit(kd_ctrl_page, kd_data_page, cpu) == false) {
			/*
			 * This can only happen if wrapping has been disabled.
			 */
			goto out;
		}
		goto retry_q;
	}

	if (kd_rec.timestamp != -1) {
		/*
		 * IOP entries can be allocated before xnu allocates and initializes
		 * the buffer.  Also, Intel uses a special timestamp of 0 as an early
		 * tracing sentinel to mark the start of the trace time of interest.
		 */
		if (kd_rec.timestamp < kdsp_actual->kds_timestamp) {
			kdsp_actual->kds_timestamp = kd_rec.timestamp;
		}
		now = kd_rec.timestamp;
	} else {
		if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			now = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
		} else {
			now = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
		}
	}

	if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
		goto retry_q;
	}

	kd = &kdsp_actual->kds_records[bindx];

	if (kd_ctrl_page->kdc_flags & KDBG_DEBUGID_64) {
		/* The debugid has been passed in arg4. */
		kd->debugid = 0;
	} else {
		kd->debugid = kd_rec.debugid;
	}

	kd->arg1 = kd_rec.arg1;
	kd->arg2 = kd_rec.arg2;
	kd->arg3 = kd_rec.arg3;
	kd->arg4 = kd_rec.arg4;
	kd->arg5 = kd_rec.arg5;

	kdbg_set_timestamp_and_cpu(kd, now, cpu);

	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
out:
	enable_preemption();
}

// Read events from kdebug storage units into a user space buffer or file.
//
// This code runs while events are emitted -- storage unit allocation and
// deallocation will synchronize with the emitters.  Only one reader per
// control structure is allowed.
int
kernel_debug_read(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page,
    user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx,
    uint32_t file_version)
{
	size_t count;
	unsigned int cpu, min_cpu;
	uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
	int error = 0;
	kd_buf *tempbuf;
	uint32_t rcursor;
	kd_buf lostevent;
	union kds_ptr kdsp;
	bool traced_retrograde = false;
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	struct kd_bufinfo *min_kdbp;
	size_t tempbuf_count;
	uint32_t tempbuf_number;
	kdebug_emit_filter_t old_emit;
	uint32_t old_live_flags;
	bool out_of_events = false;
	bool wrapped = false;
	bool set_preempt = true;
	bool should_disable = false;

	struct kd_bufinfo *kdbip = kd_data_page->kdb_info;
	struct kd_region *kd_bufs = kd_data_page->kd_bufs;

	assert(number != NULL);
	count = *number / sizeof(kd_buf);
	*number = 0;

	if (count == 0 || !(kd_ctrl_page->kdc_flags & KDBG_BUFINIT) ||
	    kd_data_page->kdcopybuf == 0) {
		return EINVAL;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * A corpse can be created due to 'TASK_HAS_TOO_MANY_THREADS', and
		 * that can be handled by a callout thread that already has
		 * eager-preemption set.  So check whether we are dealing with one
		 * such thread.
		 */
		set_preempt = !(thread_is_eager_preempt(current_thread()));
	}

	if (set_preempt) {
		thread_set_eager_preempt(current_thread());
	}

	memset(&lostevent, 0, sizeof(lostevent));
	lostevent.debugid = TRACE_LOST_EVENTS;

	/*
	 * Capture the current time.  Only sort events that have occurred
	 * before now.  Since the IOPs are being flushed here, it is possible
	 * that events occur on the AP while running live tracing.
	 */
	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		barrier_max = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
	} else {
		barrier_max = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
	}

	/*
	 * Disable wrap so storage units cannot be stolen out from underneath us
	 * while merging events.
	 *
	 * Because we hold ktrace_lock, no other control threads can be playing
	 * with kdc_flags.  The code that emits new events could be running,
	 * but it grabs kdc_storage_lock if it needs to acquire a new storage
	 * chunk, which is where it examines kdc_flags.  If it is adding to
	 * the same chunk we're reading from, check for that below.
	 */
	wrapped = kdebug_disable_wrap(kd_ctrl_page, &old_emit, &old_live_flags);

	if (count > kd_data_page->kdb_event_count) {
		count = kd_data_page->kdb_event_count;
	}

	if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
		tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
	}

	/*
	 * If the buffers have wrapped, do not emit additional lost events for the
	 * oldest storage units.
	 */
	if (wrapped) {
		kd_ctrl_page->kdc_live_flags &= ~KDBG_WRAPPED;

		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_lostevents = false;
		}
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * In TRIAGE mode we want to extract all the current records,
		 * regardless of where we stopped reading last time, so that we
		 * have the best shot at getting older records for threads before
		 * the buffers are wrapped.  So set:
		 * a) kd_prev_timebase to 0 so we (re-)consider older records
		 * b) readlast to 0 to initiate the search from the 1st record.
		 */
		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			kdbp->kd_prev_timebase = 0;
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_readlast = 0;
		}
	}

	/*
	 * Capture the earliest time where there are events for all CPUs and don't
	 * emit events with timestamps prior.
	 */
	barrier_min = kd_ctrl_page->kdc_oldest_time;

	while (count) {
		tempbuf = kd_data_page->kdcopybuf;
		tempbuf_number = 0;

		if (wrapped) {
			/*
			 * Emit a lost events tracepoint to indicate that previous events
			 * were lost -- the thread map cannot be trusted.  A new one must
			 * be taken so tools can analyze the trace in a backwards-facing
			 * fashion.
			 */
			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
			*tempbuf = lostevent;
			wrapped = false;
			goto nextevent;
		}

		/* While space left in merged events scratch buffer. */
		while (tempbuf_count) {
			bool lostevents = false;
			int lostcpu = 0;
			earliest_time = UINT64_MAX;
			min_kdbp = NULL;
			min_cpu = 0;

			/* Check each CPU's buffers for the earliest event. */
			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
				/* Skip CPUs without data in their oldest storage unit. */
				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
next_cpu:
					continue;
				}
				/* From CPU data to buffer header to buffer. */
				kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

next_event:
				/* The next event to be read from this buffer. */
				rcursor = kdsp_actual->kds_readlast;

				/* Skip this buffer if there are no events left. */
				if (rcursor == kdsp_actual->kds_bufindx) {
					continue;
				}

				if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
					/*
					 * TRIAGE mode record keeping doesn't (currently) use
					 * lostevent markers.  It also doesn't want to call
					 * release_storage_unit() in this read call -- it expects
					 * the buffers to wrap and records to be reclaimed solely
					 * in that way.
					 */
					t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
					goto skip_record_checks;
				}

				/*
				 * Check that this storage unit wasn't stolen and events were
				 * lost.  This must have happened while wrapping was disabled
				 * in this function.
				 */
				if (kdsp_actual->kds_lostevents) {
					lostevents = true;
					kdsp_actual->kds_lostevents = false;

					/*
					 * The earliest event we can trust is the first one in this
					 * stolen storage unit.
					 */
					uint64_t lost_time =
					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
					if (kd_ctrl_page->kdc_oldest_time < lost_time) {
						/*
						 * If this is the first time we've seen lost events
						 * for this gap, record its timestamp as the oldest
						 * timestamp we're willing to merge for the lost
						 * events tracepoint.
						 */
						kd_ctrl_page->kdc_oldest_time = barrier_min = lost_time;
						lostcpu = cpu;
					}
				}

				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);

				if (t > barrier_max) {
					goto next_cpu;
				}
				if (t < kdsp_actual->kds_timestamp) {
					/*
					 * This indicates the event emitter hasn't completed
					 * filling in the event (because we're looking at the
					 * buffer that the record head is using).  The max barrier
					 * timestamp should have saved us from seeing these kinds
					 * of things, but other CPUs might be slow on the uptake.
					 *
					 * Bail out so we don't get out-of-order events by
					 * continuing to read events from other CPUs' events.
					 */
					out_of_events = true;
					break;
				}

				/*
				 * Ignore events that have aged out due to wrapping or storage
				 * unit exhaustion while merging events.
				 */
				if (t < barrier_min) {
					kdsp_actual->kds_readlast++;
					if (kdsp_actual->kds_readlast >= kd_ctrl_page->kdebug_events_per_storage_unit) {
						release_storage_unit(kd_ctrl_page, kd_data_page, cpu, kdsp.raw);

						if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
							goto next_cpu;
						}
						kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
					}
					goto next_event;
				}

				/*
				 * Don't worry about merging any events -- just walk through
				 * the CPUs and find the latest timestamp of lost events.
				 */
				if (lostevents) {
					continue;
				}

skip_record_checks:
				if (t < earliest_time) {
					earliest_time = t;
					min_kdbp = kdbp;
					min_cpu = cpu;
				}
			}
			if (lostevents) {
				/*
				 * If any lost events were hit in the buffers, emit an event
				 * with the latest timestamp.
				 */
				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
				*tempbuf = lostevent;
				tempbuf->arg1 = 1;
				goto nextevent;
			}
			if (min_kdbp == NULL) {
				/* All buffers ran empty. */
				out_of_events = true;
			}
			if (out_of_events) {
				break;
			}

			kdsp = min_kdbp->kd_list_head;
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

			if (min_kdbp->latest_past_event_timestamp != 0) {
				if (kdbg_debug) {
					printf("kdebug: PAST EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, earliest_time, min_cpu);
				tempbuf->arg1 = (kd_buf_argtype)min_kdbp->latest_past_event_timestamp;
				tempbuf->arg2 = 0;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_PAST_EVENTS;
				min_kdbp->latest_past_event_timestamp = 0;

				goto nextevent;
			}

			/* Copy earliest event into merged events scratch buffer. */
			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
			kd_buf *earliest_event = tempbuf;
			if (kd_control_trace.kdc_flags & KDBG_MATCH_DISABLE) {
				kd_event_matcher *match = &kd_control_trace.disable_event_match;
				kd_event_matcher *mask = &kd_control_trace.disable_event_mask;
				if ((earliest_event->debugid & mask->kem_debugid) == match->kem_debugid &&
				    (earliest_event->arg1 & mask->kem_args[0]) == match->kem_args[0] &&
				    (earliest_event->arg2 & mask->kem_args[1]) == match->kem_args[1] &&
				    (earliest_event->arg3 & mask->kem_args[2]) == match->kem_args[2] &&
				    (earliest_event->arg4 & mask->kem_args[3]) == match->kem_args[3]) {
					should_disable = true;
				}
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				if (kdsp_actual->kds_readlast == kd_ctrl_page->kdebug_events_per_storage_unit) {
					release_storage_unit(kd_ctrl_page, kd_data_page, min_cpu, kdsp.raw);
				}
			}

			/*
			 * Watch for out of order timestamps (from IOPs).
			 */
			if (earliest_time < min_kdbp->kd_prev_timebase) {
				/*
				 * If we haven't already, emit a retrograde events event.
				 * Otherwise, ignore this event.
				 */
				if (traced_retrograde) {
					continue;
				}
				if (kdbg_debug) {
					printf("kdebug: RETRO EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase,
				    kdbg_get_cpu(tempbuf));
				tempbuf->arg1 = tempbuf->debugid;
				tempbuf->arg2 = (kd_buf_argtype)earliest_time;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
				traced_retrograde = true;
			} else {
				min_kdbp->kd_prev_timebase = earliest_time;
			}
nextevent:
			tempbuf_count--;
			tempbuf_number++;
			tempbuf++;

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE &&
			    (RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
				break;
			}
		}
		if (tempbuf_number) {
			/*
			 * Remember the latest timestamp of events that we've merged so we
			 * don't think we've lost events later.
			 */
			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
			if (kd_ctrl_page->kdc_oldest_time < latest_time) {
				kd_ctrl_page->kdc_oldest_time = latest_time;
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				extern int kernel_debug_trace_write_to_file(user_addr_t *buffer,
				    size_t *number, size_t *count, size_t tempbuf_number,
				    vnode_t vp, vfs_context_t ctx, uint32_t file_version);
				error = kernel_debug_trace_write_to_file(&buffer, number,
				    &count, tempbuf_number, vp, ctx, file_version);
			} else if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
				memcpy((void*)buffer, kd_data_page->kdcopybuf,
				    tempbuf_number * sizeof(kd_buf));
				buffer += tempbuf_number * sizeof(kd_buf);
			} else {
				panic("kdebug: invalid kdebug mode %d", kd_ctrl_page->mode);
			}
			if (error) {
				*number = 0;
				error = EINVAL;
				break;
			}
			count -= tempbuf_number;
			*number += tempbuf_number;
		}
		if (out_of_events) {
			break;
		}

		if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
			tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
		}
	}

	if ((old_live_flags & KDBG_NOWRAP) == 0) {
		_enable_wrap(kd_ctrl_page, old_emit);
	}

	if (set_preempt) {
		thread_clear_eager_preempt(current_thread());
	}

	if (should_disable) {
		kernel_debug_disable();
	}

	return error;
}
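#if 0
/*
 * Illustrative sketch (not part of the original source): a consumer of the
 * merged event stream produced by kernel_debug_read() can watch for the
 * synthetic records it injects -- TRACE_LOST_EVENTS when storage was stolen
 * or wrapping disabled tracing, TRACE_PAST_EVENTS when events arrived behind
 * the oldest merged timestamp, and TRACE_RETROGRADE_EVENTS when an IOP
 * delivered out-of-order timestamps.  The helper name is hypothetical; only
 * the debugid values and kdbg_get_timestamp() come from this file.
 */
static void
kdebug_note_markers_example(kd_buf *events, size_t nevents)
{
	for (size_t i = 0; i < nevents; i++) {
		switch (events[i].debugid) {
		case TRACE_LOST_EVENTS:
			printf("kdebug: events were lost before time %llu\n",
			    kdbg_get_timestamp(&events[i]));
			break;
		case TRACE_PAST_EVENTS:
			printf("kdebug: events arrived prior to time %llu\n",
			    kdbg_get_timestamp(&events[i]));
			break;
		case TRACE_RETROGRADE_EVENTS:
			printf("kdebug: out-of-order timestamp at %llu\n",
			    kdbg_get_timestamp(&events[i]));
			break;
		default:
			break;
		}
	}
}
#endif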