/* Copyright 2013-2019 IBM Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define pr_fmt(fmt) "FSPMEMERR: " fmt #include #include #include #include #include #include /* FSP sends real address of 4K memory page. */ #define MEM_ERR_PAGE_SIZE_4K (1UL << 12) /* maximum number of error event to hold until linux consumes it. */ #define MERR_MAX_RECORD 1024 struct fsp_mem_err_node { struct list_node list; struct OpalMemoryErrorData data; }; static LIST_HEAD(merr_free_list); static LIST_HEAD(mem_error_list); /* * lock is used to protect overwriting of merr_free_list and mem_error_list * list. */ static struct lock mem_err_lock = LOCK_UNLOCKED; DEFINE_LOG_ENTRY(OPAL_RC_MEM_ERR_RES, OPAL_PLATFORM_ERR_EVT, OPAL_MEM_ERR, OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL, OPAL_NA); DEFINE_LOG_ENTRY(OPAL_RC_MEM_ERR_DEALLOC, OPAL_PLATFORM_ERR_EVT, OPAL_MEM_ERR, OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL, OPAL_NA); static bool send_response_to_fsp(u32 cmd_sub_mod) { struct fsp_msg *rsp; int rc = -ENOMEM; rsp = fsp_mkmsg(cmd_sub_mod, 0); if (rsp) rc = fsp_queue_msg(rsp, fsp_freemsg); if (rc) { fsp_freemsg(rsp); /* XXX Generate error logs */ prerror("Error %d queueing FSP memory error reply\n", rc); return false; } return true; } /* * Queue up the memory error message for delivery. * * queue_event_for_delivery get called from two places. * 1) from queue_mem_err_node when new fsp mem error is available and * 2) from completion callback indicating that linux has consumed an message. * * TODO: * There is a chance that, we may not get a free slot to queue our event * for delivery to linux during both the above invocations. In that case * we end up holding events with us until next fsp memory error comes in. * We need to address this case either here OR fix up messaging infrastructure * to make sure at least one slot will always be available per message type. * * XXX: BenH: I changed the msg infrastructure to attempt an allocation * in that case, at least until we clarify a bit better how * we want to handle things. */ static void queue_event_for_delivery(void *data __unused, int staus __unused) { struct fsp_mem_err_node *entry; uint64_t *merr_data; int rc; lock(&mem_err_lock); entry = list_pop(&mem_error_list, struct fsp_mem_err_node, list); unlock(&mem_err_lock); if (!entry) return; /* * struct OpalMemoryErrorData is of (4 * 64 bits) size and well packed * structure. Hence use uint64_t pointer to pass entire structure * using 4 params in generic message format. */ merr_data = (uint64_t *)&entry->data; /* queue up for delivery */ rc = opal_queue_msg(OPAL_MSG_MEM_ERR, NULL, queue_event_for_delivery, merr_data[0], merr_data[1], merr_data[2], merr_data[3]); lock(&mem_err_lock); if (rc) { /* * Failed to queue up the event for delivery. No free slot * available. There is a chance that we are trying to queue * up multiple event at the same time. We may already have * at least one event queued up, in that case we will be * called again through completion callback and we should * be able to grab empty slot then. * * For now, put this node back on mem_error_list. */ list_add(&mem_error_list, &entry->list); } else list_add(&merr_free_list, &entry->list); unlock(&mem_err_lock); } static int queue_mem_err_node(struct OpalMemoryErrorData *merr_evt) { struct fsp_mem_err_node *entry; lock(&mem_err_lock); entry = list_pop(&merr_free_list, struct fsp_mem_err_node, list); if (!entry) { printf("Failed to queue up memory error event.\n"); unlock(&mem_err_lock); return -ENOMEM; } entry->data = *merr_evt; list_add(&mem_error_list, &entry->list); unlock(&mem_err_lock); /* Queue up the event for delivery to OS. */ queue_event_for_delivery(NULL, OPAL_SUCCESS); return 0; } /* Check if memory resilience event for same address already exists. */ static bool is_resilience_event_exist(u64 paddr) { struct fsp_mem_err_node *entry; struct OpalMemoryErrorData *merr_evt; int found = 0; lock(&mem_err_lock); list_for_each(&mem_error_list, entry, list) { merr_evt = &entry->data; if ((merr_evt->type == OPAL_MEM_ERR_TYPE_RESILIENCE) && (be64_to_cpu(merr_evt->u.resilience.physical_address_start) == paddr)) { found = 1; break; } } unlock(&mem_err_lock); return !!found; } /* * handle Memory Resilience error message. * Section 28.2 of Hypervisor to FSP Mailbox Interface Specification. * * The flow for Memory Resilence Event is: * 1. PRD component in FSP gets a recoverable attention from hardware when * there is a corretable/uncorrectable memory error to free up a page. * 2. PRD sends Memory Resilence Command to hypervisor with the real address of * the 4K memory page in which the error occurred. * 3. The hypervisor acknowledges with a status immediately. Immediate * acknowledgment doesn’t require the freeing of the page to be completed. */ static bool handle_memory_resilience(u32 cmd_sub_mod, u64 paddr) { int rc = 0; struct OpalMemoryErrorData mem_err_evt; struct errorlog *buf; memset(&mem_err_evt, 0, sizeof(struct OpalMemoryErrorData)); /* Check arguments */ if (paddr == 0) { prerror("memory resilience: Invalid real address.\n"); return send_response_to_fsp(FSP_RSP_MEM_RES | FSP_STATUS_GENERIC_ERROR); } /* Check if event already exist for same address. */ if (is_resilience_event_exist(paddr)) goto send_response; /* Populate an event. */ mem_err_evt.version = OpalMemErr_V1; mem_err_evt.type = OPAL_MEM_ERR_TYPE_RESILIENCE; switch (cmd_sub_mod) { case FSP_CMD_MEM_RES_CE: /* * Should we keep counter for corrected errors in * sapphire OR let linux (PowerNV) handle it? * * For now, send corrected errors to linux and let * linux handle corrected errors thresholding. */ mem_err_evt.flags |= cpu_to_be16(OPAL_MEM_CORRECTED_ERROR); mem_err_evt.u.resilience.resil_err_type = OPAL_MEM_RESILIENCE_CE; break; case FSP_CMD_MEM_RES_UE: mem_err_evt.u.resilience.resil_err_type = OPAL_MEM_RESILIENCE_UE; break; case FSP_CMD_MEM_RES_UE_SCRB: mem_err_evt.u.resilience.resil_err_type = OPAL_MEM_RESILIENCE_UE_SCRUB; break; } mem_err_evt.u.resilience.physical_address_start = cpu_to_be64(paddr); mem_err_evt.u.resilience.physical_address_end = cpu_to_be64(paddr + MEM_ERR_PAGE_SIZE_4K); /* Queue up the event and inform OS about it. */ rc = queue_mem_err_node(&mem_err_evt); send_response: /* Queue up an OK response to the resilience message itself */ if (!rc) return send_response_to_fsp(FSP_RSP_MEM_RES); else { buf = opal_elog_create(&e_info(OPAL_RC_MEM_ERR_RES), 0); log_append_msg(buf, "OPAL_MEM_ERR: Cannot queue up memory " "resilience error event to the OS"); log_add_section(buf, 0x44455350); log_append_data(buf, (char *) &mem_err_evt, sizeof(struct OpalMemoryErrorData)); log_commit(buf); return false; } } /* update existing event entry if match is found. */ static bool update_memory_deallocation_event(u64 paddr_start, u64 paddr_end) { struct fsp_mem_err_node *entry; struct OpalMemoryErrorData *merr_evt; int found = 0; lock(&mem_err_lock); list_for_each(&mem_error_list, entry, list) { merr_evt = &entry->data; if ((merr_evt->type == OPAL_MEM_ERR_TYPE_DYN_DALLOC) && (be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start) == paddr_start)) { found = 1; if (be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end) < paddr_end) merr_evt->u.dyn_dealloc.physical_address_end = cpu_to_be64(paddr_end); break; } } unlock(&mem_err_lock); return !!found; } /* * Handle dynamic memory deallocation message. * * When a condition occurs in which we need to do a large scale memory * deallocation, PRD will send a starting and ending address of an area of * memory to Hypervisor. Hypervisor then need to use this to deallocate all * pages between and including the addresses. * */ static bool handle_memory_deallocation(u64 paddr_start, u64 paddr_end) { int rc = 0; u8 err = 0; struct OpalMemoryErrorData mem_err_evt; struct errorlog *buf; memset(&mem_err_evt, 0, sizeof(struct OpalMemoryErrorData)); /* Check arguments */ if ((paddr_start == 0) || (paddr_end == 0)) { prerror("memory deallocation: Invalid " "starting/ending real address.\n"); err = FSP_STATUS_GENERIC_ERROR; } /* If we had an error, send response to fsp and return */ if (err) return send_response_to_fsp(FSP_RSP_MEM_DYN_DEALLOC | err); /* * FSP can send dynamic memory deallocation multiple times for the * same address/address ranges. Hence check and update if we already * have sam event queued. */ if (update_memory_deallocation_event(paddr_start, paddr_end)) goto send_response; /* Populate an new event. */ mem_err_evt.version = OpalMemErr_V1; mem_err_evt.type = OPAL_MEM_ERR_TYPE_DYN_DALLOC; mem_err_evt.u.dyn_dealloc.dyn_err_type = OPAL_MEM_DYNAMIC_DEALLOC; mem_err_evt.u.dyn_dealloc.physical_address_start = cpu_to_be64(paddr_start); mem_err_evt.u.dyn_dealloc.physical_address_end = cpu_to_be64(paddr_end); /* Queue up the event and inform OS about it. */ rc = queue_mem_err_node(&mem_err_evt); send_response: /* Queue up an OK response to the memory deallocation message itself */ if (!rc) return send_response_to_fsp(FSP_RSP_MEM_DYN_DEALLOC); else { buf = opal_elog_create(&e_info(OPAL_RC_MEM_ERR_DEALLOC), 0); log_append_msg(buf, "OPAL_MEM_ERR: Cannot queue up memory " "deallocation error event to the OS"); log_add_section(buf, 0x44455350); log_append_data(buf, (char *)&mem_err_evt, sizeof(struct OpalMemoryErrorData)); log_commit(buf); return false; } } /* Receive a memory error mesages and handle it. */ static bool fsp_mem_err_msg(u32 cmd_sub_mod, struct fsp_msg *msg) { u64 paddr_start, paddr_end; printf("Received 0x%08ux command\n", cmd_sub_mod); switch (cmd_sub_mod) { case FSP_CMD_MEM_RES_CE: case FSP_CMD_MEM_RES_UE: case FSP_CMD_MEM_RES_UE_SCRB: /* * We get the memory relilence command from FSP for * correctable/Uncorrectable/scrub UE errors with real * address of 4K memory page in which the error occurred. */ paddr_start = be64_to_cpu(*((__be64 *)&msg->data.words[0])); printf("Got memory resilience error message for " "paddr=0x%016llux\n", paddr_start); return handle_memory_resilience(cmd_sub_mod, paddr_start); case FSP_CMD_MEM_DYN_DEALLOC: paddr_start = be64_to_cpu(*((__be64 *)&msg->data.words[0])); paddr_end = be64_to_cpu(*((__be64 *)&msg->data.words[2])); printf("Got dynamic memory deallocation message: " "paddr_start=0x%016llux, paddr_end=0x%016llux\n", paddr_start, paddr_end); return handle_memory_deallocation(paddr_start, paddr_end); } return false; } /* * pre allocate memory to hold maximum of 128 memory error event until linux * consumes it. */ static int init_merr_free_list(uint32_t num_entries) { struct fsp_mem_err_node *entry; int i; entry = zalloc(sizeof(struct fsp_mem_err_node) * num_entries); if (!entry) return -ENOMEM; for (i = 0; i < num_entries; ++i, entry++) list_add_tail(&merr_free_list, &entry->list); return 0; } static struct fsp_client fsp_mem_err_client = { .message = fsp_mem_err_msg, }; void fsp_memory_err_init(void) { int rc; printf("Intializing fsp memory handling.\n"); /* If we have an FSP, register for notifications */ if (!fsp_present()) return; /* pre allocate memory for 128 record */ rc = init_merr_free_list(MERR_MAX_RECORD); if (rc < 0) return; fsp_register_client(&fsp_mem_err_client, FSP_MCLASS_MEMORY_ERR); }