233 lines
5.9 KiB
C
233 lines
5.9 KiB
C
/* Copyright 2013-2014 IBM Corp.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
* implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
#include <skiboot.h>
|
|
#include <fsp.h>
|
|
#include <lock.h>
|
|
#include <processor.h>
|
|
#include <timebase.h>
|
|
#include <fsp-sysparam.h>
|
|
#include <errorlog.h>
|
|
#include <opal-api.h>
|
|
|
|
static bool fsp_surv_state = false;
|
|
static bool fsp_surv_ack_pending = false;
|
|
static u64 surv_timer;
|
|
static u64 surv_ack_timer;
|
|
static u32 surv_state_param;
|
|
static struct lock surv_lock = LOCK_UNLOCKED;
|
|
|
|
#define FSP_SURV_ACK_TIMEOUT 120 /* surv ack timeout in seconds */
|
|
|
|
DEFINE_LOG_ENTRY(OPAL_RC_SURVE_INIT, OPAL_MISC_ERR_EVT, OPAL_SURVEILLANCE,
|
|
OPAL_SURVEILLANCE_ERR, OPAL_PREDICTIVE_ERR_GENERAL,
|
|
OPAL_MISCELLANEOUS_INFO_ONLY);
|
|
|
|
DEFINE_LOG_ENTRY(OPAL_RC_SURVE_STATUS, OPAL_MISC_ERR_EVT, OPAL_SURVEILLANCE,
|
|
OPAL_SURVEILLANCE_ERR, OPAL_PREDICTIVE_ERR_GENERAL,
|
|
OPAL_MISCELLANEOUS_INFO_ONLY);
|
|
|
|
DEFINE_LOG_ENTRY(OPAL_RC_SURVE_ACK, OPAL_MISC_ERR_EVT, OPAL_SURVEILLANCE,
|
|
OPAL_SURVEILLANCE_ERR, OPAL_PREDICTIVE_ERR_GENERAL,
|
|
OPAL_MISCELLANEOUS_INFO_ONLY);
|
|
|
|
static void fsp_surv_ack(struct fsp_msg *msg)
|
|
{
|
|
uint8_t val;
|
|
|
|
if (!msg->resp)
|
|
return;
|
|
|
|
val = (msg->resp->word1 >> 8) & 0xff;
|
|
if (val == 0) {
|
|
/* reset the pending flag */
|
|
prlog(PR_TRACE,
|
|
"SURV: Received heartbeat acknowledge from FSP\n");
|
|
lock(&surv_lock);
|
|
fsp_surv_ack_pending = false;
|
|
unlock(&surv_lock);
|
|
} else {
|
|
/**
|
|
* @fwts-label FSPHeartbeatAckError
|
|
* @fwts-advice Error in acknowledging heartbeat to FSP.
|
|
* This could mean the FSP has gone away or it may mean
|
|
* the FSP may kill us for missing too many heartbeats.
|
|
*/
|
|
prlog(PR_ERR,
|
|
"SURV: Heartbeat Acknowledgment error from FSP\n");
|
|
}
|
|
|
|
fsp_freemsg(msg);
|
|
}
|
|
|
|
static void fsp_surv_check_timeout(void)
|
|
{
|
|
u64 now = mftb();
|
|
|
|
/*
|
|
* We just checked fsp_surv_ack_pending to be true in fsp_surv_hbeat
|
|
* and we haven't dropped the surv_lock between then and now. So, we
|
|
* just go ahead and check timeouts.
|
|
*/
|
|
if (tb_compare(now, surv_ack_timer) == TB_AAFTERB) {
|
|
uint32_t plid = log_simple_error(&e_info(OPAL_RC_SURVE_ACK),
|
|
"SURV: Surv ACK timed out; initiating R/R\n");
|
|
|
|
/* Reset the pending trigger too */
|
|
fsp_surv_ack_pending = false;
|
|
fsp_trigger_reset(plid);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/* Send surveillance heartbeat based on a timebase trigger */
|
|
static void fsp_surv_hbeat(void)
|
|
{
|
|
u64 now = mftb();
|
|
struct fsp_msg *msg;
|
|
|
|
/* Check if an ack is pending... if so, don't send the ping just yet */
|
|
if (fsp_surv_ack_pending) {
|
|
fsp_surv_check_timeout();
|
|
return;
|
|
}
|
|
|
|
/* add timebase callbacks */
|
|
/*
|
|
* XXX This packet needs to be pushed to FSP in an interval
|
|
* less than 120s that's advertised to FSP.
|
|
*
|
|
* Verify if the command building format and call is fine.
|
|
*/
|
|
if (surv_timer == 0 ||
|
|
(tb_compare(now, surv_timer) == TB_AAFTERB) ||
|
|
(tb_compare(now, surv_timer) == TB_AEQUALB)) {
|
|
prlog(PR_TRACE,
|
|
"SURV: Sending the heartbeat command to FSP\n");
|
|
msg = fsp_mkmsg(FSP_CMD_SURV_HBEAT, 1, 120);
|
|
if (!msg) {
|
|
prerror("SURV: Failed to allocate heartbeat msg\n");
|
|
return;
|
|
}
|
|
if (fsp_queue_msg(msg, fsp_surv_ack)) {
|
|
fsp_freemsg(msg);
|
|
prerror("SURV: Failed to queue heartbeat msg\n");
|
|
} else {
|
|
fsp_surv_ack_pending = true;
|
|
surv_timer = now + secs_to_tb(60);
|
|
surv_ack_timer = now + secs_to_tb(FSP_SURV_ACK_TIMEOUT);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void fsp_surv_poll(void *data __unused)
|
|
{
|
|
if (!fsp_surv_state)
|
|
return;
|
|
lock(&surv_lock);
|
|
fsp_surv_hbeat();
|
|
unlock(&surv_lock);
|
|
}
|
|
|
|
static void fsp_surv_got_param(uint32_t param_id __unused, int err_len,
|
|
void *data __unused)
|
|
{
|
|
if (err_len != 4) {
|
|
uint32_t plid = log_simple_error(&e_info(OPAL_RC_SURVE_STATUS),
|
|
"SURV: Error (%d) retrieving surv status; initiating R/R\n",
|
|
err_len);
|
|
fsp_trigger_reset(plid);
|
|
return;
|
|
}
|
|
|
|
if (!(surv_state_param & 0x01)) {
|
|
prlog(PR_NOTICE, "SURV: Status from FSP: disabled\n");
|
|
return;
|
|
}
|
|
prlog(PR_NOTICE, "SURV: Status from FSP: enabled\n");
|
|
|
|
lock(&surv_lock);
|
|
fsp_surv_state = true;
|
|
|
|
/* Also send one heartbeat now. The next one will not happen
|
|
* until we hit the OS.
|
|
*/
|
|
fsp_surv_hbeat();
|
|
unlock(&surv_lock);
|
|
}
|
|
|
|
void fsp_surv_query(void)
|
|
{
|
|
int rc;
|
|
|
|
printf("SURV: Querying FSP's surveillance status\n");
|
|
|
|
/* Reset surveillance settings */
|
|
lock(&surv_lock);
|
|
fsp_surv_state = false;
|
|
surv_timer = 0;
|
|
surv_ack_timer = 0;
|
|
unlock(&surv_lock);
|
|
|
|
/* Query FPS for surveillance state */
|
|
rc = fsp_get_sys_param(SYS_PARAM_SURV, &surv_state_param, 4,
|
|
fsp_surv_got_param, NULL);
|
|
if (rc) {
|
|
log_simple_error(&e_info(OPAL_RC_SURVE_INIT),
|
|
"SURV: Error %d queueing param request\n", rc);
|
|
}
|
|
}
|
|
|
|
static bool fsp_surv_msg_rr(u32 cmd_sub_mod, struct fsp_msg *msg)
|
|
{
|
|
assert(msg == NULL);
|
|
|
|
switch (cmd_sub_mod) {
|
|
case FSP_RESET_START:
|
|
printf("SURV: Disabling surveillance\n");
|
|
lock(&surv_lock);
|
|
fsp_surv_state = false;
|
|
fsp_surv_ack_pending = false;
|
|
unlock(&surv_lock);
|
|
return true;
|
|
case FSP_RELOAD_COMPLETE:
|
|
fsp_surv_query();
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static struct fsp_client fsp_surv_client_rr = {
|
|
.message = fsp_surv_msg_rr,
|
|
};
|
|
|
|
/* This is called at boot time */
|
|
void fsp_init_surveillance(void)
|
|
{
|
|
/* Always register the poller, so we don't have to add/remove
|
|
* it on reset-reload or change of surveillance state. Also the
|
|
* poller list has no locking so we don't want to play with it
|
|
* at runtime.
|
|
*/
|
|
opal_add_poller(fsp_surv_poll, NULL);
|
|
|
|
/* Register for the reset/reload event */
|
|
fsp_register_client(&fsp_surv_client_rr, FSP_MCLASS_RR_EVENT);
|
|
|
|
/* Send query to FSP */
|
|
fsp_surv_query();
|
|
}
|
|
|