553 lines
15 KiB
C
553 lines
15 KiB
C
/* Copyright 2013-2014 IBM Corp.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
* implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
#include <skiboot.h>
|
|
#include <xscom.h>
|
|
#include <processor.h>
|
|
#include <device.h>
|
|
#include <chip.h>
|
|
#include <centaur.h>
|
|
#include <lock.h>
|
|
#include <fsi-master.h>
|
|
#include <timebase.h>
|
|
|
|
/*
|
|
* Centaur chip IDs are using the XSCOM "partID" encoding
|
|
* described in xscom.h. recap:
|
|
*
|
|
* 0b1000.0000.0000.0000.0000.00NN.NCCC.MMMM
|
|
* N=Node, C=Chip, M=Memory Channel
|
|
*
|
|
* We currently use FSI exclusively for centaur access. We can
|
|
* start using MMIO on Centaur DD2.x when we have a way to handle
|
|
* machine checks happening inside Sapphire which we don't at the
|
|
* moment.
|
|
*/
|
|
|
|
/* Is that correct ? */
|
|
#define MAX_CENTAURS_PER_CHIP 8
|
|
|
|
/* Mark the centaur offline after this many consecutive errors */
|
|
#define CENTAUR_ERR_OFFLINE_THRESHOLD 10
|
|
|
|
/*
|
|
* FSI2PIB register definitions (this could be moved out if we were to
|
|
* support FSI master to other chips.
|
|
*/
|
|
#define FSI_DATA0_REG 0x1000
|
|
#define FSI_DATA1_REG 0x1004
|
|
#define FSI_CMD_REG 0x1008
|
|
#define FSI_CMD_WR 0x80000000
|
|
#define FSI_CMD_RD 0x00000000
|
|
#define FSI_ENG_RESET_REG 0x1018
|
|
#define FSI_STATUS_REG 0x101c
|
|
#define FSI_STATUS_ABORT 0x00100000
|
|
#define FSI_STATUS_ERRORS 0x00007000
|
|
|
|
/* Some Centaur XSCOMs we care about */
|
|
#define SCAC_CONFIG_REG 0x020115ce
|
|
#define SCAC_CONFIG_SET 0x020115cf
|
|
#define SCAC_CONFIG_CLR 0x020115d0
|
|
#define SCAC_ENABLE_MSK PPC_BIT(0)
|
|
|
|
#define cent_log(__lev, __c, __fmt, ...) \
|
|
prlog(__lev, "CENTAUR %x: " __fmt, __c->part_id, ##__VA_ARGS__)
|
|
|
|
static int64_t centaur_fsiscom_complete(struct centaur_chip *centaur)
|
|
{
|
|
int64_t rc;
|
|
uint32_t stat;
|
|
|
|
rc = mfsi_read(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
|
|
centaur->fsi_master_port, FSI_STATUS_REG, &stat);
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur, "MFSI read error %lld reading STAT\n", rc);
|
|
return rc;
|
|
}
|
|
if ((stat & (FSI_STATUS_ABORT | FSI_STATUS_ERRORS)) == 0)
|
|
return OPAL_SUCCESS;
|
|
|
|
cent_log(PR_ERR, centaur, "Remote FSI SCOM error, status=0x%08x\n", stat);
|
|
|
|
/* All 1's ? Assume it's gone */
|
|
if (stat == 0xffffffffu) {
|
|
cent_log(PR_ERR, centaur, "Chip appears to be dead !\n");
|
|
centaur->valid = false;
|
|
|
|
/* Here, hostboot grabs a pile of FFDC from the FSI layer,
|
|
* we could do that too ...
|
|
*/
|
|
return OPAL_HARDWARE;
|
|
}
|
|
|
|
/* Here HB prints the GPx registers which I believe are only
|
|
* in the host (FSI master). We skip that for now, we don't have
|
|
* a good API to them
|
|
*/
|
|
|
|
/* Recovery sequence from HostBoot fsiscom.C
|
|
* if SCOM fails and FSI Master displays "MasterTimeOut"
|
|
* then 7,6 <covered by FSI driver>
|
|
* else if SCOM fails and FSI2PIB Status shows PIB abort
|
|
* then just perform unit reset (6) and wait 1 ms
|
|
* else (PIB_abort='0' but PIB error is unequal 0)
|
|
* then just perform unit reset (6) (wait not needed).
|
|
*
|
|
* Note: Waiting 1ms inside OPAL is a BIG NO NO !!! We have
|
|
* no choice but doing it at the moment but that will have
|
|
* to be fixed one way or another, possibly by returning some
|
|
* kind of busy status until the delay is expired.
|
|
*/
|
|
rc = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
|
|
centaur->fsi_master_port, FSI_ENG_RESET_REG, 0);
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur, "MFSI write error %lld resetting SCOM engine\n",
|
|
rc);
|
|
}
|
|
return OPAL_HARDWARE;
|
|
}
|
|
|
|
static int64_t centaur_fsiscom_read(struct centaur_chip *centaur, uint32_t pcb_addr,
|
|
uint64_t *val)
|
|
{
|
|
int64_t rc;
|
|
uint32_t data0, data1;
|
|
|
|
rc = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
|
|
centaur->fsi_master_port, FSI_CMD_REG, pcb_addr | FSI_CMD_RD);
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur, "MFSI write error %lld writing CMD\n", rc);
|
|
return rc;
|
|
}
|
|
|
|
rc = centaur_fsiscom_complete(centaur);
|
|
if (rc)
|
|
return rc;
|
|
|
|
rc = mfsi_read(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
|
|
centaur->fsi_master_port, FSI_DATA0_REG, &data0);
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur, "MFSI read error %lld reading DATA0\n", rc);
|
|
return rc;
|
|
}
|
|
rc = mfsi_read(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
|
|
centaur->fsi_master_port, FSI_DATA1_REG, &data1);
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur, "MFSI read error %lld readking DATA1\n", rc);
|
|
return rc;
|
|
}
|
|
|
|
*val = (((uint64_t)data0) << 32) | data1;
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int64_t centaur_fsiscom_write(struct centaur_chip *centaur, uint32_t pcb_addr,
|
|
uint64_t val)
|
|
{
|
|
int64_t rc;
|
|
|
|
rc = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
|
|
centaur->fsi_master_port, FSI_DATA0_REG, hi32(val));
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur, "MFSI write error %lld writing DATA0\n", rc);
|
|
return rc;
|
|
}
|
|
rc = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
|
|
centaur->fsi_master_port, FSI_DATA1_REG, lo32(val));
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur, "MFSI write error %lld writing DATA1\n", rc);
|
|
return rc;
|
|
}
|
|
rc = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
|
|
centaur->fsi_master_port, FSI_CMD_REG, pcb_addr | FSI_CMD_WR);
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur, "MFSI write error %lld writing CMD\n", rc);
|
|
return rc;
|
|
}
|
|
|
|
return centaur_fsiscom_complete(centaur);
|
|
}
|
|
|
|
struct centaur_chip *get_centaur(uint32_t part_id)
|
|
{
|
|
uint32_t hchip_id, mchan;
|
|
struct proc_chip *hchip;
|
|
struct centaur_chip *centaur;
|
|
|
|
if ((part_id >> 28) != 8) {
|
|
prerror("CENTAUR: Invalid part ID 0x%x\n", part_id);
|
|
return NULL;
|
|
}
|
|
hchip_id = (part_id & 0x0fffffff) >> 4;
|
|
mchan = part_id & 0xf;
|
|
|
|
hchip = get_chip(hchip_id);
|
|
if (!hchip) {
|
|
prerror("CENTAUR: Centaur 0x%x not found on non-existing chip 0%x\n",
|
|
part_id, hchip_id);
|
|
return NULL;
|
|
}
|
|
if (mchan >= MAX_CENTAURS_PER_CHIP) {
|
|
prerror("CENTAUR: Centaur 0x%x channel out of bounds !\n", part_id);
|
|
return NULL;
|
|
}
|
|
if (!hchip->centaurs) {
|
|
prerror("CENTAUR: Centaur 0x%x not found on chip 0%x (no centaurs)\n",
|
|
part_id, hchip_id);
|
|
return NULL;
|
|
}
|
|
centaur = &hchip->centaurs[mchan];
|
|
if (!centaur->valid) {
|
|
prerror("CENTAUR: Centaur 0x%x not valid on chip 0%x\n",
|
|
part_id, hchip_id);
|
|
return NULL;
|
|
}
|
|
return centaur;
|
|
}
|
|
|
|
/*
|
|
* Indirect XSCOM access functions. Copied from xscom.c, at a
|
|
* latter date, we should merge these properly.
|
|
*/
|
|
static void centaur_xscom_handle_ind_error(struct centaur_chip *centaur,
|
|
uint64_t data, uint64_t pcb_addr,
|
|
bool is_write)
|
|
{
|
|
unsigned int stat = GETFIELD(XSCOM_DATA_IND_ERR, data);
|
|
bool timeout = !(data & XSCOM_DATA_IND_COMPLETE);
|
|
|
|
/* XXX: Create error log entry ? */
|
|
if (timeout)
|
|
cent_log(PR_ERR, centaur,
|
|
"inddirect %s timeout, pcb_addr=0x%llx stat=0x%x\n",
|
|
is_write ? "write" : "read", pcb_addr, stat);
|
|
else
|
|
cent_log(PR_ERR, centaur,
|
|
"indirect %s error, pcb_addr=0x%llx stat=0x%x\n",
|
|
is_write ? "write" : "read", pcb_addr, stat);
|
|
}
|
|
|
|
static int centaur_xscom_ind_read(struct centaur_chip *centaur,
|
|
uint64_t pcb_addr, uint64_t *val)
|
|
{
|
|
uint32_t addr;
|
|
uint64_t data;
|
|
int rc, retries;
|
|
|
|
/* Write indirect address */
|
|
addr = pcb_addr & 0x7fffffff;
|
|
data = XSCOM_DATA_IND_READ |
|
|
(pcb_addr & XSCOM_ADDR_IND_ADDR);
|
|
rc = centaur_fsiscom_write(centaur, addr, data);
|
|
if (rc)
|
|
goto bail;
|
|
|
|
/* Wait for completion */
|
|
for (retries = 0; retries < XSCOM_IND_MAX_RETRIES; retries++) {
|
|
rc = centaur_fsiscom_read(centaur, addr, &data);
|
|
if (rc)
|
|
goto bail;
|
|
if ((data & XSCOM_DATA_IND_COMPLETE) &&
|
|
((data & XSCOM_DATA_IND_ERR) == 0)) {
|
|
*val = data & XSCOM_DATA_IND_DATA;
|
|
break;
|
|
}
|
|
if ((data & XSCOM_DATA_IND_COMPLETE) ||
|
|
(retries >= XSCOM_IND_MAX_RETRIES)) {
|
|
centaur_xscom_handle_ind_error(centaur, data, pcb_addr,
|
|
false);
|
|
rc = OPAL_HARDWARE;
|
|
goto bail;
|
|
}
|
|
}
|
|
bail:
|
|
if (rc)
|
|
*val = (uint64_t)-1;
|
|
return rc;
|
|
}
|
|
|
|
static int centaur_xscom_ind_write(struct centaur_chip *centaur,
|
|
uint64_t pcb_addr, uint64_t val)
|
|
{
|
|
uint32_t addr;
|
|
uint64_t data;
|
|
int rc, retries;
|
|
|
|
/* Write indirect address & data */
|
|
addr = pcb_addr & 0x7fffffff;
|
|
data = pcb_addr & XSCOM_ADDR_IND_ADDR;
|
|
data |= val & XSCOM_ADDR_IND_DATA;
|
|
|
|
rc = centaur_fsiscom_write(centaur, addr, data);
|
|
if (rc)
|
|
goto bail;
|
|
|
|
/* Wait for completion */
|
|
for (retries = 0; retries < XSCOM_IND_MAX_RETRIES; retries++) {
|
|
rc = centaur_fsiscom_read(centaur, addr, &data);
|
|
if (rc)
|
|
goto bail;
|
|
if ((data & XSCOM_DATA_IND_COMPLETE) &&
|
|
((data & XSCOM_DATA_IND_ERR) == 0))
|
|
break;
|
|
if ((data & XSCOM_DATA_IND_COMPLETE) ||
|
|
(retries >= XSCOM_IND_MAX_RETRIES)) {
|
|
centaur_xscom_handle_ind_error(centaur, data, pcb_addr,
|
|
true);
|
|
rc = OPAL_HARDWARE;
|
|
goto bail;
|
|
}
|
|
}
|
|
bail:
|
|
return rc;
|
|
}
|
|
|
|
int64_t centaur_xscom_read(uint32_t id, uint64_t pcb_addr, uint64_t *val)
|
|
{
|
|
struct centaur_chip *centaur = get_centaur(id);
|
|
int64_t rc;
|
|
|
|
if (!centaur)
|
|
return OPAL_PARAMETER;
|
|
if (!centaur->online)
|
|
return OPAL_XSCOM_CTR_OFFLINED;
|
|
|
|
lock(¢aur->lock);
|
|
if (pcb_addr & XSCOM_ADDR_IND_FLAG)
|
|
rc = centaur_xscom_ind_read(centaur, pcb_addr, val);
|
|
else
|
|
rc = centaur_fsiscom_read(centaur, pcb_addr, val);
|
|
|
|
/* We mark the centaur offline if we get too many errors on
|
|
* consecutive accesses
|
|
*/
|
|
if (rc) {
|
|
centaur->error_count++;
|
|
if (centaur->error_count > CENTAUR_ERR_OFFLINE_THRESHOLD) {
|
|
centaur->online = false;
|
|
/**
|
|
* @fwts-label CentaurOfflinedTooManyErrors
|
|
* @fwts-advice OPAL marked a Centaur (memory buffer)
|
|
* as offline due to CENTAUR_ERR_OFFLINE_THRESHOLD (10)
|
|
* consecutive errors on XSCOMs to this centaur.
|
|
* OPAL will now return OPAL_XSCOM_CTR_OFFLINED and not
|
|
* try any further XSCOMs. This is likely caused by
|
|
* some hardware issue or PRD recovery issue.
|
|
*/
|
|
prlog(PR_ERR, "CENTAUR: Offlined %x due to > %d consecutive XSCOM errors. No more XSCOMs to this centaur.\n",
|
|
id, CENTAUR_ERR_OFFLINE_THRESHOLD);
|
|
}
|
|
} else
|
|
centaur->error_count = 0;
|
|
unlock(¢aur->lock);
|
|
|
|
return rc;
|
|
}
|
|
|
|
int64_t centaur_xscom_write(uint32_t id, uint64_t pcb_addr, uint64_t val)
|
|
{
|
|
struct centaur_chip *centaur = get_centaur(id);
|
|
int64_t rc;
|
|
|
|
if (!centaur)
|
|
return OPAL_PARAMETER;
|
|
if (!centaur->online)
|
|
return OPAL_XSCOM_CTR_OFFLINED;
|
|
|
|
lock(¢aur->lock);
|
|
if (pcb_addr & XSCOM_ADDR_IND_FLAG)
|
|
rc = centaur_xscom_ind_write(centaur, pcb_addr, val);
|
|
else
|
|
rc = centaur_fsiscom_write(centaur, pcb_addr, val);
|
|
|
|
/* We mark the centaur offline if we get too many errors on
|
|
* consecutive accesses
|
|
*/
|
|
if (rc) {
|
|
centaur->error_count++;
|
|
if (centaur->error_count > CENTAUR_ERR_OFFLINE_THRESHOLD)
|
|
centaur->online = false;
|
|
} else
|
|
centaur->error_count = 0;
|
|
unlock(¢aur->lock);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static bool centaur_check_id(struct centaur_chip *centaur)
|
|
{
|
|
int64_t rc;
|
|
uint64_t val;
|
|
|
|
rc = centaur_fsiscom_read(centaur, 0xf000f, &val);
|
|
if (rc) {
|
|
cent_log(PR_ERR, centaur,
|
|
" FSISCOM error %lld reading ID register\n",
|
|
rc);
|
|
return false;
|
|
}
|
|
|
|
/* Extract CFAM id */
|
|
val >>= 44;
|
|
|
|
/* Identify chip */
|
|
if ((val & 0xff) != 0xe9) {
|
|
cent_log(PR_ERR, centaur,
|
|
" CFAM ID 0x%02x is not a Centaur !\n",
|
|
(unsigned int)(val & 0xff));
|
|
return false;
|
|
}
|
|
|
|
/* Get EC level from CFAM ID */
|
|
centaur->ec_level = ((val >> 16) & 0xf) << 4;
|
|
centaur->ec_level |= (val >> 8) & 0xf;
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool centaur_add(uint32_t part_id, uint32_t mchip, uint32_t meng,
|
|
uint32_t mport)
|
|
{
|
|
uint32_t hchip_id, mchan;
|
|
struct proc_chip *hchip;
|
|
struct centaur_chip *centaur;
|
|
|
|
if ((part_id >> 28) != 8) {
|
|
prerror("CENTAUR: Invalid part ID 0x%x\n", part_id);
|
|
return false;
|
|
}
|
|
hchip_id = (part_id & 0x0fffffff) >> 4;
|
|
mchan = part_id & 0xf;
|
|
|
|
printf("CENTAUR: Found centaur for chip 0x%x channel %d\n",
|
|
hchip_id, mchan);
|
|
printf("CENTAUR: FSI host: 0x%x cMFSI%d port %d\n",
|
|
mchip, meng, mport);
|
|
|
|
hchip = get_chip(hchip_id);
|
|
if (!hchip) {
|
|
prerror("CENTAUR: No such chip !!!\n");
|
|
return false;
|
|
}
|
|
|
|
if (mchan >= MAX_CENTAURS_PER_CHIP) {
|
|
prerror("CENTAUR: Channel out of bounds !\n");
|
|
return false;
|
|
}
|
|
|
|
if (!hchip->centaurs) {
|
|
hchip->centaurs =
|
|
zalloc(sizeof(struct centaur_chip) *
|
|
MAX_CENTAURS_PER_CHIP);
|
|
assert(hchip->centaurs);
|
|
}
|
|
|
|
centaur = &hchip->centaurs[mchan];
|
|
if (centaur->valid) {
|
|
prerror("CENTAUR: Duplicate centaur !\n");
|
|
return false;
|
|
}
|
|
centaur->part_id = part_id;
|
|
centaur->fsi_master_chip_id = mchip;
|
|
centaur->fsi_master_port = mport;
|
|
centaur->fsi_master_engine = meng ? MFSI_cMFSI1 : MFSI_cMFSI0;
|
|
centaur->online = true;
|
|
init_lock(¢aur->lock);
|
|
list_head_init(¢aur->i2cms);
|
|
|
|
if (!centaur_check_id(centaur))
|
|
return false;
|
|
|
|
cent_log(PR_INFO, centaur, "Found DD%x.%x chip\n",
|
|
centaur->ec_level >> 4,
|
|
centaur->ec_level & 0xf);
|
|
|
|
centaur->valid = true;
|
|
return true;
|
|
}
|
|
|
|
/* Returns how long to wait for logic to stop in TB ticks or a negative
|
|
* value on error
|
|
*/
|
|
int64_t centaur_disable_sensor_cache(uint32_t part_id)
|
|
{
|
|
struct centaur_chip *centaur = get_centaur(part_id);
|
|
int64_t rc = 0;
|
|
uint64_t ctrl;
|
|
|
|
if (!centaur)
|
|
return false;
|
|
|
|
lock(¢aur->lock);
|
|
centaur->scache_disable_count++;
|
|
if (centaur->scache_disable_count == 1) {
|
|
centaur->scache_was_enabled = false;
|
|
rc = centaur_fsiscom_read(centaur, SCAC_CONFIG_REG, &ctrl);
|
|
if (rc)
|
|
goto bail;
|
|
centaur->scache_was_enabled = !!(ctrl & SCAC_ENABLE_MSK);
|
|
rc = centaur_fsiscom_write(centaur, SCAC_CONFIG_CLR, SCAC_ENABLE_MSK);
|
|
if (rc)
|
|
goto bail;
|
|
rc = msecs_to_tb(30);
|
|
}
|
|
bail:
|
|
unlock(¢aur->lock);
|
|
return rc;
|
|
}
|
|
|
|
int64_t centaur_enable_sensor_cache(uint32_t part_id)
|
|
{
|
|
struct centaur_chip *centaur = get_centaur(part_id);
|
|
int64_t rc = 0;
|
|
|
|
if (!centaur)
|
|
return false;
|
|
|
|
lock(¢aur->lock);
|
|
if (centaur->scache_disable_count == 0) {
|
|
cent_log(PR_ERR, centaur, "Cache count going negative !\n");
|
|
backtrace();
|
|
goto bail;
|
|
}
|
|
centaur->scache_disable_count--;
|
|
if (centaur->scache_disable_count == 0 && centaur->scache_was_enabled)
|
|
rc = centaur_fsiscom_write(centaur, SCAC_CONFIG_SET, SCAC_ENABLE_MSK);
|
|
bail:
|
|
unlock(¢aur->lock);
|
|
return rc;
|
|
}
|
|
|
|
void centaur_init(void)
|
|
{
|
|
struct dt_node *cn;
|
|
|
|
dt_for_each_compatible(dt_root, cn, "ibm,centaur") {
|
|
uint32_t chip_id, mchip, meng, mport;
|
|
|
|
chip_id = dt_prop_get_u32(cn, "ibm,chip-id");
|
|
mchip = dt_prop_get_u32(cn, "ibm,fsi-master-chip-id");
|
|
meng = dt_prop_get_cell(cn, "ibm,fsi-master-port", 0);
|
|
mport = dt_prop_get_cell(cn, "ibm,fsi-master-port", 1);
|
|
|
|
/*
|
|
* If adding the centaur succeeds, we expose it to
|
|
* Linux as a scom-controller
|
|
*/
|
|
if (centaur_add(chip_id, mchip, meng, mport))
|
|
dt_add_property(cn, "scom-controller", NULL, 0);
|
|
}
|
|
}
|