260 lines
7.7 KiB
C
260 lines
7.7 KiB
C
|
/* Copyright 2013-2016 IBM Corp.
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||
|
* implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
#ifndef __NPU2_H
|
||
|
#define __NPU2_H
|
||
|
|
||
|
#include <pci.h>
|
||
|
#include <phys-map.h>
|
||
|
#include <npu2-regs.h>
|
||
|
|
||
|
/*
 * Per-NPU logging helpers.
 *
 * Each macro forwards to prlog() at a fixed log level and prefixes the
 * message with "NPU<n>: ", where <n> is (p)->phb_nvlink.opal_id.
 */
#define NPU2DBG(p, fmt, a...)						\
	prlog(PR_DEBUG, "NPU%d: " fmt, (p)->phb_nvlink.opal_id, ##a)
#define NPU2INF(p, fmt, a...)						\
	prlog(PR_INFO, "NPU%d: " fmt, (p)->phb_nvlink.opal_id, ##a)
#define NPU2ERR(p, fmt, a...)						\
	prlog(PR_ERR, "NPU%d: " fmt, (p)->phb_nvlink.opal_id, ##a)
|
||
|
|
||
|
/*
 * Per-device logging helpers.
 *
 * NPU2DEVLOG() logs at level `l` and prefixes the message with
 * "NPU<n>:<bus>:<dev>.<fn> ".  <n> is (p)->npu->phb_nvlink.opal_id; the
 * bus, device and function numbers are decoded from (p)->bdfn
 * (bits 15:8, 7:3 and 2:0 respectively).
 */
#define NPU2DEVLOG(l, p, fmt, a...)					\
	prlog(l, "NPU%d:%d:%d.%d " fmt,					\
	      (p)->npu->phb_nvlink.opal_id,				\
	      ((p)->bdfn >> 8) & 0xff,					\
	      ((p)->bdfn >> 3) & 0x1f,					\
	      (p)->bdfn & 0x7, ##a)

/* Fixed-level wrappers around NPU2DEVLOG() */
#define NPU2DEVDBG(p, fmt, a...)	NPU2DEVLOG(PR_DEBUG, p, fmt, ##a)
#define NPU2DEVINF(p, fmt, a...)	NPU2DEVLOG(PR_INFO, p, fmt, ##a)
#define NPU2DEVERR(p, fmt, a...)	NPU2DEVLOG(PR_ERR, p, fmt, ##a)
|
||
|
|
||
|
/*
 * OpenCAPI logging helpers.
 *
 * Each macro forwards to prlog() at a fixed log level and prefixes the
 * message with "OCAPI[<chip>:<brick>]: ", taken from (dev)->npu->chip_id
 * and (dev)->brick_index.
 *
 * `dev` is parenthesized in the expansion so that any pointer-valued
 * expression (e.g. `base + 1` or `&array[i]`) can be passed safely.
 */
#define OCAPIDBG(dev, fmt, a...)					\
	prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt,				\
	      (dev)->npu->chip_id, (dev)->brick_index, ## a)
#define OCAPIINF(dev, fmt, a...)					\
	prlog(PR_INFO, "OCAPI[%d:%d]: " fmt,				\
	      (dev)->npu->chip_id, (dev)->brick_index, ## a)
#define OCAPIERR(dev, fmt, a...)					\
	prlog(PR_ERR, "OCAPI[%d:%d]: " fmt,				\
	      (dev)->npu->chip_id, (dev)->brick_index, ## a)
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Number of PEs supported
|
||
|
*
|
||
|
* The NPU supports PE numbers from 0-15. At present, we only assign a maximum
|
||
|
* of 1 PE per brick.
|
||
|
*
|
||
|
* NVLink devices are currently exposed to Linux underneath a single virtual
|
||
|
* PHB. Therefore, we give NVLink half the available PEs, which is enough for
|
||
|
* 6 bricks plus 1 reserved PE.
|
||
|
*
|
||
|
* For OpenCAPI, the BDF-to-PE registers are used exclusively for mapping
|
||
|
* bricks to System Interrupt Log registers (the BDF component of those
|
||
|
* registers is ignored). Currently, we allocate a fixed PE based on the brick
|
||
|
* index in the upper half of the PE namespace.
|
||
|
*/
|
||
|
#define NPU2_MAX_PE_NUM		8
#define NPU2_RESERVED_PE_NUM	7

/* OpenCAPI PE number for a device: its brick index offset by NPU2_MAX_PE_NUM */
#define NPU2_OCAPI_PE(ndev)	((ndev)->brick_index + NPU2_MAX_PE_NUM)
|
||
|
|
||
|
#define NPU2_LINKS_PER_CHIP	6

/*
 * Link flags.  Each value is a distinct bit, so they can be OR-ed together
 * (presumably into npu2_dev_nvlink.link_flags -- confirm against callers).
 */
#define NPU2_DEV_PCI_LINKED	0x1
#define NPU2_DEV_DL_RESET	0x2
|
||
|
|
||
|
/* Stack (0-2) a device belongs to: brick_index divided by two */
#define NPU2DEV_STACK(ndev)	((ndev)->brick_index / 2)

/* Brick number (0-1) of a device within its stack: brick_index modulo two */
#define NPU2DEV_BRICK(ndev)	((ndev)->brick_index % 2)
|
||
|
|
||
|
/* This represents the state of the actual hardware BARs not the
|
||
|
* emulated PCIe BARs. The is a subtle difference between the two as
|
||
|
* not all BARs are exposed outside of skiboot. */
|
||
|
struct npu2_bar {
|
||
|
enum phys_map_type type;
|
||
|
int index;
|
||
|
#define NPU2_BAR_FLAG_ENABLED 0x0010
|
||
|
|
||
|
/* Generation ID's are a single space in the hardware but we split
|
||
|
* them in two for the emulated PCIe devices so we need to keep track
|
||
|
* of which one has been enabled/disabled. */
|
||
|
#define NPU2_BAR_FLAG_ENABLED0 0x0080
|
||
|
#define NPU2_BAR_FLAG_ENABLED1 0x0100
|
||
|
uint32_t flags;
|
||
|
uint64_t base;
|
||
|
uint64_t size;
|
||
|
uint64_t reg;
|
||
|
};
|
||
|
|
||
|
/* Rpresents a BAR that is exposed via the PCIe emulated
|
||
|
* devices */
|
||
|
struct npu2_pcie_bar {
|
||
|
#define NPU2_PCIE_BAR_FLAG_SIZE_HI 0x0020
|
||
|
#define NPU2_PCIE_BAR_FLAG_TRAPPED 0x0040
|
||
|
uint32_t flags;
|
||
|
struct npu2_bar npu2_bar;
|
||
|
};
|
||
|
|
||
|
/* Kind of device behind an NPU2 link; UNKNOWN is the zero value */
enum npu2_dev_type {
	NPU2_DEV_TYPE_UNKNOWN	= 0,
	NPU2_DEV_TYPE_NVLINK	= 1,
	NPU2_DEV_TYPE_OPENCAPI	= 2,
};
|
||
|
|
||
|
struct npu2;
|
||
|
|
||
|
/* NVLink-specific state of an NPU2 device (embedded in struct npu2_dev) */
struct npu2_dev_nvlink {
	/*
	 * For NVLink, device and function numbers are allocated based
	 * on GPU association.  Links connected to the same GPU will be
	 * exposed as different functions of the same bus/device.
	 */
	uint32_t		gpu_bdfn;

	/* PCI virtual device and the associated GPU device */
	struct pci_virt_device	*pvd;
	struct phb		*phb;
	struct pci_device	*pd;

	uint8_t			link_flags;

	/* Used to associate the NPU device with GPU PCI devices */
	const char		*slot_label;
};
|
||
|
|
||
|
struct npu2_dev {
|
||
|
enum npu2_dev_type type;
|
||
|
uint32_t link_index;
|
||
|
uint32_t brick_index;
|
||
|
uint64_t pl_xscom_base;
|
||
|
struct dt_node *dt_node;
|
||
|
struct npu2_pcie_bar bars[2];
|
||
|
struct npu2 *npu;
|
||
|
|
||
|
uint32_t bdfn;
|
||
|
|
||
|
/* Which PHY lanes this device is associated with */
|
||
|
uint32_t lane_mask;
|
||
|
uint64_t link_speed; /* not used for NVLink */
|
||
|
|
||
|
/* Track currently running procedure and step number */
|
||
|
uint16_t procedure_number;
|
||
|
uint16_t procedure_step;
|
||
|
unsigned long procedure_tb;
|
||
|
uint32_t procedure_status;
|
||
|
|
||
|
/* NVLink */
|
||
|
struct npu2_dev_nvlink nvlink;
|
||
|
|
||
|
/* OpenCAPI */
|
||
|
struct phb phb_ocapi;
|
||
|
uint64_t linux_pe;
|
||
|
bool train_need_fence;
|
||
|
bool train_fenced;
|
||
|
};
|
||
|
|
||
|
struct npu2 {
|
||
|
uint32_t index;
|
||
|
struct dt_node *dt_node;
|
||
|
uint32_t chip_id;
|
||
|
uint64_t xscom_base;
|
||
|
void *regs;
|
||
|
uint64_t mm_base;
|
||
|
uint64_t mm_size;
|
||
|
uint32_t base_lsi;
|
||
|
uint32_t total_devices;
|
||
|
struct npu2_dev *devices;
|
||
|
enum phys_map_type gpu_map_type;
|
||
|
int ctx_ref[NPU2_XTS_BDF_MAP_SIZE];
|
||
|
|
||
|
/* IODA cache */
|
||
|
uint64_t tve_cache[16];
|
||
|
bool tx_zcal_complete[2];
|
||
|
|
||
|
/*
|
||
|
* Used to protect global MMIO space, in particular the XTS
|
||
|
* tables, and LPC allocation
|
||
|
*/
|
||
|
struct lock lock;
|
||
|
|
||
|
/* NVLink */
|
||
|
struct phb phb_nvlink;
|
||
|
uint32_t phb_index;
|
||
|
|
||
|
/* OCAPI */
|
||
|
uint64_t i2c_port_id_ocapi;
|
||
|
struct lock i2c_lock;
|
||
|
uint8_t i2c_pin_mode;
|
||
|
uint8_t i2c_pin_wr_state;
|
||
|
/*
|
||
|
* Which device currently has an LPC allocation.
|
||
|
* Temporary as long as we only support 1 LPC alloc per chip.
|
||
|
*/
|
||
|
struct npu2_dev *lpc_mem_allocated;
|
||
|
};
|
||
|
|
||
|
static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
|
||
|
{
|
||
|
assert(phb->phb_type == phb_type_npu_v2);
|
||
|
return container_of(phb, struct npu2, phb_nvlink);
|
||
|
}
|
||
|
|
||
|
static inline struct npu2_dev *phb_to_npu2_dev_ocapi(struct phb *phb)
|
||
|
{
|
||
|
assert(phb->phb_type == phb_type_npu_v2_opencapi);
|
||
|
return container_of(phb, struct npu2_dev, phb_ocapi);
|
||
|
}
|
||
|
|
||
|
static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
|
||
|
{
|
||
|
switch (ndev->type) {
|
||
|
case NPU2_DEV_TYPE_NVLINK:
|
||
|
return &ndev->npu->phb_nvlink;
|
||
|
case NPU2_DEV_TYPE_OPENCAPI:
|
||
|
return &ndev->phb_ocapi;
|
||
|
default:
|
||
|
assert(false);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void npu2_i2c_presence_detect(struct npu2 *npu);
|
||
|
int npu2_opencapi_init_npu(struct npu2 *npu);
|
||
|
int npu2_nvlink_init_npu(struct npu2 *npu);
|
||
|
void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn);
|
||
|
|
||
|
void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
|
||
|
uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg);
|
||
|
void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val);
|
||
|
uint64_t npu2_read(struct npu2 *p, uint64_t reg);
|
||
|
void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
|
||
|
void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mask);
|
||
|
int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
|
||
|
uint32_t offset, uint32_t len, uint32_t *data,
|
||
|
bool write);
|
||
|
void npu2_dev_procedure_reset(struct npu2_dev *dev);
|
||
|
|
||
|
void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag);
|
||
|
void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag);
|
||
|
uint32_t reset_ntl(struct npu2_dev *ndev);
|
||
|
extern int nv_zcal_nominal;
|
||
|
void npu2_opencapi_phy_init(struct npu2_dev *dev);
|
||
|
void npu2_opencapi_phy_reset(struct npu2_dev *dev);
|
||
|
void npu2_opencapi_phy_prbs31(struct npu2_dev *dev);
|
||
|
void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev);
|
||
|
int64_t npu2_freeze_status(struct phb *phb __unused,
|
||
|
uint64_t pe_number __unused,
|
||
|
uint8_t *freeze_state,
|
||
|
uint16_t *pci_error_type __unused,
|
||
|
uint16_t *severity __unused);
|
||
|
void npu2_dump_scoms(int chip_id);
|
||
|
#endif /* __NPU2_H */
|