/* Copyright 2013-2016 IBM Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __NPU2_H #define __NPU2_H #include #include #include /* Debugging options */ #define NPU2DBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \ (p)->phb_nvlink.opal_id, ##a) #define NPU2INF(p, fmt, a...) prlog(PR_INFO, "NPU%d: " fmt, \ (p)->phb_nvlink.opal_id, ##a) #define NPU2ERR(p, fmt, a...) prlog(PR_ERR, "NPU%d: " fmt, \ (p)->phb_nvlink.opal_id, ##a) #define NPU2DEVLOG(l, p, fmt, a...) prlog(l, "NPU%d:%d:%d.%d " fmt, \ (p)->npu->phb_nvlink.opal_id, \ ((p)->bdfn >> 8) & 0xff, \ ((p)->bdfn >> 3) & 0x1f, \ (p)->bdfn & 0x7, ##a) #define NPU2DEVDBG(p, fmt, a...) NPU2DEVLOG(PR_DEBUG, p, fmt, ##a) #define NPU2DEVINF(p, fmt, a...) NPU2DEVLOG(PR_INFO, p, fmt, ##a) #define NPU2DEVERR(p, fmt, a...) NPU2DEVLOG(PR_ERR, p, fmt, ##a) #define OCAPIDBG(dev, fmt, a...) prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt, \ dev->npu->chip_id, dev->brick_index, ## a) #define OCAPIINF(dev, fmt, a...) prlog(PR_INFO, "OCAPI[%d:%d]: " fmt, \ dev->npu->chip_id, dev->brick_index, ## a) #define OCAPIERR(dev, fmt, a...) prlog(PR_ERR, "OCAPI[%d:%d]: " fmt, \ dev->npu->chip_id, dev->brick_index, ## a) /* * Number of PEs supported * * The NPU supports PE numbers from 0-15. At present, we only assign a maximum * of 1 PE per brick. * * NVLink devices are currently exposed to Linux underneath a single virtual * PHB. Therefore, we give NVLink half the available PEs, which is enough for * 6 bricks plus 1 reserved PE. * * For OpenCAPI, the BDF-to-PE registers are used exclusively for mapping * bricks to System Interrupt Log registers (the BDF component of those * registers is ignored). Currently, we allocate a fixed PE based on the brick * index in the upper half of the PE namespace. */ #define NPU2_MAX_PE_NUM 8 #define NPU2_RESERVED_PE_NUM 7 #define NPU2_OCAPI_PE(ndev) ((ndev)->brick_index + NPU2_MAX_PE_NUM) #define NPU2_LINKS_PER_CHIP 6 /* Link flags */ #define NPU2_DEV_PCI_LINKED 0x1 #define NPU2_DEV_DL_RESET 0x2 /* Return the stack (0-2) of a device */ #define NPU2DEV_STACK(ndev) ((ndev)->brick_index / 2) /* Return the brick number (0-1) within a stack */ #define NPU2DEV_BRICK(ndev) ((ndev)->brick_index % 2) /* This represents the state of the actual hardware BARs not the * emulated PCIe BARs. The is a subtle difference between the two as * not all BARs are exposed outside of skiboot. */ struct npu2_bar { enum phys_map_type type; int index; #define NPU2_BAR_FLAG_ENABLED 0x0010 /* Generation ID's are a single space in the hardware but we split * them in two for the emulated PCIe devices so we need to keep track * of which one has been enabled/disabled. */ #define NPU2_BAR_FLAG_ENABLED0 0x0080 #define NPU2_BAR_FLAG_ENABLED1 0x0100 uint32_t flags; uint64_t base; uint64_t size; uint64_t reg; }; /* Rpresents a BAR that is exposed via the PCIe emulated * devices */ struct npu2_pcie_bar { #define NPU2_PCIE_BAR_FLAG_SIZE_HI 0x0020 #define NPU2_PCIE_BAR_FLAG_TRAPPED 0x0040 uint32_t flags; struct npu2_bar npu2_bar; }; enum npu2_dev_type { NPU2_DEV_TYPE_UNKNOWN, NPU2_DEV_TYPE_NVLINK, NPU2_DEV_TYPE_OPENCAPI, }; struct npu2; struct npu2_dev_nvlink { /* For NVLink, device and function numbers are allocated based * on GPU association. Links to connected to the same GPU will * be exposed as different functions of the same * bus/device. */ uint32_t gpu_bdfn; /* PCI virtual device and the associated GPU device */ struct pci_virt_device *pvd; struct phb *phb; struct pci_device *pd; uint8_t link_flags; /* Used to associate the NPU device with GPU PCI devices */ const char *slot_label; }; struct npu2_dev { enum npu2_dev_type type; uint32_t link_index; uint32_t brick_index; uint64_t pl_xscom_base; struct dt_node *dt_node; struct npu2_pcie_bar bars[2]; struct npu2 *npu; uint32_t bdfn; /* Which PHY lanes this device is associated with */ uint32_t lane_mask; uint64_t link_speed; /* not used for NVLink */ /* Track currently running procedure and step number */ uint16_t procedure_number; uint16_t procedure_step; unsigned long procedure_tb; uint32_t procedure_status; /* NVLink */ struct npu2_dev_nvlink nvlink; /* OpenCAPI */ struct phb phb_ocapi; uint64_t linux_pe; bool train_need_fence; bool train_fenced; }; struct npu2 { uint32_t index; struct dt_node *dt_node; uint32_t chip_id; uint64_t xscom_base; void *regs; uint64_t mm_base; uint64_t mm_size; uint32_t base_lsi; uint32_t total_devices; struct npu2_dev *devices; enum phys_map_type gpu_map_type; int ctx_ref[NPU2_XTS_BDF_MAP_SIZE]; /* IODA cache */ uint64_t tve_cache[16]; bool tx_zcal_complete[2]; /* * Used to protect global MMIO space, in particular the XTS * tables, and LPC allocation */ struct lock lock; /* NVLink */ struct phb phb_nvlink; uint32_t phb_index; /* OCAPI */ uint64_t i2c_port_id_ocapi; struct lock i2c_lock; uint8_t i2c_pin_mode; uint8_t i2c_pin_wr_state; /* * Which device currently has an LPC allocation. * Temporary as long as we only support 1 LPC alloc per chip. */ struct npu2_dev *lpc_mem_allocated; }; static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb) { assert(phb->phb_type == phb_type_npu_v2); return container_of(phb, struct npu2, phb_nvlink); } static inline struct npu2_dev *phb_to_npu2_dev_ocapi(struct phb *phb) { assert(phb->phb_type == phb_type_npu_v2_opencapi); return container_of(phb, struct npu2_dev, phb_ocapi); } static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev) { switch (ndev->type) { case NPU2_DEV_TYPE_NVLINK: return &ndev->npu->phb_nvlink; case NPU2_DEV_TYPE_OPENCAPI: return &ndev->phb_ocapi; default: assert(false); } } void npu2_i2c_presence_detect(struct npu2 *npu); int npu2_opencapi_init_npu(struct npu2 *npu); int npu2_nvlink_init_npu(struct npu2 *npu); void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn); void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val); uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg); void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val); uint64_t npu2_read(struct npu2 *p, uint64_t reg); void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask); void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mask); int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf, uint32_t offset, uint32_t len, uint32_t *data, bool write); void npu2_dev_procedure_reset(struct npu2_dev *dev); void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag); void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag); uint32_t reset_ntl(struct npu2_dev *ndev); extern int nv_zcal_nominal; void npu2_opencapi_phy_init(struct npu2_dev *dev); void npu2_opencapi_phy_reset(struct npu2_dev *dev); void npu2_opencapi_phy_prbs31(struct npu2_dev *dev); void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev); int64_t npu2_freeze_status(struct phb *phb __unused, uint64_t pe_number __unused, uint8_t *freeze_state, uint16_t *pci_error_type __unused, uint16_t *severity __unused); void npu2_dump_scoms(int chip_id); #endif /* __NPU2_H */