1 // SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2 /* Copyright 2013-2019 IBM Corp. */
3
4 #ifndef __NPU2_H
5 #define __NPU2_H
6
#include <stddef.h>

#include <pci.h>
#include <phys-map.h>
#include <npu2-regs.h>
10
/*
 * Debugging options: per-NPU logging helpers.
 *
 * Each macro forwards to prlog() at a fixed priority and prefixes the
 * message with "NPU<n>: ", where <n> is read from (p)->phb_nvlink.opal_id.
 * The trailing arguments are optional; `##` drops the preceding comma
 * when none are given.
 */
#define NPU2DBG(p, fmt, ...)						\
	prlog(PR_DEBUG, "NPU%d: " fmt,					\
	      (p)->phb_nvlink.opal_id, ##__VA_ARGS__)
#define NPU2INF(p, fmt, ...)						\
	prlog(PR_INFO, "NPU%d: " fmt,					\
	      (p)->phb_nvlink.opal_id, ##__VA_ARGS__)
#define NPU2ERR(p, fmt, ...)						\
	prlog(PR_ERR, "NPU%d: " fmt,					\
	      (p)->phb_nvlink.opal_id, ##__VA_ARGS__)
18
/*
 * Per-device logging helpers.
 *
 * NPU2DEVLOG() calls prlog() at priority l and prefixes the message with
 * "NPU<opal_id>:<bus>:<dev>.<func> ".  The opal_id comes from
 * (p)->npu->phb_nvlink; bus, device and function are extracted from
 * (p)->bdfn with PCI_BUS_NUM(), PCI_DEV() and PCI_FUNC().
 *
 * NPU2DEVDBG/INF/ERR are the same call with the priority fixed.
 */
#define NPU2DEVLOG(l, p, fmt, ...)					\
	prlog(l, "NPU%d:%d:%d.%d " fmt,					\
	      (p)->npu->phb_nvlink.opal_id,				\
	      PCI_BUS_NUM((p)->bdfn),					\
	      PCI_DEV((p)->bdfn),					\
	      PCI_FUNC((p)->bdfn), ##__VA_ARGS__)

#define NPU2DEVDBG(p, fmt, ...)						\
	NPU2DEVLOG(PR_DEBUG, p, fmt, ##__VA_ARGS__)
#define NPU2DEVINF(p, fmt, ...)						\
	NPU2DEVLOG(PR_INFO, p, fmt, ##__VA_ARGS__)
#define NPU2DEVERR(p, fmt, ...)						\
	NPU2DEVLOG(PR_ERR, p, fmt, ##__VA_ARGS__)
27
/*
 * OpenCAPI logging helpers.
 *
 * Each macro calls prlog() at a fixed priority and prefixes the message
 * with "OCAPI[<chip_id>:<brick_index>]: ", read from (dev)->npu->chip_id
 * and (dev)->brick_index.
 *
 * dev is parenthesized in the expansion so that any pointer-valued
 * expression (e.g. &devices[i] or ptr + 1) can be passed, not just a
 * plain identifier.
 */
#define OCAPIDBG(dev, fmt, ...)						\
	prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt,				\
	      (dev)->npu->chip_id, (dev)->brick_index, ##__VA_ARGS__)
#define OCAPIINF(dev, fmt, ...)						\
	prlog(PR_INFO, "OCAPI[%d:%d]: " fmt,				\
	      (dev)->npu->chip_id, (dev)->brick_index, ##__VA_ARGS__)
#define OCAPIERR(dev, fmt, ...)						\
	prlog(PR_ERR, "OCAPI[%d:%d]: " fmt,				\
	      (dev)->npu->chip_id, (dev)->brick_index, ##__VA_ARGS__)
34
35
/*
 * PE number allocation
 *
 * The NPU supports PE numbers 0-15, and for now at most one PE is
 * assigned to each brick.
 *
 * NVLink: the devices are currently exposed to Linux beneath a single
 * virtual PHB, so NVLink is given half of the available PEs
 * (NPU2_MAX_PE_NUM).  That is enough for 6 bricks plus 1 reserved PE.
 *
 * OpenCAPI: the BDF-to-PE registers are used exclusively to map bricks to
 * System Interrupt Log registers; the BDF component of those registers is
 * ignored.  Each brick currently gets a fixed PE in the upper half of the
 * PE namespace, derived from its brick index.
 */
#define NPU2_MAX_PE_NUM		8
#define NPU2_RESERVED_PE_NUM	7

/* Fixed OpenCAPI PE of a device: brick index offset by NPU2_MAX_PE_NUM */
#define NPU2_OCAPI_PE(ndev)	(NPU2_MAX_PE_NUM + (ndev)->brick_index)
54
#define NPU2_LINKS_PER_CHIP	6

/* Link flags */
#define NPU2_DEV_PCI_LINKED	0x1
#define NPU2_DEV_DL_RESET	0x2

/*
 * Position of a device, derived from its brick index:
 *   NPU2DEV_STACK() - the stack (0-2) the device is in
 *   NPU2DEV_BRICK() - the brick number (0-1) within that stack
 */
#define NPU2DEV_STACK(ndev)	((ndev)->brick_index / 2)
#define NPU2DEV_BRICK(ndev)	((ndev)->brick_index % 2)
66
67 /* This represents the state of the actual hardware BARs not the
68 * emulated PCIe BARs. The is a subtle difference between the two as
69 * not all BARs are exposed outside of skiboot. */
70 struct npu2_bar {
71 enum phys_map_type type;
72 int index;
73 #define NPU2_BAR_FLAG_ENABLED 0x0010
74
75 /* Generation ID's are a single space in the hardware but we split
76 * them in two for the emulated PCIe devices so we need to keep track
77 * of which one has been enabled/disabled. */
78 #define NPU2_BAR_FLAG_ENABLED0 0x0080
79 #define NPU2_BAR_FLAG_ENABLED1 0x0100
80 uint32_t flags;
81 uint64_t base;
82 uint64_t size;
83 uint64_t reg;
84 };
85
86 /* Rpresents a BAR that is exposed via the PCIe emulated
87 * devices */
88 struct npu2_pcie_bar {
89 #define NPU2_PCIE_BAR_FLAG_SIZE_HI 0x0020
90 #define NPU2_PCIE_BAR_FLAG_TRAPPED 0x0040
91 uint32_t flags;
92 struct npu2_bar npu2_bar;
93 };
94
/* Kind of device; stored in npu2_dev.type */
enum npu2_dev_type {
	NPU2_DEV_TYPE_UNKNOWN	= 0,
	NPU2_DEV_TYPE_NVLINK	= 1,
	NPU2_DEV_TYPE_OPENCAPI	= 2,
};
100
struct npu2;

/* NVLink-specific part of a device; embedded in struct npu2_dev */
struct npu2_dev_nvlink {
	/*
	 * For NVLink, device and function numbers are allocated based on
	 * GPU association.  Links connected to the same GPU are exposed
	 * as different functions of the same bus/device.
	 */
	uint32_t		gpu_bdfn;

	/* PCI virtual device and the associated GPU device */
	struct pci_virt_device	*pvd;
	struct phb		*phb;
	struct pci_device	*pd;

	uint8_t			link_flags;

	/* Used to associate the NPU device with GPU PCI devices */
	const char		*slot_label;
};
120
121 #define NPU2_DEV_BROKEN 0x1
122
123 struct npu2_dev {
124 enum npu2_dev_type type;
125 uint32_t link_index;
126 uint32_t brick_index;
127 uint64_t pl_xscom_base;
128 struct dt_node *dt_node;
129 struct npu2_pcie_bar bars[2];
130 struct npu2 *npu;
131 long flags;
132
133 uint32_t bdfn;
134
135 /* Which PHY lanes this device is associated with */
136 uint32_t lane_mask;
137 uint64_t link_speed; /* not used for NVLink */
138
139 /* Track currently running procedure and step number */
140 uint16_t procedure_number;
141 uint16_t procedure_step;
142 unsigned long procedure_tb;
143 uint32_t procedure_status;
144
145 /* NVLink */
146 struct npu2_dev_nvlink nvlink;
147
148 /* OpenCAPI */
149 struct phb phb_ocapi;
150 uint64_t linux_pe;
151 unsigned long train_start;
152 unsigned long train_timeout;
153 uint64_t lpc_mem_base;
154 uint64_t lpc_mem_size;
155 };
156
157 struct npu2 {
158 uint32_t index;
159 struct dt_node *dt_node;
160 uint32_t chip_id;
161 uint64_t xscom_base;
162 void *regs;
163 uint64_t mm_base;
164 uint64_t mm_size;
165 uint32_t base_lsi;
166 uint32_t total_devices;
167 struct npu2_dev *devices;
168 enum phys_map_type gpu_map_type;
169 int ctx_ref[NPU2_XTS_BDF_MAP_SIZE];
170
171 /* IODA cache */
172 uint64_t tve_cache[16];
173 bool tx_zcal_complete[2];
174
175 /*
176 * Used to protect global MMIO space, in particular the XTS
177 * tables, and LPC allocation
178 */
179 struct lock lock;
180
181 /* NVLink */
182 struct phb phb_nvlink;
183
184 /* OCAPI */
185 uint64_t i2c_port_id_ocapi;
186 struct lock i2c_lock;
187 uint8_t i2c_pin_mode;
188 uint8_t i2c_pin_wr_state;
189 };
190
phb_to_npu2_nvlink(struct phb * phb)191 static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
192 {
193 assert(phb->phb_type == phb_type_npu_v2);
194 return container_of(phb, struct npu2, phb_nvlink);
195 }
196
phb_to_npu2_dev_ocapi(struct phb * phb)197 static inline struct npu2_dev *phb_to_npu2_dev_ocapi(struct phb *phb)
198 {
199 assert(phb->phb_type == phb_type_npu_v2_opencapi);
200 return container_of(phb, struct npu2_dev, phb_ocapi);
201 }
202
npu2_dev_to_phb(struct npu2_dev * ndev)203 static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
204 {
205 switch (ndev->type) {
206 case NPU2_DEV_TYPE_NVLINK:
207 return &ndev->npu->phb_nvlink;
208 case NPU2_DEV_TYPE_OPENCAPI:
209 return &ndev->phb_ocapi;
210 default:
211 assert(false);
212 }
213 }
214
/*
 * Per-NPU entry points, each operating on one struct npu2.  Only the
 * prototypes are given in this header.
 */
void npu2_i2c_presence_detect(struct npu2 *npu);

int npu2_opencapi_init_npu(struct npu2 *npu);

int npu2_nvlink_init_npu(struct npu2 *npu);
void npu2_nvlink_create_phb(struct npu2 *npu,
			    struct dt_node *dn);
219
/*
 * Register accessors taking an NPU and a register offset.  The plain
 * variants carry a uint64_t value; the _4b variants carry a uint32_t
 * value.  The _mask variants additionally take a mask of the same width
 * as the value.
 */
uint64_t npu2_read(struct npu2 *p, uint64_t reg);
void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val);
void npu2_write_mask(struct npu2 *p, uint64_t reg,
		     uint64_t val, uint64_t mask);

uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg);
void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
void npu2_write_mask_4b(struct npu2 *p, uint64_t reg,
			uint32_t val, uint32_t mask);

/*
 * Device procedure interface.  npu2_dev_procedure() has the shape of a
 * config-space register filter callback (opaque device pointer, filter,
 * offset, length, data, read/write selector) -- NOTE(review): confirm
 * against where it is registered.
 */
int64_t npu2_dev_procedure(void *dev,
			   struct pci_cfg_reg_filter *pcrf,
			   uint32_t offset,
			   uint32_t len,
			   uint32_t *data,
			   bool write);
void npu2_dev_procedure_reset(struct npu2_dev *dev);
230
231 void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag);
232 void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag);
233 uint32_t reset_ntl(struct npu2_dev *ndev);
234 extern int nv_zcal_nominal;
235 void npu2_opencapi_phy_init(struct npu2_dev *dev);
236 int npu2_opencapi_phy_reset(struct npu2_dev *dev);
237 void npu2_opencapi_phy_prbs31(struct npu2_dev *dev);
238 void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev);
239 int64_t npu2_freeze_status(struct phb *phb __unused,
240 uint64_t pe_number __unused,
241 uint8_t *freeze_state,
242 uint16_t *pci_error_type __unused,
243 uint16_t *severity __unused);
244 void npu2_dump_scoms(int chip_id);
245
246 int64_t npu2_init_context(struct phb *phb, uint64_t msr, uint64_t bdf);
247 int64_t npu2_destroy_context(struct phb *phb, uint64_t bdf);
248 int64_t npu2_map_lpar(struct phb *phb, uint64_t bdf, uint64_t lparid,
249 uint64_t lpcr);
250 int64_t npu2_set_relaxed_order(struct phb *phb, uint32_t gcid, int pec,
251 bool enable);
252
253 void npu2_opencapi_set_broken(struct npu2 *npu, int brick);
254
/*
 * Base added to a brick index to form a phb-index.  Chosen to avoid
 * conflicts with PCI, and for historical reasons.
 */
#define NPU2_PHB_INDEX_BASE 7

/*
 * Return the phb-index for the virtual PHB of a brick.
 *
 * With opencapi there is one virtual PHB per brick, so an NPU no longer
 * maps 1-to-1 onto a virtual PHB.  A static phb-index is still wanted,
 * because some platforms need it to use a slot table, so the per-chip
 * phb-index is derived from the brick index.
 *
 * nvlink creates only one virtual PHB per chip; it is treated as if it
 * used brick 0, which opencapi never uses.
 */
static inline int npu2_get_phb_index(unsigned int brick_index)
{
	unsigned int phb_index = NPU2_PHB_INDEX_BASE;

	phb_index += brick_index;

	return phb_index;
}
273
274 #endif /* __NPU2_H */
275