1 /*
2 * virpci.c: helper APIs for managing host PCI devices
3 *
4 * Copyright (C) 2009-2015 Red Hat, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library. If not, see
18 * <http://www.gnu.org/licenses/>.
19 */
20
21 #include <config.h>
22
23 #include "virpci.h"
24 #include "virnetdev.h"
25
26 #include <dirent.h>
27 #include <fcntl.h>
28 #include <inttypes.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32
33 #include "virlog.h"
34 #include "vircommand.h"
35 #include "virerror.h"
36 #include "virfile.h"
37 #include "virkmod.h"
38 #include "virstring.h"
39 #include "viralloc.h"
40 #include "virpcivpd.h"
41
42 VIR_LOG_INIT("util.pci");
43
44 #define PCI_SYSFS "/sys/bus/pci/"
45 #define PCI_ID_LEN 10 /* "XXXX XXXX" */
46
47 VIR_ENUM_IMPL(virPCIELinkSpeed,
48 VIR_PCIE_LINK_SPEED_LAST,
49 "", "2.5", "5", "8", "16",
50 );
51
52 VIR_ENUM_IMPL(virPCIStubDriver,
53 VIR_PCI_STUB_DRIVER_LAST,
54 "none",
55 "pciback", /* XEN */
56 "vfio-pci", /* VFIO */
57 );
58
59 VIR_ENUM_IMPL(virPCIHeader,
60 VIR_PCI_HEADER_LAST,
61 "endpoint",
62 "pci-bridge",
63 "cardbus-bridge",
64 );
65
66 struct _virPCIDevice {
67 virPCIDeviceAddress address;
68
69 char *name; /* domain:bus:slot.function */
70 char id[PCI_ID_LEN]; /* product vendor */
71 char *path;
72
73 /* The driver:domain which uses the device */
74 char *used_by_drvname;
75 char *used_by_domname;
76
77 /* The following 5 items are only valid after virPCIDeviceInit()
78 * has been called for the virPCIDevice object. This is *not* done
79 * in most cases (because it creates extra overhead, and parts of
80 * it can fail if libvirtd is running unprivileged)
81 */
82 unsigned int pcie_cap_pos;
83 unsigned int pci_pm_cap_pos;
84 bool has_flr;
85 bool has_pm_reset;
86 bool is_pcie;
87 /**/
88
89 bool managed;
90
91 virPCIStubDriver stubDriver;
92
93 /* used by reattach function */
94 bool unbind_from_stub;
95 bool remove_slot;
96 bool reprobe;
97 };
98
99 struct _virPCIDeviceList {
100 virObjectLockable parent;
101
102 size_t count;
103 virPCIDevice **devs;
104 };
105
106
107 #define VIR_FROM_THIS VIR_FROM_NONE
108
/* Specifications referenced in the comments below:
 *  PCI30  - PCI Local Bus Specification 3.0
 *  PCIe20 - PCI Express Base Specification 2.0
 *  BR12   - PCI-to-PCI Bridge Architecture Specification 1.2
 *  PM12   - PCI Bus Power Management Interface Specification 1.2
 *  ECN_AF - Advanced Capabilities for Conventional PCI ECN
 */

/* Size of the conventional config space and of the header at its start;
 * PCI30 Section 6.1 Configuration Space Organization */
#define PCI_CONF_LEN            0x100
#define PCI_CONF_HEADER_LEN     0x40

/* PCI30 6.2.1 */
#define PCI_HEADER_TYPE         0x0e    /* Header type */
#define PCI_HEADER_TYPE_BRIDGE  0x1
#define PCI_HEADER_TYPE_MASK    0x7f
#define PCI_HEADER_TYPE_MULTI   0x80

/* PCI30 6.2.1 Device Identification */
#define PCI_CLASS_DEVICE        0x0a    /* Device class */

/* Class Code for bridge; PCI30 D.7 Base Class 06h */
#define PCI_CLASS_BRIDGE_PCI    0x0604

/* PCI30 6.2.3 Device Status */
#define PCI_STATUS              0x06    /* 16 bits */
#define PCI_STATUS_CAP_LIST     0x10    /* Support Capability List */

/* PCI30 6.7 Capabilities List */
#define PCI_CAPABILITY_LIST     0x34    /* Offset of first capability list entry */
#define PCI_CAP_FLAGS           2       /* Capability defined flags (16 bits) */

/* PM12 3.2.1 Capability Identifier */
#define PCI_CAP_ID_PM           0x01    /* Power Management */
/* PCI30 H Capability IDs */
#define PCI_CAP_ID_EXP          0x10    /* PCI Express */
/* ECN_AF 6.x.1.1 Capability ID for AF */
#define PCI_CAP_ID_AF           0x13    /* Advanced Features */

/* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */
#define PCI_EXP_DEVCAP          0x4     /* Device capabilities */
#define PCI_EXP_DEVCAP_FLR      (1<<28) /* Function Level Reset */
#define PCI_EXP_LNKCAP          0xc     /* Link Capabilities */
#define PCI_EXP_LNKCAP_SPEED    0x0000f /* Maximum Link Speed */
#define PCI_EXP_LNKCAP_WIDTH    0x003f0 /* Maximum Link Width */
#define PCI_EXP_LNKSTA          0x12    /* Link Status */
#define PCI_EXP_LNKSTA_SPEED    0x000f  /* Negotiated Link Speed */
#define PCI_EXP_LNKSTA_WIDTH    0x03f0  /* Negotiated Link Width */

/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */
#define PCI_PRIMARY_BUS         0x18    /* BR12 3.2.5.2 Primary bus number */
#define PCI_SECONDARY_BUS       0x19    /* BR12 3.2.5.3 Secondary bus number */
#define PCI_SUBORDINATE_BUS     0x1a    /* BR12 3.2.5.4 Highest bus number behind the bridge */
#define PCI_BRIDGE_CONTROL      0x3e
/* BR12 3.2.5.18 Bridge Control Register */
#define PCI_BRIDGE_CTL_RESET    0x40    /* Secondary bus reset */

/* PM12 3.2.4 Power Management Control/Status (Offset = 4) */
#define PCI_PM_CTRL                 4   /* PM control and status register */
#define PCI_PM_CTRL_STATE_MASK      0x3 /* Current power state (D0 to D3) */
#define PCI_PM_CTRL_STATE_D0        0x0 /* D0 state */
#define PCI_PM_CTRL_STATE_D3hot     0x3 /* D3 state */
#define PCI_PM_CTRL_NO_SOFT_RESET   0x8 /* No reset for D3hot->D0 */

/* ECN_AF 6.x.1 Advanced Features Capability Structure */
#define PCI_AF_CAP              0x3     /* Advanced features capabilities */
#define PCI_AF_CAP_FLR          0x2     /* Function Level Reset */

/* PCI Express capability flags register and its device/port type field */
#define PCI_EXP_FLAGS           0x2
#define PCI_EXP_FLAGS_TYPE      0x00f0
#define PCI_EXP_TYPE_DOWNSTREAM 0x6

/* Extended capability list: first/last offsets and header field layout */
#define PCI_EXT_CAP_BASE            0x100
#define PCI_EXT_CAP_LIMIT           0x1000
#define PCI_EXT_CAP_ID_MASK         0x0000ffff
#define PCI_EXT_CAP_OFFSET_SHIFT    20
#define PCI_EXT_CAP_OFFSET_MASK     0x00000ffc

/* ACS extended capability and its control register */
#define PCI_EXT_CAP_ID_ACS      0x000d
#define PCI_EXT_ACS_CTRL        0x06

/* ACS control bits; all four together count as "ACS enabled" */
#define PCI_EXT_CAP_ACS_SV      0x01
#define PCI_EXT_CAP_ACS_RR      0x04
#define PCI_EXT_CAP_ACS_CR      0x08
#define PCI_EXT_CAP_ACS_UF      0x10
#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \
                                 PCI_EXT_CAP_ACS_RR | \
                                 PCI_EXT_CAP_ACS_CR | \
                                 PCI_EXT_CAP_ACS_UF)

#define PCI_EXP_TYPE_ROOT_INT_EP 0x9    /* Root Complex Integrated Endpoint */
#define PCI_EXP_TYPE_ROOT_EC     0xa    /* Root Complex Event Collector */
201
202 static virClass *virPCIDeviceListClass;
203
204 static void virPCIDeviceListDispose(void *obj);
205
virPCIOnceInit(void)206 static int virPCIOnceInit(void)
207 {
208 if (!VIR_CLASS_NEW(virPCIDeviceList, virClassForObjectLockable()))
209 return -1;
210
211 return 0;
212 }
213
214 VIR_ONCE_GLOBAL_INIT(virPCI);
215
216
217 static char *
virPCIDriverDir(const char * driver)218 virPCIDriverDir(const char *driver)
219 {
220 return g_strdup_printf(PCI_SYSFS "drivers/%s", driver);
221 }
222
223
224 static char *
virPCIFile(const char * device,const char * file)225 virPCIFile(const char *device, const char *file)
226 {
227 return g_strdup_printf(PCI_SYSFS "devices/%s/%s", device, file);
228 }
229
230
231 /* virPCIDeviceGetDriverPathAndName - put the path to the driver
232 * directory of the driver in use for this device in @path and the
233 * name of the driver in @name. Both could be NULL if it's not bound
234 * to any driver.
235 *
236 * Return 0 for success, -1 for error.
237 */
238 int
virPCIDeviceGetDriverPathAndName(virPCIDevice * dev,char ** path,char ** name)239 virPCIDeviceGetDriverPathAndName(virPCIDevice *dev, char **path, char **name)
240 {
241 int ret = -1;
242 g_autofree char *drvlink = NULL;
243
244 *path = *name = NULL;
245
246 /* drvlink = "/sys/bus/pci/dddd:bb:ss.ff/driver" */
247 drvlink = virPCIFile(dev->name, "driver");
248
249 if (!virFileExists(drvlink)) {
250 ret = 0;
251 goto cleanup;
252 }
253
254 if (virFileIsLink(drvlink) != 1) {
255 virReportError(VIR_ERR_INTERNAL_ERROR,
256 _("Invalid device %s driver file %s is not a symlink"),
257 dev->name, drvlink);
258 goto cleanup;
259 }
260 if (virFileResolveLink(drvlink, path) < 0) {
261 virReportError(VIR_ERR_INTERNAL_ERROR,
262 _("Unable to resolve device %s driver symlink %s"),
263 dev->name, drvlink);
264 goto cleanup;
265 }
266 /* path = "/sys/bus/pci/drivers/${drivername}" */
267
268 *name = g_path_get_basename(*path);
269 /* name = "${drivername}" */
270
271 ret = 0;
272 cleanup:
273 if (ret < 0) {
274 VIR_FREE(*path);
275 VIR_FREE(*name);
276 }
277 return ret;
278 }
279
280
281 static int
virPCIDeviceConfigOpenInternal(virPCIDevice * dev,bool readonly,bool fatal)282 virPCIDeviceConfigOpenInternal(virPCIDevice *dev, bool readonly, bool fatal)
283 {
284 int fd;
285
286 fd = open(dev->path, readonly ? O_RDONLY : O_RDWR);
287
288 if (fd < 0) {
289 if (fatal) {
290 virReportSystemError(errno,
291 _("Failed to open config space file '%s'"),
292 dev->path);
293 } else {
294 VIR_WARN("Failed to open config space file '%s': %s",
295 dev->path, g_strerror(errno));
296 }
297 return -1;
298 }
299
300 VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
301 return fd;
302 }
303
304 static int
virPCIDeviceConfigOpen(virPCIDevice * dev)305 virPCIDeviceConfigOpen(virPCIDevice *dev)
306 {
307 return virPCIDeviceConfigOpenInternal(dev, true, true);
308 }
309
310 static int
virPCIDeviceConfigOpenTry(virPCIDevice * dev)311 virPCIDeviceConfigOpenTry(virPCIDevice *dev)
312 {
313 return virPCIDeviceConfigOpenInternal(dev, true, false);
314 }
315
316 static int
virPCIDeviceConfigOpenWrite(virPCIDevice * dev)317 virPCIDeviceConfigOpenWrite(virPCIDevice *dev)
318 {
319 return virPCIDeviceConfigOpenInternal(dev, false, true);
320 }
321
322 static void
virPCIDeviceConfigClose(virPCIDevice * dev,int cfgfd)323 virPCIDeviceConfigClose(virPCIDevice *dev, int cfgfd)
324 {
325 if (VIR_CLOSE(cfgfd) < 0) {
326 VIR_WARN("Failed to close config space file '%s': %s",
327 dev->path, g_strerror(errno));
328 }
329 }
330
331
332 static int
virPCIDeviceRead(virPCIDevice * dev,int cfgfd,unsigned int pos,uint8_t * buf,unsigned int buflen)333 virPCIDeviceRead(virPCIDevice *dev,
334 int cfgfd,
335 unsigned int pos,
336 uint8_t *buf,
337 unsigned int buflen)
338 {
339 memset(buf, 0, buflen);
340 errno = 0;
341
342 if (lseek(cfgfd, pos, SEEK_SET) != pos ||
343 saferead(cfgfd, buf, buflen) != buflen) {
344 VIR_DEBUG("Failed to read %u bytes at %u from '%s' : %s",
345 buflen, pos, dev->path, g_strerror(errno));
346 return -1;
347 }
348 return 0;
349 }
350
351
352 /**
353 * virPCIDeviceReadN:
354 * @dev: virPCIDevice object (used only to log name of config file)
355 * @cfgfd: open file descriptor for device config file in sysfs
356 * @pos: byte offset in the file to read from
357 *
358 * read "N" (where "N" is "8", "16", or "32", and appears at the end
359 * of the function name) bytes from a PCI device's already-opened
360 * sysfs config file and return them as the return value from the
361 * function.
362 *
363 * Returns the value at @pos in the file, or 0 if there was an
364 * error. NB: since 0 could be a valid value, occurrence of an error
365 * must be determined by examining errno. errno is always reset to 0
366 * before the seek/read is attempted (see virPCIDeviceRead()), so if
367 * errno != 0 on return from one of these functions, then either the
368 * seek or the read operation failed for some reason. If errno == 0
369 * and the return value is 0, then the config file really does contain
370 * the value 0 at @pos.
371 */
372 static uint8_t
virPCIDeviceRead8(virPCIDevice * dev,int cfgfd,unsigned int pos)373 virPCIDeviceRead8(virPCIDevice *dev, int cfgfd, unsigned int pos)
374 {
375 uint8_t buf;
376 virPCIDeviceRead(dev, cfgfd, pos, &buf, sizeof(buf));
377 return buf;
378 }
379
380 static uint16_t
virPCIDeviceRead16(virPCIDevice * dev,int cfgfd,unsigned int pos)381 virPCIDeviceRead16(virPCIDevice *dev, int cfgfd, unsigned int pos)
382 {
383 uint8_t buf[2];
384 virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
385 return (buf[0] << 0) | (buf[1] << 8);
386 }
387
388 static uint32_t
virPCIDeviceRead32(virPCIDevice * dev,int cfgfd,unsigned int pos)389 virPCIDeviceRead32(virPCIDevice *dev, int cfgfd, unsigned int pos)
390 {
391 uint8_t buf[4];
392 virPCIDeviceRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
393 return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
394 }
395
396 static int
virPCIDeviceReadClass(virPCIDevice * dev,uint16_t * device_class)397 virPCIDeviceReadClass(virPCIDevice *dev, uint16_t *device_class)
398 {
399 g_autofree char *path = NULL;
400 g_autofree char *id_str = NULL;
401 unsigned int value;
402
403 path = virPCIFile(dev->name, "class");
404
405 /* class string is '0xNNNNNN\n' ... i.e. 9 bytes */
406 if (virFileReadAll(path, 9, &id_str) < 0)
407 return -1;
408
409 id_str[8] = '\0';
410 if (virStrToLong_ui(id_str, NULL, 16, &value) < 0) {
411 virReportError(VIR_ERR_INTERNAL_ERROR,
412 _("Unusual value in %s/devices/%s/class: %s"),
413 PCI_SYSFS, dev->name, id_str);
414 return -1;
415 }
416
417 *device_class = (value >> 8) & 0xFFFF;
418 return 0;
419 }
420
421 static int
virPCIDeviceWrite(virPCIDevice * dev,int cfgfd,unsigned int pos,uint8_t * buf,unsigned int buflen)422 virPCIDeviceWrite(virPCIDevice *dev,
423 int cfgfd,
424 unsigned int pos,
425 uint8_t *buf,
426 unsigned int buflen)
427 {
428 if (lseek(cfgfd, pos, SEEK_SET) != pos ||
429 safewrite(cfgfd, buf, buflen) != buflen) {
430 VIR_WARN("Failed to write to '%s' : %s", dev->path,
431 g_strerror(errno));
432 return -1;
433 }
434 return 0;
435 }
436
437 static void
virPCIDeviceWrite16(virPCIDevice * dev,int cfgfd,unsigned int pos,uint16_t val)438 virPCIDeviceWrite16(virPCIDevice *dev, int cfgfd, unsigned int pos, uint16_t val)
439 {
440 uint8_t buf[2] = { (val >> 0), (val >> 8) };
441 virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
442 }
443
444 static void
virPCIDeviceWrite32(virPCIDevice * dev,int cfgfd,unsigned int pos,uint32_t val)445 virPCIDeviceWrite32(virPCIDevice *dev, int cfgfd, unsigned int pos, uint32_t val)
446 {
447 uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val >> 24) };
448 virPCIDeviceWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
449 }
450
451 typedef int (*virPCIDeviceIterPredicate)(virPCIDevice *, virPCIDevice *,
452 void *);
453
454 /* Iterate over available PCI devices calling @predicate
455 * to compare each one to @dev.
456 * Return -1 on error since we don't want to assume it is
457 * safe to reset if there is an error.
458 */
459 static int
virPCIDeviceIterDevices(virPCIDeviceIterPredicate predicate,virPCIDevice * dev,virPCIDevice ** matched,void * data)460 virPCIDeviceIterDevices(virPCIDeviceIterPredicate predicate,
461 virPCIDevice *dev,
462 virPCIDevice **matched,
463 void *data)
464 {
465 g_autoptr(DIR) dir = NULL;
466 struct dirent *entry;
467 int ret = 0;
468 int rc;
469
470 *matched = NULL;
471
472 VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices", dev->id, dev->name);
473
474 if (virDirOpen(&dir, PCI_SYSFS "devices") < 0)
475 return -1;
476
477 while ((ret = virDirRead(dir, &entry, PCI_SYSFS "devices")) > 0) {
478 g_autoptr(virPCIDevice) check = NULL;
479 virPCIDeviceAddress devAddr;
480 char *tmp;
481
482 /* expected format: <domain>:<bus>:<slot>.<function> */
483 if (/* domain */
484 virStrToLong_ui(entry->d_name, &tmp, 16, &devAddr.domain) < 0 || *tmp != ':' ||
485 /* bus */
486 virStrToLong_ui(tmp + 1, &tmp, 16, &devAddr.bus) < 0 || *tmp != ':' ||
487 /* slot */
488 virStrToLong_ui(tmp + 1, &tmp, 16, &devAddr.slot) < 0 || *tmp != '.' ||
489 /* function */
490 virStrToLong_ui(tmp + 1, NULL, 16, &devAddr.function) < 0) {
491 VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s", entry->d_name);
492 continue;
493 }
494
495 check = virPCIDeviceNew(&devAddr);
496 if (!check) {
497 ret = -1;
498 break;
499 }
500
501 rc = predicate(dev, check, data);
502 if (rc < 0) {
503 /* the predicate returned an error, bail */
504 ret = -1;
505 break;
506 } else if (rc == 1) {
507 VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name, check->name);
508 *matched = g_steal_pointer(&check);
509 ret = 1;
510 break;
511 }
512 }
513 return ret;
514 }
515
516
517 /**
518 * virPCIDeviceFindCapabilityOffset:
519 * @dev: virPCIDevice object (used only to log name of config file)
520 * @cfgfd: open file descriptor for device config file in sysfs
521 * @capability: PCI_CAP_ID_* being requested
522 * @offset: used to return the offset of @capability in the file
523 *
524 * Find the offset of @capability within the PCI config file @cfgfd of
525 * the device @dev. if found, the offset is returned in @offset,
526 * otherwise @offset is set to 0.
527 *
528 * Returns 0 on success, -1 on failure.
529 */
530 static int
virPCIDeviceFindCapabilityOffset(virPCIDevice * dev,int cfgfd,unsigned int capability,unsigned int * offset)531 virPCIDeviceFindCapabilityOffset(virPCIDevice *dev,
532 int cfgfd,
533 unsigned int capability,
534 unsigned int *offset)
535 {
536 uint16_t status;
537 uint8_t pos;
538
539 *offset = 0; /* assume failure (*nothing* can be at offset 0) */
540
541 status = virPCIDeviceRead16(dev, cfgfd, PCI_STATUS);
542 if (errno != 0 || !(status & PCI_STATUS_CAP_LIST))
543 goto error;
544
545 pos = virPCIDeviceRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
546 if (errno != 0)
547 goto error;
548
549 /* Zero indicates last capability, capabilities can't
550 * be in the config space header and 0xff is returned
551 * by the kernel if we don't have access to this region
552 *
553 * Note: we're not handling loops or extended
554 * capabilities here.
555 */
556 while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
557 uint8_t capid = virPCIDeviceRead8(dev, cfgfd, pos);
558 if (errno != 0)
559 goto error;
560
561 if (capid == capability) {
562 VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
563 dev->id, dev->name, capability, pos);
564 *offset = pos;
565 return 0;
566 }
567
568 pos = virPCIDeviceRead8(dev, cfgfd, pos + 1);
569 if (errno != 0)
570 goto error;
571 }
572
573 error:
574 VIR_DEBUG("%s %s: failed to find cap 0x%.2x (%s)",
575 dev->id, dev->name, capability, g_strerror(errno));
576
577 /* reset errno in case the failure was due to insufficient
578 * privileges to read the entire PCI config file
579 */
580 errno = 0;
581
582 return -1;
583 }
584
585 static unsigned int
virPCIDeviceFindExtendedCapabilityOffset(virPCIDevice * dev,int cfgfd,unsigned int capability)586 virPCIDeviceFindExtendedCapabilityOffset(virPCIDevice *dev,
587 int cfgfd,
588 unsigned int capability)
589 {
590 int ttl;
591 unsigned int pos;
592 uint32_t header;
593
594 /* minimum 8 bytes per capability */
595 ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
596 pos = PCI_EXT_CAP_BASE;
597
598 while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
599 header = virPCIDeviceRead32(dev, cfgfd, pos);
600
601 if ((header & PCI_EXT_CAP_ID_MASK) == capability)
602 return pos;
603
604 pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
605 ttl--;
606 }
607
608 return 0;
609 }
610
611 /* detects whether this device has FLR. Returns 0 if the device does
612 * not have FLR, 1 if it does, and -1 on error
613 */
614 static bool
virPCIDeviceDetectFunctionLevelReset(virPCIDevice * dev,int cfgfd)615 virPCIDeviceDetectFunctionLevelReset(virPCIDevice *dev, int cfgfd)
616 {
617 uint32_t caps;
618 unsigned int pos;
619 g_autofree char *path = NULL;
620 int found;
621
622 /* The PCIe Function Level Reset capability allows
623 * individual device functions to be reset without
624 * affecting any other functions on the device or
625 * any other devices on the bus. This is only common
626 * on SR-IOV NICs at the moment.
627 */
628 if (dev->pcie_cap_pos) {
629 caps = virPCIDeviceRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
630 if (caps & PCI_EXP_DEVCAP_FLR) {
631 VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id, dev->name);
632 return true;
633 }
634 }
635
636 /* The PCI AF Function Level Reset capability is
637 * the same thing, except for conventional PCI
638 * devices. This is not common yet.
639 */
640 if (virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF, &pos) < 0)
641 goto error;
642
643 if (pos) {
644 caps = virPCIDeviceRead16(dev, cfgfd, pos + PCI_AF_CAP);
645 if (caps & PCI_AF_CAP_FLR) {
646 VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id, dev->name);
647 return true;
648 }
649 }
650
651 /* there are some buggy devices that do support FLR, but forget to
652 * advertise that fact in their capabilities. However, FLR is *required*
653 * to be present for virtual functions (VFs), so if we see that this
654 * device is a VF, we just assume FLR works
655 */
656
657 path = g_strdup_printf(PCI_SYSFS "devices/%s/physfn", dev->name);
658
659 found = virFileExists(path);
660 if (found) {
661 VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF; forcing flr on",
662 dev->id, dev->name);
663 return true;
664 }
665
666 error:
667 VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);
668 return false;
669 }
670
671 /* Require the device has the PCI Power Management capability
672 * and that a D3hot->D0 transition will results in a full
673 * internal reset, not just a soft reset.
674 */
675 static bool
virPCIDeviceDetectPowerManagementReset(virPCIDevice * dev,int cfgfd)676 virPCIDeviceDetectPowerManagementReset(virPCIDevice *dev, int cfgfd)
677 {
678 if (dev->pci_pm_cap_pos) {
679 uint32_t ctl;
680
681 /* require the NO_SOFT_RESET bit is clear */
682 ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
683 if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
684 VIR_DEBUG("%s %s: detected PM reset capability", dev->id, dev->name);
685 return true;
686 }
687 }
688
689 VIR_DEBUG("%s %s: no PM reset capability found", dev->id, dev->name);
690
691 return false;
692 }
693
694 /* Any active devices on the same domain/bus ? */
695 static int
virPCIDeviceSharesBusWithActive(virPCIDevice * dev,virPCIDevice * check,void * data)696 virPCIDeviceSharesBusWithActive(virPCIDevice *dev, virPCIDevice *check, void *data)
697 {
698 virPCIDeviceList *inactiveDevs = data;
699
700 /* Different domain, different bus, or simply identical device */
701 if (dev->address.domain != check->address.domain ||
702 dev->address.bus != check->address.bus ||
703 (dev->address.slot == check->address.slot &&
704 dev->address.function == check->address.function))
705 return 0;
706
707 /* same bus, but inactive, i.e. about to be assigned to guest */
708 if (inactiveDevs && virPCIDeviceListFind(inactiveDevs, &check->address))
709 return 0;
710
711 return 1;
712 }
713
714 static virPCIDevice *
virPCIDeviceBusContainsActiveDevices(virPCIDevice * dev,virPCIDeviceList * inactiveDevs)715 virPCIDeviceBusContainsActiveDevices(virPCIDevice *dev,
716 virPCIDeviceList *inactiveDevs)
717 {
718 virPCIDevice *active = NULL;
719 if (virPCIDeviceIterDevices(virPCIDeviceSharesBusWithActive,
720 dev, &active, inactiveDevs) < 0)
721 return NULL;
722 return active;
723 }
724
725 /* Is @check the parent of @dev ? */
726 static int
virPCIDeviceIsParent(virPCIDevice * dev,virPCIDevice * check,void * data)727 virPCIDeviceIsParent(virPCIDevice *dev, virPCIDevice *check, void *data)
728 {
729 uint16_t device_class;
730 uint8_t header_type, secondary, subordinate;
731 virPCIDevice **best = data;
732 int ret = 0;
733 int fd;
734
735 if (dev->address.domain != check->address.domain)
736 return 0;
737
738 if ((fd = virPCIDeviceConfigOpenTry(check)) < 0)
739 return 0;
740
741 /* Is it a bridge? */
742 ret = virPCIDeviceReadClass(check, &device_class);
743 if (ret < 0 || device_class != PCI_CLASS_BRIDGE_PCI)
744 goto cleanup;
745
746 /* Is it a plane? */
747 header_type = virPCIDeviceRead8(check, fd, PCI_HEADER_TYPE);
748 if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
749 goto cleanup;
750
751 secondary = virPCIDeviceRead8(check, fd, PCI_SECONDARY_BUS);
752 subordinate = virPCIDeviceRead8(check, fd, PCI_SUBORDINATE_BUS);
753
754 VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name, check->name);
755
756 /* if the secondary bus exactly equals the device's bus, then we found
757 * the direct parent. No further work is necessary
758 */
759 if (dev->address.bus == secondary) {
760 ret = 1;
761 goto cleanup;
762 }
763
764 /* otherwise, SRIOV allows VFs to be on different buses than their PFs.
765 * In this case, what we need to do is look for the "best" match; i.e.
766 * the most restrictive match that still satisfies all of the conditions.
767 */
768 if (dev->address.bus > secondary && dev->address.bus <= subordinate) {
769 if (*best == NULL) {
770 *best = virPCIDeviceNew(&check->address);
771 if (*best == NULL) {
772 ret = -1;
773 goto cleanup;
774 }
775 } else {
776 /* OK, we had already recorded a previous "best" match for the
777 * parent. See if the current device is more restrictive than the
778 * best, and if so, make it the new best
779 */
780 int bestfd;
781 uint8_t best_secondary;
782
783 if ((bestfd = virPCIDeviceConfigOpenTry(*best)) < 0)
784 goto cleanup;
785 best_secondary = virPCIDeviceRead8(*best, bestfd, PCI_SECONDARY_BUS);
786 virPCIDeviceConfigClose(*best, bestfd);
787
788 if (secondary > best_secondary) {
789 virPCIDeviceFree(*best);
790 *best = virPCIDeviceNew(&check->address);
791 if (*best == NULL) {
792 ret = -1;
793 goto cleanup;
794 }
795 }
796 }
797 }
798
799 cleanup:
800 virPCIDeviceConfigClose(check, fd);
801 return ret;
802 }
803
804 static int
virPCIDeviceGetParent(virPCIDevice * dev,virPCIDevice ** parent)805 virPCIDeviceGetParent(virPCIDevice *dev, virPCIDevice **parent)
806 {
807 virPCIDevice *best = NULL;
808 int ret;
809
810 *parent = NULL;
811 ret = virPCIDeviceIterDevices(virPCIDeviceIsParent, dev, parent, &best);
812 if (ret == 1)
813 virPCIDeviceFree(best);
814 else if (ret == 0)
815 *parent = best;
816 return ret;
817 }
818
819 /* Secondary Bus Reset is our sledgehammer - it resets all
820 * devices behind a bus.
821 */
822 static int
virPCIDeviceTrySecondaryBusReset(virPCIDevice * dev,int cfgfd,virPCIDeviceList * inactiveDevs)823 virPCIDeviceTrySecondaryBusReset(virPCIDevice *dev,
824 int cfgfd,
825 virPCIDeviceList *inactiveDevs)
826 {
827 g_autoptr(virPCIDevice) parent = NULL;
828 g_autoptr(virPCIDevice) conflict = NULL;
829 uint8_t config_space[PCI_CONF_LEN];
830 uint16_t ctl;
831 int ret = -1;
832 int parentfd;
833
834 /* Refuse to do a secondary bus reset if there are other
835 * devices/functions behind the bus are used by the host
836 * or other guests.
837 */
838 if ((conflict = virPCIDeviceBusContainsActiveDevices(dev, inactiveDevs))) {
839 virReportError(VIR_ERR_INTERNAL_ERROR,
840 _("Active %s devices on bus with %s, not doing bus reset"),
841 conflict->name, dev->name);
842 return -1;
843 }
844
845 /* Find the parent bus */
846 if (virPCIDeviceGetParent(dev, &parent) < 0)
847 return -1;
848 if (!parent) {
849 virReportError(VIR_ERR_INTERNAL_ERROR,
850 _("Failed to find parent device for %s"),
851 dev->name);
852 return -1;
853 }
854 if ((parentfd = virPCIDeviceConfigOpenWrite(parent)) < 0)
855 goto out;
856
857 VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);
858
859 /* Save and restore the device's config space; we only do this
860 * for the supplied device since we refuse to do a reset if there
861 * are multiple devices/functions
862 */
863 if (virPCIDeviceRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
864 virReportError(VIR_ERR_INTERNAL_ERROR,
865 _("Failed to read PCI config space for %s"),
866 dev->name);
867 goto out;
868 }
869
870 /* Read the control register, set the reset flag, wait 200ms,
871 * unset the reset flag and wait 200ms.
872 */
873 ctl = virPCIDeviceRead16(dev, parentfd, PCI_BRIDGE_CONTROL);
874
875 virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
876 ctl | PCI_BRIDGE_CTL_RESET);
877
878 g_usleep(200 * 1000); /* sleep 200ms */
879
880 virPCIDeviceWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
881
882 g_usleep(200 * 1000); /* sleep 200ms */
883
884 if (virPCIDeviceWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
885 virReportError(VIR_ERR_INTERNAL_ERROR,
886 _("Failed to restore PCI config space for %s"),
887 dev->name);
888 goto out;
889 }
890 ret = 0;
891
892 out:
893 virPCIDeviceConfigClose(parent, parentfd);
894 return ret;
895 }
896
897 /* Power management reset attempts to reset a device using a
898 * D-state transition from D3hot to D0. Note, in detect_pm_reset()
899 * above we require the device supports a full internal reset.
900 */
901 static int
virPCIDeviceTryPowerManagementReset(virPCIDevice * dev,int cfgfd)902 virPCIDeviceTryPowerManagementReset(virPCIDevice *dev, int cfgfd)
903 {
904 uint8_t config_space[PCI_CONF_LEN];
905 uint32_t ctl;
906
907 if (!dev->pci_pm_cap_pos)
908 return -1;
909
910 /* Save and restore the device's config space. */
911 if (virPCIDeviceRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
912 virReportError(VIR_ERR_INTERNAL_ERROR,
913 _("Failed to read PCI config space for %s"),
914 dev->name);
915 return -1;
916 }
917
918 VIR_DEBUG("%s %s: doing a power management reset", dev->id, dev->name);
919
920 ctl = virPCIDeviceRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
921 ctl &= ~PCI_PM_CTRL_STATE_MASK;
922
923 virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
924 ctl | PCI_PM_CTRL_STATE_D3hot);
925
926 g_usleep(10 * 1000); /* sleep 10ms */
927
928 virPCIDeviceWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
929 ctl | PCI_PM_CTRL_STATE_D0);
930
931 g_usleep(10 * 1000); /* sleep 10ms */
932
933 if (virPCIDeviceWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
934 virReportError(VIR_ERR_INTERNAL_ERROR,
935 _("Failed to restore PCI config space for %s"),
936 dev->name);
937 return -1;
938 }
939
940 return 0;
941 }
942
943 /**
944 * virPCIDeviceInit:
945 * @dev: virPCIDevice object needing its PCI capabilities info initialized
946 * @cfgfd: open file descriptor for device config file in sysfs
947 *
948 * Initialize the PCI capabilities attributes of a virPCIDevice object
949 * (i.e. pcie_cap_pos, pci_pm_cap_pos, has_flr, has_pm_reset, and
950 * is_pcie). This is done by walking the info in the (already-opened)
951 * device PCI config file in sysfs. This function can be called
952 * regardless of whether a process has sufficient privilege to read
953 * the entire file (unprivileged processes can only read the 1st 64
954 * bytes, while the Express Capabilities are all located beyond that
955 * boundary).
956 *
957 * In the case that we are unable to read a capability
958 * directly, we will attempt to infer its value by other means. In
959 * particular, we can determine that a device is (almost surely) PCIe
960 * by checking that the length of the config file is != 256 (since all
961 * conventional PCI config files are 256 bytes), and we know that any
962 * device that is an SR-IOV VF will have FLR available (since that is
963 * required by the SR-IOV spec.)
964 *
965 * Always returns success (0) (for now)
966 */
967 static int
virPCIDeviceInit(virPCIDevice * dev,int cfgfd)968 virPCIDeviceInit(virPCIDevice *dev, int cfgfd)
969 {
970 dev->is_pcie = false;
971 if (virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP, &dev->pcie_cap_pos) < 0) {
972 /* an unprivileged process is unable to read *all* of a
973 * device's PCI config (it can only read the first 64
974 * bytes, which isn't enough for see the Express
975 * Capabilities data). If virPCIDeviceFindCapabilityOffset
976 * returns failure (and not just a pcie_cap_pos == 0,
977 * which is *success* at determining the device is *not*
978 * PCIe) we make an educated guess based on the length of
979 * the device's config file - if it is 256 bytes, then it
980 * is definitely a legacy PCI device. If it's larger than
981 * that, then it is *probably PCIe (although it could be
982 * PCI-x, but those are extremely rare). If the config
983 * file can't be found (in which case the "length" will be
984 * -1), then we blindly assume the most likely outcome -
985 * PCIe.
986 */
987 off_t configLen = virFileLength(virPCIDeviceGetConfigPath(dev), -1);
988
989 if (configLen != 256)
990 dev->is_pcie = true;
991
992 } else {
993 dev->is_pcie = (dev->pcie_cap_pos != 0);
994 }
995
996 virPCIDeviceFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM, &dev->pci_pm_cap_pos);
997 dev->has_flr = virPCIDeviceDetectFunctionLevelReset(dev, cfgfd);
998 dev->has_pm_reset = virPCIDeviceDetectPowerManagementReset(dev, cfgfd);
999
1000 return 0;
1001 }
1002
1003 int
virPCIDeviceReset(virPCIDevice * dev,virPCIDeviceList * activeDevs,virPCIDeviceList * inactiveDevs)1004 virPCIDeviceReset(virPCIDevice *dev,
1005 virPCIDeviceList *activeDevs,
1006 virPCIDeviceList *inactiveDevs)
1007 {
1008 g_autofree char *drvPath = NULL;
1009 g_autofree char *drvName = NULL;
1010 int ret = -1;
1011 int fd = -1;
1012 int hdrType = -1;
1013
1014 if (virPCIGetHeaderType(dev, &hdrType) < 0)
1015 return -1;
1016
1017 if (hdrType != VIR_PCI_HEADER_ENDPOINT) {
1018 virReportError(VIR_ERR_INTERNAL_ERROR,
1019 _("Invalid attempt to reset PCI device %s. "
1020 "Only PCI endpoint devices can be reset"),
1021 dev->name);
1022 return -1;
1023 }
1024
1025 if (activeDevs && virPCIDeviceListFind(activeDevs, &dev->address)) {
1026 virReportError(VIR_ERR_INTERNAL_ERROR,
1027 _("Not resetting active device %s"), dev->name);
1028 return -1;
1029 }
1030
1031 /* If the device is currently bound to vfio-pci, ignore all
1032 * requests to reset it, since the vfio-pci driver will always
1033 * reset it whenever appropriate, so doing it ourselves would just
1034 * be redundant.
1035 */
1036 if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
1037 goto cleanup;
1038
1039 if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) {
1040 VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
1041 dev->name);
1042 ret = 0;
1043 goto cleanup;
1044 }
1045 VIR_DEBUG("Resetting device %s", dev->name);
1046
1047 if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0)
1048 goto cleanup;
1049
1050 if (virPCIDeviceInit(dev, fd) < 0)
1051 goto cleanup;
1052
1053 /* KVM will perform FLR when starting and stopping
1054 * a guest, so there is no need for us to do it here.
1055 */
1056 if (dev->has_flr) {
1057 ret = 0;
1058 goto cleanup;
1059 }
1060
1061 /* If the device supports PCI power management reset,
1062 * that's the next best thing because it only resets
1063 * the function, not the whole device.
1064 */
1065 if (dev->has_pm_reset)
1066 ret = virPCIDeviceTryPowerManagementReset(dev, fd);
1067
1068 /* Bus reset is not an option with the root bus */
1069 if (ret < 0 && dev->address.bus != 0)
1070 ret = virPCIDeviceTrySecondaryBusReset(dev, fd, inactiveDevs);
1071
1072 if (ret < 0) {
1073 virErrorPtr err = virGetLastError();
1074 virReportError(VIR_ERR_INTERNAL_ERROR,
1075 _("Unable to reset PCI device %s: %s"),
1076 dev->name,
1077 err ? err->message :
1078 _("no FLR, PM reset or bus reset available"));
1079 }
1080
1081 cleanup:
1082 virPCIDeviceConfigClose(dev, fd);
1083 return ret;
1084 }
1085
1086
1087 static int
virPCIProbeStubDriver(virPCIStubDriver driver)1088 virPCIProbeStubDriver(virPCIStubDriver driver)
1089 {
1090 const char *drvname = NULL;
1091 g_autofree char *drvpath = NULL;
1092 g_autofree char *errbuf = NULL;
1093
1094 if (driver == VIR_PCI_STUB_DRIVER_NONE ||
1095 !(drvname = virPCIStubDriverTypeToString(driver))) {
1096 virReportError(VIR_ERR_INTERNAL_ERROR,
1097 "%s",
1098 _("Attempting to use unknown stub driver"));
1099 return -1;
1100 }
1101
1102 drvpath = virPCIDriverDir(drvname);
1103
1104 /* driver previously loaded, return */
1105 if (virFileExists(drvpath))
1106 return 0;
1107
1108 if ((errbuf = virKModLoad(drvname))) {
1109 VIR_WARN("failed to load driver %s: %s", drvname, errbuf);
1110 goto cleanup;
1111 }
1112
1113 /* driver loaded after probing */
1114 if (virFileExists(drvpath))
1115 return 0;
1116
1117 cleanup:
1118 /* If we know failure was because of admin config, let's report that;
1119 * otherwise, report a more generic failure message
1120 */
1121 if (virKModIsProhibited(drvname)) {
1122 virReportError(VIR_ERR_INTERNAL_ERROR,
1123 _("Failed to load PCI stub module %s: "
1124 "administratively prohibited"),
1125 drvname);
1126 } else {
1127 virReportError(VIR_ERR_INTERNAL_ERROR,
1128 _("Failed to load PCI stub module %s"),
1129 drvname);
1130 }
1131
1132 return -1;
1133 }
1134
1135 int
virPCIDeviceUnbind(virPCIDevice * dev)1136 virPCIDeviceUnbind(virPCIDevice *dev)
1137 {
1138 g_autofree char *path = NULL;
1139 g_autofree char *drvpath = NULL;
1140 g_autofree char *driver = NULL;
1141
1142 if (virPCIDeviceGetDriverPathAndName(dev, &drvpath, &driver) < 0)
1143 return -1;
1144
1145 if (!driver)
1146 /* The device is not bound to any driver */
1147 return 0;
1148
1149 path = virPCIFile(dev->name, "driver/unbind");
1150
1151 if (virFileExists(path)) {
1152 if (virFileWriteStr(path, dev->name, 0) < 0) {
1153 virReportSystemError(errno,
1154 _("Failed to unbind PCI device '%s' from %s"),
1155 dev->name, driver);
1156 return -1;
1157 }
1158 }
1159
1160 return 0;
1161 }
1162
1163
1164 /**
1165 * virPCIDeviceRebind:
1166 * @dev: virPCIDevice object describing the device to rebind
1167 *
1168 * unbind a device from its driver, then immediately rebind it.
1169 *
1170 * Returns 0 on success, -1 on failure
1171 */
int
virPCIDeviceRebind(virPCIDevice *dev)
{
    if (virPCIDeviceUnbind(dev) < 0)
        return -1;

    /* Ask the PCI bus to probe for a driver for this device again */
    if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) >= 0)
        return 0;

    virReportSystemError(errno,
                         _("Failed to trigger a probe for PCI device '%s'"),
                         dev->name);
    return -1;
}
1186
1187
1188 /*
1189 * Bind a PCI device to a driver using driver_override sysfs interface.
1190 * E.g.
1191 *
1192 * echo driver-name > /sys/bus/pci/devices/0000:03:00.0/driver_override
1193 * echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
1194 * echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
1195 *
1196 * An empty driverName will cause the device to be bound to its
1197 * preferred driver.
1198 */
1199 static int
virPCIDeviceBindWithDriverOverride(virPCIDevice * dev,const char * driverName)1200 virPCIDeviceBindWithDriverOverride(virPCIDevice *dev,
1201 const char *driverName)
1202 {
1203 g_autofree char *path = NULL;
1204
1205 path = virPCIFile(dev->name, "driver_override");
1206
1207 if (virFileWriteStr(path, driverName, 0) < 0) {
1208 virReportSystemError(errno,
1209 _("Failed to add driver '%s' to driver_override "
1210 " interface of PCI device '%s'"),
1211 driverName, dev->name);
1212 return -1;
1213 }
1214
1215 if (virPCIDeviceRebind(dev) < 0)
1216 return -1;
1217
1218 return 0;
1219 }
1220
1221 static int
virPCIDeviceUnbindFromStub(virPCIDevice * dev)1222 virPCIDeviceUnbindFromStub(virPCIDevice *dev)
1223 {
1224 if (!dev->unbind_from_stub) {
1225 VIR_DEBUG("Unbind from stub skipped for PCI device %s", dev->name);
1226 return 0;
1227 }
1228
1229 return virPCIDeviceBindWithDriverOverride(dev, "\n");
1230 }
1231
1232 static int
virPCIDeviceBindToStub(virPCIDevice * dev)1233 virPCIDeviceBindToStub(virPCIDevice *dev)
1234 {
1235 const char *stubDriverName;
1236 g_autofree char *stubDriverPath = NULL;
1237 g_autofree char *driverLink = NULL;
1238
1239 /* Check the device is configured to use one of the known stub drivers */
1240 if (dev->stubDriver == VIR_PCI_STUB_DRIVER_NONE) {
1241 virReportError(VIR_ERR_INTERNAL_ERROR,
1242 _("No stub driver configured for PCI device %s"),
1243 dev->name);
1244 return -1;
1245 } else if (!(stubDriverName = virPCIStubDriverTypeToString(dev->stubDriver))) {
1246 virReportError(VIR_ERR_INTERNAL_ERROR,
1247 _("Unknown stub driver configured for PCI device %s"),
1248 dev->name);
1249 return -1;
1250 }
1251
1252 stubDriverPath = virPCIDriverDir(stubDriverName);
1253 driverLink = virPCIFile(dev->name, "driver");
1254
1255 if (virFileExists(driverLink)) {
1256 if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
1257 /* The device is already bound to the correct driver */
1258 VIR_DEBUG("Device %s is already bound to %s",
1259 dev->name, stubDriverName);
1260 return 0;
1261 }
1262 }
1263
1264 if (virPCIDeviceBindWithDriverOverride(dev, stubDriverName) < 0)
1265 return -1;
1266
1267 dev->unbind_from_stub = true;
1268 return 0;
1269 }
1270
1271 /* virPCIDeviceDetach:
1272 *
1273 * Detach this device from the host driver, attach it to the stub
1274 * driver (previously set with virPCIDeviceSetStubDriver(), and add *a
1275 * copy* of the object to the inactiveDevs list (if provided). This
1276 * function will *never* consume dev, so the caller should free it.
1277 *
1278 * Returns 0 on success, -1 on failure (will fail if the device is
1279 * already in the activeDevs list, but will be a NOP if the device is
1280 * already bound to the stub).
1281 *
1282 * GENERAL NOTE: activeDevs should be a list of all PCI devices
1283 * currently in use by a domain. inactiveDevs is a list of all PCI
1284 * devices that libvirt has detached from the host driver + attached
1285 * to the stub driver, but hasn't yet assigned to a domain. Any device
1286 * that is still attached to its host driver should not be on either
1287 * list.
1288 */
1289 int
virPCIDeviceDetach(virPCIDevice * dev,virPCIDeviceList * activeDevs,virPCIDeviceList * inactiveDevs)1290 virPCIDeviceDetach(virPCIDevice *dev,
1291 virPCIDeviceList *activeDevs,
1292 virPCIDeviceList *inactiveDevs)
1293 {
1294 if (virPCIProbeStubDriver(dev->stubDriver) < 0)
1295 return -1;
1296
1297 if (activeDevs && virPCIDeviceListFind(activeDevs, &dev->address)) {
1298 virReportError(VIR_ERR_INTERNAL_ERROR,
1299 _("Not detaching active device %s"), dev->name);
1300 return -1;
1301 }
1302
1303 if (virPCIDeviceBindToStub(dev) < 0)
1304 return -1;
1305
1306 /* Add *a copy of* the dev into list inactiveDevs, if
1307 * it's not already there.
1308 */
1309 if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, &dev->address)) {
1310 VIR_DEBUG("Adding PCI device %s to inactive list", dev->name);
1311 if (virPCIDeviceListAddCopy(inactiveDevs, dev) < 0)
1312 return -1;
1313 }
1314
1315 return 0;
1316 }
1317
1318 /*
1319 * Pre-condition: inactivePCIHostdevs & activePCIHostdevs
1320 * are locked
1321 */
1322 int
virPCIDeviceReattach(virPCIDevice * dev,virPCIDeviceList * activeDevs,virPCIDeviceList * inactiveDevs)1323 virPCIDeviceReattach(virPCIDevice *dev,
1324 virPCIDeviceList *activeDevs,
1325 virPCIDeviceList *inactiveDevs)
1326 {
1327 if (activeDevs && virPCIDeviceListFind(activeDevs, &dev->address)) {
1328 virReportError(VIR_ERR_INTERNAL_ERROR,
1329 _("Not reattaching active device %s"), dev->name);
1330 return -1;
1331 }
1332
1333 if (virPCIDeviceUnbindFromStub(dev) < 0)
1334 return -1;
1335
1336 /* Steal the dev from list inactiveDevs */
1337 if (inactiveDevs) {
1338 VIR_DEBUG("Removing PCI device %s from inactive list", dev->name);
1339 virPCIDeviceListDel(inactiveDevs, &dev->address);
1340 }
1341
1342 return 0;
1343 }
1344
1345 static char *
virPCIDeviceReadID(virPCIDevice * dev,const char * id_name)1346 virPCIDeviceReadID(virPCIDevice *dev, const char *id_name)
1347 {
1348 g_autofree char *path = NULL;
1349 g_autofree char *id_str = NULL;
1350
1351 path = virPCIFile(dev->name, id_name);
1352
1353 /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
1354 if (virFileReadAll(path, 7, &id_str) < 0)
1355 return NULL;
1356
1357 /* Check for 0x suffix */
1358 if (id_str[0] != '0' || id_str[1] != 'x')
1359 return NULL;
1360
1361 /* Chop off the newline; we know the string is 7 bytes */
1362 id_str[6] = '\0';
1363
1364 return g_steal_pointer(&id_str);
1365 }
1366
1367 bool
virPCIDeviceAddressIsValid(virPCIDeviceAddress * addr,bool report)1368 virPCIDeviceAddressIsValid(virPCIDeviceAddress *addr,
1369 bool report)
1370 {
1371 if (addr->bus > 0xFF) {
1372 if (report)
1373 virReportError(VIR_ERR_XML_ERROR,
1374 _("Invalid PCI address bus='0x%x', "
1375 "must be <= 0xFF"),
1376 addr->bus);
1377 return false;
1378 }
1379 if (addr->slot > 0x1F) {
1380 if (report)
1381 virReportError(VIR_ERR_XML_ERROR,
1382 _("Invalid PCI address slot='0x%x', "
1383 "must be <= 0x1F"),
1384 addr->slot);
1385 return false;
1386 }
1387 if (addr->function > 7) {
1388 if (report)
1389 virReportError(VIR_ERR_XML_ERROR,
1390 _("Invalid PCI address function=0x%x, "
1391 "must be <= 7"),
1392 addr->function);
1393 return false;
1394 }
1395 if (virPCIDeviceAddressIsEmpty(addr)) {
1396 if (report)
1397 virReportError(VIR_ERR_XML_ERROR, "%s",
1398 _("Invalid PCI address 0000:00:00, at least "
1399 "one of domain, bus, or slot must be > 0"));
1400 return false;
1401 }
1402 return true;
1403 }
1404
1405 bool
virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress * addr)1406 virPCIDeviceAddressIsEmpty(const virPCIDeviceAddress *addr)
1407 {
1408 return !(addr->domain || addr->bus || addr->slot);
1409 }
1410
1411 bool
virPCIDeviceAddressEqual(const virPCIDeviceAddress * addr1,const virPCIDeviceAddress * addr2)1412 virPCIDeviceAddressEqual(const virPCIDeviceAddress *addr1,
1413 const virPCIDeviceAddress *addr2)
1414 {
1415 if (addr1->domain == addr2->domain &&
1416 addr1->bus == addr2->bus &&
1417 addr1->slot == addr2->slot &&
1418 addr1->function == addr2->function) {
1419 return true;
1420 }
1421 return false;
1422 }
1423
1424 /**
1425 * virPCIDeviceAddressCopy:
1426 * @dst: where to store address
1427 * @src: source address to copy
1428 *
1429 * Creates a deep copy of given @src address and stores it into
1430 * @dst which has to be pre-allocated by caller.
1431 */
void
virPCIDeviceAddressCopy(virPCIDeviceAddress *dst,
                        const virPCIDeviceAddress *src)
{
    /* Plain byte copy of the whole address structure */
    memcpy(dst, src, sizeof(*dst));
}
1437
1438 char *
virPCIDeviceAddressAsString(const virPCIDeviceAddress * addr)1439 virPCIDeviceAddressAsString(const virPCIDeviceAddress *addr)
1440 {
1441 return g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT, addr->domain,
1442 addr->bus, addr->slot, addr->function);
1443 }
1444
1445 bool
virPCIDeviceExists(const virPCIDeviceAddress * addr)1446 virPCIDeviceExists(const virPCIDeviceAddress *addr)
1447 {
1448 g_autofree char *devName = virPCIDeviceAddressAsString(addr);
1449 g_autofree char *devPath = g_strdup_printf(PCI_SYSFS "devices/%s/config",
1450 devName);
1451
1452 return virFileExists(devPath);
1453 }
1454
1455 virPCIDevice *
virPCIDeviceNew(const virPCIDeviceAddress * address)1456 virPCIDeviceNew(const virPCIDeviceAddress *address)
1457 {
1458 g_autoptr(virPCIDevice) dev = NULL;
1459 g_autofree char *vendor = NULL;
1460 g_autofree char *product = NULL;
1461
1462 dev = g_new0(virPCIDevice, 1);
1463
1464 virPCIDeviceAddressCopy(&dev->address, address);
1465
1466 dev->name = virPCIDeviceAddressAsString(&dev->address);
1467
1468 dev->path = g_strdup_printf(PCI_SYSFS "devices/%s/config", dev->name);
1469
1470 if (!virFileExists(dev->path)) {
1471 virReportSystemError(errno,
1472 _("Device %s not found: could not access %s"),
1473 dev->name, dev->path);
1474 return NULL;
1475 }
1476
1477 vendor = virPCIDeviceReadID(dev, "vendor");
1478 product = virPCIDeviceReadID(dev, "device");
1479
1480 if (!vendor || !product) {
1481 virReportError(VIR_ERR_INTERNAL_ERROR,
1482 _("Failed to read product/vendor ID for %s"),
1483 dev->name);
1484 return NULL;
1485 }
1486
1487 /* strings contain '0x' prefix */
1488 if (g_snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
1489 &product[2]) >= sizeof(dev->id)) {
1490 virReportError(VIR_ERR_INTERNAL_ERROR,
1491 _("dev->id buffer overflow: %s %s"),
1492 &vendor[2], &product[2]);
1493 return NULL;
1494 }
1495
1496 VIR_DEBUG("%s %s: initialized", dev->id, dev->name);
1497
1498 return g_steal_pointer(&dev);
1499 }
1500
1501
1502 virPCIDevice *
virPCIDeviceCopy(virPCIDevice * dev)1503 virPCIDeviceCopy(virPCIDevice *dev)
1504 {
1505 virPCIDevice *copy;
1506
1507 copy = g_new0(virPCIDevice, 1);
1508
1509 /* shallow copy to take care of most attributes */
1510 *copy = *dev;
1511 copy->path = NULL;
1512 copy->used_by_drvname = copy->used_by_domname = NULL;
1513 copy->name = g_strdup(dev->name);
1514 copy->path = g_strdup(dev->path);
1515 copy->used_by_drvname = g_strdup(dev->used_by_drvname);
1516 copy->used_by_domname = g_strdup(dev->used_by_domname);
1517 return copy;
1518 }
1519
1520
1521 void
virPCIDeviceFree(virPCIDevice * dev)1522 virPCIDeviceFree(virPCIDevice *dev)
1523 {
1524 if (!dev)
1525 return;
1526 VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
1527 g_free(dev->name);
1528 g_free(dev->path);
1529 g_free(dev->used_by_drvname);
1530 g_free(dev->used_by_domname);
1531 g_free(dev);
1532 }
1533
1534 /**
1535 * virPCIDeviceGetAddress:
1536 * @dev: device to get address from
1537 *
1538 * Take a PCI device on input and return its PCI address. The
1539 * returned object is owned by the device and must not be freed.
1540 *
1541 * Returns: a pointer to the address, which can never be NULL.
1542 */
1543 virPCIDeviceAddress *
virPCIDeviceGetAddress(virPCIDevice * dev)1544 virPCIDeviceGetAddress(virPCIDevice *dev)
1545 {
1546 return &(dev->address);
1547 }
1548
1549 const char *
virPCIDeviceGetName(virPCIDevice * dev)1550 virPCIDeviceGetName(virPCIDevice *dev)
1551 {
1552 return dev->name;
1553 }
1554
1555 /**
1556 * virPCIDeviceGetConfigPath:
1557 *
1558 * Returns a pointer to a string containing the path of @dev's PCI
1559 * config file.
1560 */
1561 const char *
virPCIDeviceGetConfigPath(virPCIDevice * dev)1562 virPCIDeviceGetConfigPath(virPCIDevice *dev)
1563 {
1564 return dev->path;
1565 }
1566
/**
 * virPCIDeviceSetManaged:
 * @dev: device to update
 * @managed: new value of the managed flag
 */
void
virPCIDeviceSetManaged(virPCIDevice *dev, bool managed)
{
    dev->managed = managed;
}
1571
1572 bool
virPCIDeviceGetManaged(virPCIDevice * dev)1573 virPCIDeviceGetManaged(virPCIDevice *dev)
1574 {
1575 return dev->managed;
1576 }
1577
1578 void
virPCIDeviceSetStubDriver(virPCIDevice * dev,virPCIStubDriver driver)1579 virPCIDeviceSetStubDriver(virPCIDevice *dev, virPCIStubDriver driver)
1580 {
1581 dev->stubDriver = driver;
1582 }
1583
1584 virPCIStubDriver
virPCIDeviceGetStubDriver(virPCIDevice * dev)1585 virPCIDeviceGetStubDriver(virPCIDevice *dev)
1586 {
1587 return dev->stubDriver;
1588 }
1589
1590 bool
virPCIDeviceGetUnbindFromStub(virPCIDevice * dev)1591 virPCIDeviceGetUnbindFromStub(virPCIDevice *dev)
1592 {
1593 return dev->unbind_from_stub;
1594 }
1595
1596 void
virPCIDeviceSetUnbindFromStub(virPCIDevice * dev,bool unbind)1597 virPCIDeviceSetUnbindFromStub(virPCIDevice *dev, bool unbind)
1598 {
1599 dev->unbind_from_stub = unbind;
1600 }
1601
1602 bool
virPCIDeviceGetRemoveSlot(virPCIDevice * dev)1603 virPCIDeviceGetRemoveSlot(virPCIDevice *dev)
1604 {
1605 return dev->remove_slot;
1606 }
1607
1608 void
virPCIDeviceSetRemoveSlot(virPCIDevice * dev,bool remove_slot)1609 virPCIDeviceSetRemoveSlot(virPCIDevice *dev, bool remove_slot)
1610 {
1611 dev->remove_slot = remove_slot;
1612 }
1613
1614 bool
virPCIDeviceGetReprobe(virPCIDevice * dev)1615 virPCIDeviceGetReprobe(virPCIDevice *dev)
1616 {
1617 return dev->reprobe;
1618 }
1619
1620 void
virPCIDeviceSetReprobe(virPCIDevice * dev,bool reprobe)1621 virPCIDeviceSetReprobe(virPCIDevice *dev, bool reprobe)
1622 {
1623 dev->reprobe = reprobe;
1624 }
1625
1626 int
virPCIDeviceSetUsedBy(virPCIDevice * dev,const char * drv_name,const char * dom_name)1627 virPCIDeviceSetUsedBy(virPCIDevice *dev,
1628 const char *drv_name,
1629 const char *dom_name)
1630 {
1631 VIR_FREE(dev->used_by_drvname);
1632 VIR_FREE(dev->used_by_domname);
1633 dev->used_by_drvname = g_strdup(drv_name);
1634 dev->used_by_domname = g_strdup(dom_name);
1635
1636 return 0;
1637 }
1638
1639 void
virPCIDeviceGetUsedBy(virPCIDevice * dev,const char ** drv_name,const char ** dom_name)1640 virPCIDeviceGetUsedBy(virPCIDevice *dev,
1641 const char **drv_name,
1642 const char **dom_name)
1643 {
1644 *drv_name = dev->used_by_drvname;
1645 *dom_name = dev->used_by_domname;
1646 }
1647
1648 virPCIDeviceList *
virPCIDeviceListNew(void)1649 virPCIDeviceListNew(void)
1650 {
1651 virPCIDeviceList *list;
1652
1653 if (virPCIInitialize() < 0)
1654 return NULL;
1655
1656 if (!(list = virObjectLockableNew(virPCIDeviceListClass)))
1657 return NULL;
1658
1659 return list;
1660 }
1661
1662 static void
virPCIDeviceListDispose(void * obj)1663 virPCIDeviceListDispose(void *obj)
1664 {
1665 virPCIDeviceList *list = obj;
1666 size_t i;
1667
1668 for (i = 0; i < list->count; i++) {
1669 virPCIDeviceFree(list->devs[i]);
1670 list->devs[i] = NULL;
1671 }
1672
1673 list->count = 0;
1674 g_free(list->devs);
1675 }
1676
1677 int
virPCIDeviceListAdd(virPCIDeviceList * list,virPCIDevice * dev)1678 virPCIDeviceListAdd(virPCIDeviceList *list,
1679 virPCIDevice *dev)
1680 {
1681 if (virPCIDeviceListFind(list, &dev->address)) {
1682 virReportError(VIR_ERR_INTERNAL_ERROR,
1683 _("Device %s is already in use"), dev->name);
1684 return -1;
1685 }
1686 VIR_APPEND_ELEMENT(list->devs, list->count, dev);
1687
1688 return 0;
1689 }
1690
1691
1692 /* virPCIDeviceListAddCopy - add a *copy* of the device to this list */
1693 int
virPCIDeviceListAddCopy(virPCIDeviceList * list,virPCIDevice * dev)1694 virPCIDeviceListAddCopy(virPCIDeviceList *list, virPCIDevice *dev)
1695 {
1696 g_autoptr(virPCIDevice) copy = virPCIDeviceCopy(dev);
1697
1698 if (!copy)
1699 return -1;
1700 if (virPCIDeviceListAdd(list, copy) < 0)
1701 return -1;
1702
1703 copy = NULL;
1704 return 0;
1705 }
1706
1707
1708 virPCIDevice *
virPCIDeviceListGet(virPCIDeviceList * list,int idx)1709 virPCIDeviceListGet(virPCIDeviceList *list,
1710 int idx)
1711 {
1712 if (idx >= list->count)
1713 return NULL;
1714 if (idx < 0)
1715 return NULL;
1716
1717 return list->devs[idx];
1718 }
1719
1720 size_t
virPCIDeviceListCount(virPCIDeviceList * list)1721 virPCIDeviceListCount(virPCIDeviceList *list)
1722 {
1723 return list->count;
1724 }
1725
1726 virPCIDevice *
virPCIDeviceListStealIndex(virPCIDeviceList * list,int idx)1727 virPCIDeviceListStealIndex(virPCIDeviceList *list,
1728 int idx)
1729 {
1730 virPCIDevice *ret;
1731
1732 if (idx < 0 || idx >= list->count)
1733 return NULL;
1734
1735 ret = list->devs[idx];
1736 VIR_DELETE_ELEMENT(list->devs, idx, list->count);
1737 return ret;
1738 }
1739
1740 virPCIDevice *
virPCIDeviceListSteal(virPCIDeviceList * list,virPCIDeviceAddress * devAddr)1741 virPCIDeviceListSteal(virPCIDeviceList *list,
1742 virPCIDeviceAddress *devAddr)
1743 {
1744 return virPCIDeviceListStealIndex(list, virPCIDeviceListFindIndex(list, devAddr));
1745 }
1746
1747 void
virPCIDeviceListDel(virPCIDeviceList * list,virPCIDeviceAddress * devAddr)1748 virPCIDeviceListDel(virPCIDeviceList *list,
1749 virPCIDeviceAddress *devAddr)
1750 {
1751 virPCIDeviceFree(virPCIDeviceListSteal(list, devAddr));
1752 }
1753
1754 int
virPCIDeviceListFindIndex(virPCIDeviceList * list,virPCIDeviceAddress * devAddr)1755 virPCIDeviceListFindIndex(virPCIDeviceList *list,
1756 virPCIDeviceAddress *devAddr)
1757 {
1758 size_t i;
1759
1760 for (i = 0; i < list->count; i++) {
1761 virPCIDevice *other = list->devs[i];
1762 if (other->address.domain == devAddr->domain &&
1763 other->address.bus == devAddr->bus &&
1764 other->address.slot == devAddr->slot &&
1765 other->address.function == devAddr->function)
1766 return i;
1767 }
1768 return -1;
1769 }
1770
1771
1772 virPCIDevice *
virPCIDeviceListFindByIDs(virPCIDeviceList * list,unsigned int domain,unsigned int bus,unsigned int slot,unsigned int function)1773 virPCIDeviceListFindByIDs(virPCIDeviceList *list,
1774 unsigned int domain,
1775 unsigned int bus,
1776 unsigned int slot,
1777 unsigned int function)
1778 {
1779 size_t i;
1780
1781 for (i = 0; i < list->count; i++) {
1782 virPCIDevice *other = list->devs[i];
1783 if (other->address.domain == domain &&
1784 other->address.bus == bus &&
1785 other->address.slot == slot &&
1786 other->address.function == function)
1787 return list->devs[i];
1788 }
1789 return NULL;
1790 }
1791
1792
1793 virPCIDevice *
virPCIDeviceListFind(virPCIDeviceList * list,virPCIDeviceAddress * devAddr)1794 virPCIDeviceListFind(virPCIDeviceList *list, virPCIDeviceAddress *devAddr)
1795 {
1796 int idx;
1797
1798 if ((idx = virPCIDeviceListFindIndex(list, devAddr)) >= 0)
1799 return list->devs[idx];
1800 else
1801 return NULL;
1802 }
1803
1804
/**
 * virPCIDeviceFileIterate:
 * @dev: device whose sysfs directory is scanned
 * @actor: callback invoked with the full path of each selected file
 * @opaque: data passed through to @actor
 *
 * Call @actor for the files in the sysfs directory of @dev that
 * device assignment requires: config, resource*, rom, reset, vendor
 * and device.
 *
 * Returns 0 on success, -1 if the directory cannot be opened or read,
 * or as soon as @actor returns a negative value.
 */
int
virPCIDeviceFileIterate(virPCIDevice *dev,
                        virPCIDeviceFileActor actor,
                        void *opaque)
{
    g_autofree char *devDir = NULL;
    g_autoptr(DIR) dirp = NULL;
    struct dirent *entry;
    int rc;

    devDir = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT,
                             dev->address.domain, dev->address.bus,
                             dev->address.slot, dev->address.function);

    if (virDirOpen(&dirp, devDir) < 0)
        return -1;

    while ((rc = virDirRead(dirp, &entry, devDir)) > 0) {
        g_autofree char *fullPath = NULL;
        /* Device assignment requires:
         *   $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
         *   $PCIDIR/rom, $PCIDIR/reset, $PCIDIR/vendor, $PCIDIR/device
         */
        bool wanted = STREQ(entry->d_name, "config") ||
                      STRPREFIX(entry->d_name, "resource") ||
                      STREQ(entry->d_name, "rom") ||
                      STREQ(entry->d_name, "vendor") ||
                      STREQ(entry->d_name, "device") ||
                      STREQ(entry->d_name, "reset");

        if (!wanted)
            continue;

        fullPath = g_strdup_printf("%s/%s", devDir, entry->d_name);
        if (actor(dev, fullPath, opaque) < 0)
            return -1;
    }

    return rc < 0 ? -1 : 0;
}
1843
1844
/* virPCIDeviceAddressIOMMUGroupIterate:
 * Call @actor for all devices in the same iommu_group as @orig
 * (including @orig itself). Even if there is no iommu_group for the
 * device, call @actor once for @orig.
 */
1850 int
virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddress * orig,virPCIDeviceAddressActor actor,void * opaque)1851 virPCIDeviceAddressIOMMUGroupIterate(virPCIDeviceAddress *orig,
1852 virPCIDeviceAddressActor actor,
1853 void *opaque)
1854 {
1855 g_autofree char *groupPath = NULL;
1856 g_autoptr(DIR) groupDir = NULL;
1857 struct dirent *ent;
1858 int direrr;
1859
1860 groupPath = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT "/iommu_group/devices",
1861 orig->domain, orig->bus, orig->slot, orig->function);
1862
1863 if (virDirOpenQuiet(&groupDir, groupPath) < 0) {
1864 /* just process the original device, nothing more */
1865 return (actor)(orig, opaque);
1866 }
1867
1868 while ((direrr = virDirRead(groupDir, &ent, groupPath)) > 0) {
1869 virPCIDeviceAddress newDev;
1870
1871 if (virPCIDeviceAddressParse(ent->d_name, &newDev) < 0) {
1872 virReportError(VIR_ERR_INTERNAL_ERROR,
1873 _("Found invalid device link '%s' in '%s'"),
1874 ent->d_name, groupPath);
1875 return -1;
1876 }
1877
1878 if ((actor)(&newDev, opaque) < 0)
1879 return -1;
1880 }
1881 if (direrr < 0)
1882 return -1;
1883
1884 return 0;
1885 }
1886
1887
1888 static int
virPCIDeviceGetIOMMUGroupAddOne(virPCIDeviceAddress * newDevAddr,void * opaque)1889 virPCIDeviceGetIOMMUGroupAddOne(virPCIDeviceAddress *newDevAddr, void *opaque)
1890 {
1891 virPCIDeviceList *groupList = opaque;
1892 g_autoptr(virPCIDevice) newDev = NULL;
1893
1894 if (!(newDev = virPCIDeviceNew(newDevAddr)))
1895 return -1;
1896
1897 if (virPCIDeviceListAdd(groupList, newDev) < 0)
1898 return -1;
1899
1900 newDev = NULL; /* it's now on the list */
1901 return 0;
1902 }
1903
1904
1905 /*
1906 * virPCIDeviceGetIOMMUGroupList - return a virPCIDeviceList containing
1907 * all of the devices in the same iommu_group as @dev.
1908 *
1909 * Return the new list, or NULL on failure
1910 */
1911 virPCIDeviceList *
virPCIDeviceGetIOMMUGroupList(virPCIDevice * dev)1912 virPCIDeviceGetIOMMUGroupList(virPCIDevice *dev)
1913 {
1914 virPCIDeviceList *groupList = virPCIDeviceListNew();
1915
1916 if (!groupList)
1917 goto error;
1918
1919 if (virPCIDeviceAddressIOMMUGroupIterate(&(dev->address),
1920 virPCIDeviceGetIOMMUGroupAddOne,
1921 groupList) < 0)
1922 goto error;
1923
1924 return groupList;
1925
1926 error:
1927 virObjectUnref(groupList);
1928 return NULL;
1929 }
1930
1931
1932 typedef struct {
1933 virPCIDeviceAddress ***iommuGroupDevices;
1934 size_t *nIommuGroupDevices;
1935 } virPCIDeviceAddressList;
1936
1937 static int
virPCIGetIOMMUGroupAddressesAddOne(virPCIDeviceAddress * newDevAddr,void * opaque)1938 virPCIGetIOMMUGroupAddressesAddOne(virPCIDeviceAddress *newDevAddr, void *opaque)
1939 {
1940 virPCIDeviceAddressList *addrList = opaque;
1941 g_autofree virPCIDeviceAddress *copyAddr = NULL;
1942
1943 /* make a copy to insert onto the list */
1944 copyAddr = g_new0(virPCIDeviceAddress, 1);
1945
1946 *copyAddr = *newDevAddr;
1947
1948 VIR_APPEND_ELEMENT(*addrList->iommuGroupDevices,
1949 *addrList->nIommuGroupDevices, copyAddr);
1950
1951 return 0;
1952 }
1953
1954
/*
 * virPCIDeviceAddressGetIOMMUGroupAddresses - append to
 * @iommuGroupDevices (updating @nIommuGroupDevices) a newly allocated
 * copy of the address of every device in the same iommu_group as
 * @devAddr.
 *
 * Returns 0 on success, -1 on failure
 */
1962 int
virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddress * devAddr,virPCIDeviceAddress *** iommuGroupDevices,size_t * nIommuGroupDevices)1963 virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddress *devAddr,
1964 virPCIDeviceAddress ***iommuGroupDevices,
1965 size_t *nIommuGroupDevices)
1966 {
1967 virPCIDeviceAddressList addrList = { iommuGroupDevices,
1968 nIommuGroupDevices };
1969
1970 if (virPCIDeviceAddressIOMMUGroupIterate(devAddr,
1971 virPCIGetIOMMUGroupAddressesAddOne,
1972 &addrList) < 0)
1973 return -1;
1974
1975 return 0;
1976 }
1977
1978
1979 /* virPCIDeviceAddressGetIOMMUGroupNum - return the group number of
1980 * this PCI device's iommu_group, or -2 if there is no iommu_group for
1981 * the device (or -1 if there was any other error)
1982 */
1983 int
virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddress * addr)1984 virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddress *addr)
1985 {
1986 g_autofree char *devName = NULL;
1987 g_autofree char *devPath = NULL;
1988 g_autofree char *groupPath = NULL;
1989 g_autofree char *groupNumStr = NULL;
1990 unsigned int groupNum;
1991
1992 devName = virPCIDeviceAddressAsString(addr);
1993
1994 devPath = virPCIFile(devName, "iommu_group");
1995
1996 if (virFileIsLink(devPath) != 1)
1997 return -2;
1998 if (virFileResolveLink(devPath, &groupPath) < 0) {
1999 virReportError(VIR_ERR_INTERNAL_ERROR,
2000 _("Unable to resolve device %s iommu_group symlink %s"),
2001 devName, devPath);
2002 return -1;
2003 }
2004
2005 groupNumStr = g_path_get_basename(groupPath);
2006 if (virStrToLong_ui(groupNumStr, NULL, 10, &groupNum) < 0) {
2007 virReportError(VIR_ERR_INTERNAL_ERROR,
2008 _("device %s iommu_group symlink %s has "
2009 "invalid group number %s"),
2010 devName, groupPath, groupNumStr);
2011 return -1;
2012 }
2013
2014 return groupNum;
2015 }
2016
2017
2018 char *
virPCIDeviceAddressGetIOMMUGroupDev(const virPCIDeviceAddress * devAddr)2019 virPCIDeviceAddressGetIOMMUGroupDev(const virPCIDeviceAddress *devAddr)
2020 {
2021 g_autoptr(virPCIDevice) pci = NULL;
2022
2023 if (!(pci = virPCIDeviceNew(devAddr)))
2024 return NULL;
2025
2026 return virPCIDeviceGetIOMMUGroupDev(pci);
2027 }
2028
2029
2030 /* virPCIDeviceGetIOMMUGroupDev - return the name of the device used
2031 * to control this PCI device's group (e.g. "/dev/vfio/15")
2032 */
2033 char *
virPCIDeviceGetIOMMUGroupDev(virPCIDevice * dev)2034 virPCIDeviceGetIOMMUGroupDev(virPCIDevice *dev)
2035 {
2036 g_autofree char *devPath = NULL;
2037 g_autofree char *groupPath = NULL;
2038 g_autofree char *groupFile = NULL;
2039
2040 devPath = virPCIFile(dev->name, "iommu_group");
2041
2042 if (virFileIsLink(devPath) != 1) {
2043 virReportError(VIR_ERR_INTERNAL_ERROR,
2044 _("Invalid device %s iommu_group file %s is not a symlink"),
2045 dev->name, devPath);
2046 return NULL;
2047 }
2048 if (virFileResolveLink(devPath, &groupPath) < 0) {
2049 virReportError(VIR_ERR_INTERNAL_ERROR,
2050 _("Unable to resolve device %s iommu_group symlink %s"),
2051 dev->name, devPath);
2052 return NULL;
2053 }
2054 groupFile = g_path_get_basename(groupPath);
2055
2056 return g_strdup_printf("/dev/vfio/%s", groupFile);
2057 }
2058
2059 static int
virPCIDeviceDownstreamLacksACS(virPCIDevice * dev)2060 virPCIDeviceDownstreamLacksACS(virPCIDevice *dev)
2061 {
2062 uint16_t flags;
2063 uint16_t ctrl;
2064 unsigned int pos;
2065 int fd;
2066 int ret = 0;
2067 uint16_t device_class;
2068
2069 if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
2070 return -1;
2071
2072 if (virPCIDeviceInit(dev, fd) < 0) {
2073 ret = -1;
2074 goto cleanup;
2075 }
2076
2077 if (virPCIDeviceReadClass(dev, &device_class) < 0)
2078 goto cleanup;
2079
2080 pos = dev->pcie_cap_pos;
2081 if (!pos || device_class != PCI_CLASS_BRIDGE_PCI)
2082 goto cleanup;
2083
2084 flags = virPCIDeviceRead16(dev, fd, pos + PCI_EXP_FLAGS);
2085 if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
2086 goto cleanup;
2087
2088 pos = virPCIDeviceFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
2089 if (!pos) {
2090 VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name);
2091 ret = 1;
2092 goto cleanup;
2093 }
2094
2095 ctrl = virPCIDeviceRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
2096 if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
2097 VIR_DEBUG("%s %s: downstream port has ACS disabled",
2098 dev->id, dev->name);
2099 ret = 1;
2100 goto cleanup;
2101 }
2102
2103 cleanup:
2104 virPCIDeviceConfigClose(dev, fd);
2105 return ret;
2106 }
2107
2108 static int
virPCIDeviceIsBehindSwitchLackingACS(virPCIDevice * dev)2109 virPCIDeviceIsBehindSwitchLackingACS(virPCIDevice *dev)
2110 {
2111 g_autoptr(virPCIDevice) parent = NULL;
2112
2113 if (virPCIDeviceGetParent(dev, &parent) < 0)
2114 return -1;
2115 if (!parent) {
2116 /* if we have no parent, and this is the root bus, ACS doesn't come
2117 * into play since devices on the root bus can't P2P without going
2118 * through the root IOMMU.
2119 */
2120 if (dev->address.bus == 0) {
2121 return 0;
2122 } else {
2123 virReportError(VIR_ERR_INTERNAL_ERROR,
2124 _("Failed to find parent device for %s"),
2125 dev->name);
2126 return -1;
2127 }
2128 }
2129
2130 /* XXX we should rather fail when we can't find device's parent and
2131 * stop the loop when we get to root instead of just stopping when no
2132 * parent can be found
2133 */
2134 do {
2135 g_autoptr(virPCIDevice) tmp = NULL;
2136 int acs;
2137 int ret;
2138
2139 acs = virPCIDeviceDownstreamLacksACS(parent);
2140
2141 if (acs) {
2142 if (acs < 0)
2143 return -1;
2144 else
2145 return 1;
2146 }
2147
2148 tmp = g_steal_pointer(&parent);
2149 ret = virPCIDeviceGetParent(tmp, &parent);
2150 if (ret < 0)
2151 return -1;
2152 } while (parent);
2153
2154 return 0;
2155 }
2156
/**
 * virPCIDeviceIsAssignable:
 * @dev: PCI device to check
 * @strict_acs_check: non-zero to refuse devices behind a switch
 *                    lacking ACS
 *
 * Returns 1 if the device may be assigned, 0 if it may not (including
 * the case where the ACS check itself failed).
 */
int virPCIDeviceIsAssignable(virPCIDevice *dev,
                             int strict_acs_check)
{
    int lacking;

    /* XXX This could be a great place to actually check that a non-managed
     * device isn't in use, e.g. by checking that device is either un-bound
     * or bound to a stub driver.
     */

    lacking = virPCIDeviceIsBehindSwitchLackingACS(dev);

    if (lacking < 0)
        return 0;

    if (lacking == 0)
        return 1;

    if (strict_acs_check) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Device %s is behind a switch lacking ACS and "
                         "cannot be assigned"),
                       dev->name);
        return 0;
    }

    VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed",
              dev->id, dev->name);
    return 1;
}
2186
/* Wrapper around virStrToLong_ui() that additionally logs an error
 * naming the offending string whenever the conversion returns a
 * non-zero status. Arguments and return value are passed through
 * unchanged.
 */
static int
logStrToLong_ui(char const *s,
                char **end_ptr,
                int base,
                unsigned int *result)
{
    int rc = virStrToLong_ui(s, end_ptr, base, result);

    if (rc != 0)
        VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);

    return rc;
}
2200
2201 int
virPCIDeviceAddressParse(char * address,virPCIDeviceAddress * bdf)2202 virPCIDeviceAddressParse(char *address,
2203 virPCIDeviceAddress *bdf)
2204 {
2205 char *p = NULL;
2206
2207 if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
2208 &bdf->domain) == -1)) {
2209 return -1;
2210 }
2211
2212 if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
2213 &bdf->bus) == -1)) {
2214 return -1;
2215 }
2216
2217 if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
2218 &bdf->slot) == -1)) {
2219 return -1;
2220 }
2221
2222 if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
2223 &bdf->function) == -1)) {
2224 return -1;
2225 }
2226
2227 return 0;
2228 }
2229
2230
2231 bool
virZPCIDeviceAddressIsIncomplete(const virZPCIDeviceAddress * addr)2232 virZPCIDeviceAddressIsIncomplete(const virZPCIDeviceAddress *addr)
2233 {
2234 return !addr->uid.isSet || !addr->fid.isSet;
2235 }
2236
2237
2238 bool
virZPCIDeviceAddressIsPresent(const virZPCIDeviceAddress * addr)2239 virZPCIDeviceAddressIsPresent(const virZPCIDeviceAddress *addr)
2240 {
2241 return addr->uid.isSet || addr->fid.isSet;
2242 }
2243
2244
2245 void
virPCIVirtualFunctionListFree(virPCIVirtualFunctionList * list)2246 virPCIVirtualFunctionListFree(virPCIVirtualFunctionList *list)
2247 {
2248 size_t i;
2249
2250 if (!list)
2251 return;
2252
2253 for (i = 0; i < list->nfunctions; i++) {
2254 g_free(list->functions[i].addr);
2255 g_free(list->functions[i].ifname);
2256 }
2257
2258 g_free(list);
2259 }
2260
2261
2262 int
virPCIGetVirtualFunctions(const char * sysfs_path,virPCIVirtualFunctionList ** vfs)2263 virPCIGetVirtualFunctions(const char *sysfs_path,
2264 virPCIVirtualFunctionList **vfs)
2265 {
2266 return virPCIGetVirtualFunctionsFull(sysfs_path, vfs, NULL);
2267 }
2268
2269
2270 #ifdef __linux__
2271
2272 virPCIDeviceAddress *
virPCIGetDeviceAddressFromSysfsLink(const char * device_link)2273 virPCIGetDeviceAddressFromSysfsLink(const char *device_link)
2274 {
2275 g_autofree virPCIDeviceAddress *bdf = NULL;
2276 g_autofree char *config_address = NULL;
2277 g_autofree char *device_path = NULL;
2278
2279 if (!virFileExists(device_link)) {
2280 VIR_DEBUG("'%s' does not exist", device_link);
2281 return NULL;
2282 }
2283
2284 device_path = virFileCanonicalizePath(device_link);
2285 if (device_path == NULL) {
2286 virReportSystemError(errno,
2287 _("Failed to resolve device link '%s'"),
2288 device_link);
2289 return NULL;
2290 }
2291
2292 config_address = g_path_get_basename(device_path);
2293 bdf = g_new0(virPCIDeviceAddress, 1);
2294
2295 if (virPCIDeviceAddressParse(config_address, bdf) < 0) {
2296 virReportError(VIR_ERR_INTERNAL_ERROR,
2297 _("Failed to parse PCI config address '%s'"),
2298 config_address);
2299 return NULL;
2300 }
2301
2302 return g_steal_pointer(&bdf);
2303 }
2304
2305 /**
2306 * virPCIGetPhysicalFunction:
2307 * @vf_sysfs_path: sysfs path for the virtual function
2308 * @pf: where to store the physical function's address
2309 *
2310 * Given @vf_sysfs_path, this function will store the pointer
2311 * to a newly-allocated virPCIDeviceAddress in @pf.
2312 *
2313 * @pf might be NULL if @vf_sysfs_path does not point to a
2314 * virtual function. If it's not NULL, then it should be
2315 * freed by the caller when no longer needed.
2316 *
2317 * Returns: >=0 on success, <0 on failure
2318 */
2319 int
virPCIGetPhysicalFunction(const char * vf_sysfs_path,virPCIDeviceAddress ** pf)2320 virPCIGetPhysicalFunction(const char *vf_sysfs_path,
2321 virPCIDeviceAddress **pf)
2322 {
2323 g_autofree char *device_link = NULL;
2324
2325 *pf = NULL;
2326
2327 virBuildPath(&device_link, vf_sysfs_path, "physfn");
2328
2329 if ((*pf = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2330 VIR_DEBUG("PF for VF device '%s': " VIR_PCI_DEVICE_ADDRESS_FMT,
2331 vf_sysfs_path,
2332 (*pf)->domain, (*pf)->bus, (*pf)->slot, (*pf)->function);
2333 }
2334
2335 return 0;
2336 }
2337
2338
2339 /**
2340 * virPCIGetVirtualFunctionsFull:
2341 * @sysfs_path: path to physical function sysfs entry
2342 * @vfs: filled with the virtual function data
2343 * @pfPhysPortID: Optional physical port id. If provided the network interface
2344 * name of the VFs is queried too.
2345 *
2346 *
2347 * Returns virtual functions of a physical function.
2348 */
2349 int
virPCIGetVirtualFunctionsFull(const char * sysfs_path,virPCIVirtualFunctionList ** vfs,const char * pfPhysPortID)2350 virPCIGetVirtualFunctionsFull(const char *sysfs_path,
2351 virPCIVirtualFunctionList **vfs,
2352 const char *pfPhysPortID)
2353 {
2354 g_autofree char *totalvfs_file = NULL;
2355 g_autofree char *totalvfs_str = NULL;
2356 g_autoptr(virPCIVirtualFunctionList) list = g_new0(virPCIVirtualFunctionList, 1);
2357
2358 *vfs = NULL;
2359
2360 totalvfs_file = g_strdup_printf("%s/sriov_totalvfs", sysfs_path);
2361 if (virFileExists(totalvfs_file)) {
2362 char *end = NULL; /* so that terminating \n doesn't create error */
2363 unsigned long long maxfunctions = 0;
2364
2365 if (virFileReadAll(totalvfs_file, 16, &totalvfs_str) < 0)
2366 return -1;
2367 if (virStrToLong_ull(totalvfs_str, &end, 10, &maxfunctions) < 0) {
2368 virReportError(VIR_ERR_INTERNAL_ERROR,
2369 _("Unrecognized value in %s: %s"),
2370 totalvfs_file, totalvfs_str);
2371 return -1;
2372 }
2373 list->maxfunctions = maxfunctions;
2374 }
2375
2376 do {
2377 g_autofree char *device_link = NULL;
2378 struct virPCIVirtualFunction fnc = { NULL, NULL };
2379
2380 /* look for virtfn%d links until one isn't found */
2381 device_link = g_strdup_printf("%s/virtfn%zu", sysfs_path, list->nfunctions);
2382
2383 if (!virFileExists(device_link))
2384 break;
2385
2386 if (!(fnc.addr = virPCIGetDeviceAddressFromSysfsLink(device_link))) {
2387 virReportError(VIR_ERR_INTERNAL_ERROR,
2388 _("Failed to get SRIOV function from device link '%s'"),
2389 device_link);
2390 return -1;
2391 }
2392
2393 if (pfPhysPortID) {
2394 if (virPCIGetNetName(device_link, 0, pfPhysPortID, &fnc.ifname) < 0) {
2395 g_free(fnc.addr);
2396 return -1;
2397 }
2398 }
2399
2400 VIR_APPEND_ELEMENT(list->functions, list->nfunctions, fnc);
2401 } while (1);
2402
2403 VIR_DEBUG("Found %zu virtual functions for %s", list->nfunctions, sysfs_path);
2404
2405 *vfs = g_steal_pointer(&list);
2406 return 0;
2407 }
2408
2409
2410 /*
2411 * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
2412 */
2413 int
virPCIIsVirtualFunction(const char * vf_sysfs_device_link)2414 virPCIIsVirtualFunction(const char *vf_sysfs_device_link)
2415 {
2416 g_autofree char *vf_sysfs_physfn_link = NULL;
2417
2418 vf_sysfs_physfn_link = g_strdup_printf("%s/physfn", vf_sysfs_device_link);
2419
2420 return virFileExists(vf_sysfs_physfn_link);
2421 }
2422
2423 /*
2424 * Returns the sriov virtual function index of vf given its pf
2425 */
2426 int
virPCIGetVirtualFunctionIndex(const char * pf_sysfs_device_link,const char * vf_sysfs_device_link,int * vf_index)2427 virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
2428 const char *vf_sysfs_device_link,
2429 int *vf_index)
2430 {
2431 size_t i;
2432 g_autofree virPCIDeviceAddress *vf_bdf = NULL;
2433 g_autoptr(virPCIVirtualFunctionList) virt_fns = NULL;
2434
2435 if (!(vf_bdf = virPCIGetDeviceAddressFromSysfsLink(vf_sysfs_device_link)))
2436 return -1;
2437
2438 if (virPCIGetVirtualFunctions(pf_sysfs_device_link, &virt_fns) < 0) {
2439 virReportError(VIR_ERR_INTERNAL_ERROR,
2440 _("Error getting physical function's '%s' "
2441 "virtual_functions"), pf_sysfs_device_link);
2442 return -1;
2443 }
2444
2445 for (i = 0; i < virt_fns->nfunctions; i++) {
2446 if (virPCIDeviceAddressEqual(vf_bdf, virt_fns->functions[i].addr)) {
2447 *vf_index = i;
2448 return 0;
2449 }
2450 }
2451
2452 return -1;
2453 }
2454
2455 /*
2456 * Returns a path to the PCI sysfs file given the BDF of the PCI function
2457 */
2458
2459 int
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddress * addr,char ** pci_sysfs_device_link)2460 virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddress *addr,
2461 char **pci_sysfs_device_link)
2462 {
2463 *pci_sysfs_device_link = g_strdup_printf(PCI_SYSFS "devices/" VIR_PCI_DEVICE_ADDRESS_FMT, addr->domain,
2464 addr->bus, addr->slot, addr->function);
2465 return 0;
2466 }
2467
2468 /**
2469 * virPCIGetNetName:
2470 * @device_link_sysfs_path: sysfs path to the PCI device
2471 * @idx: used to choose which netdev when there are several
2472 * (ignored if physPortID is set or physPortName is available)
2473 * @physPortID: match this string in the netdev's phys_port_id
2474 * (or NULL to ignore and use phys_port_name or idx instead)
2475 * @netname: used to return the name of the netdev
2476 * (set to NULL (but returns success) if there is no netdev)
2477 *
2478 * Returns 0 on success, -1 on error (error has been logged)
2479 */
2480 int
virPCIGetNetName(const char * device_link_sysfs_path,size_t idx,const char * physPortID,char ** netname)2481 virPCIGetNetName(const char *device_link_sysfs_path,
2482 size_t idx,
2483 const char *physPortID,
2484 char **netname)
2485 {
2486 g_autofree char *pcidev_sysfs_net_path = NULL;
2487 g_autofree char *firstEntryName = NULL;
2488 g_autoptr(DIR) dir = NULL;
2489 struct dirent *entry = NULL;
2490 size_t i = 0;
2491
2492 *netname = NULL;
2493
2494 virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path, "net");
2495
2496 if (virDirOpenQuiet(&dir, pcidev_sysfs_net_path) < 0) {
2497 /* this *isn't* an error - caller needs to check for netname == NULL */
2498 return 0;
2499 }
2500
2501 while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
2502 /* save the first entry we find to use as a failsafe
2503 * in case we don't match the phys_port_id. This is
2504 * needed because some NIC drivers (e.g. i40e)
2505 * implement phys_port_id for PFs, but not for VFs
2506 */
2507 if (!firstEntryName)
2508 firstEntryName = g_strdup(entry->d_name);
2509
2510 /* if the caller sent a physPortID, compare it to the
2511 * physportID of this netdev. If not, look for entry[idx].
2512 */
2513 if (physPortID) {
2514 g_autofree char *thisPhysPortID = NULL;
2515
2516 if (virNetDevGetPhysPortID(entry->d_name, &thisPhysPortID) < 0)
2517 return -1;
2518
2519 /* if this one doesn't match, keep looking */
2520 if (STRNEQ_NULLABLE(physPortID, thisPhysPortID))
2521 continue;
2522
2523 } else {
2524 /* Most switch devices use phys_port_name instead of
2525 * phys_port_id.
2526 * NOTE: VFs' representors net devices can be linked to PF's PCI
2527 * device, which mean that there'll be multiple net devices
2528 * instances and to get a proper net device need to match on
2529 * specific regex.
2530 * To get PF netdev, for ex., used following regex:
2531 * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
2532 * or to get exact VF's netdev next regex is used:
2533 * "pf0vf1$"
2534 */
2535 g_autofree char *thisPhysPortName = NULL;
2536
2537 if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0)
2538 return -1;
2539
2540 if (thisPhysPortName) {
2541
2542 /* if this one doesn't match, keep looking */
2543 if (!virStringMatch(thisPhysPortName, VIR_PF_PHYS_PORT_NAME_REGEX))
2544 continue;
2545
2546 } else {
2547
2548 if (i++ < idx)
2549 continue;
2550 }
2551 }
2552
2553 *netname = g_strdup(entry->d_name);
2554 return 0;
2555 }
2556
2557 if (firstEntryName) {
2558 /* we didn't match the provided phys_port_id / find a
2559 * phys_port_name matching VIR_PF_PHYS_PORT_NAME_REGEX / find
2560 * as many net devices as the value of idx, but this is
2561 * probably because phys_port_id / phys_port_name isn't
2562 * implemented for this NIC driver, so just return the first
2563 * (probably only) netname we found.
2564 */
2565 *netname = g_steal_pointer(&firstEntryName);
2566 return 0;
2567 }
2568
2569 virReportError(VIR_ERR_INTERNAL_ERROR,
2570 _("Could not find any network device under PCI device at %s"),
2571 device_link_sysfs_path);
2572 return -1;
2573 }
2574
2575 int
virPCIGetVirtualFunctionInfo(const char * vf_sysfs_device_path,int pfNetDevIdx,char ** pfname,int * vf_index)2576 virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
2577 int pfNetDevIdx,
2578 char **pfname,
2579 int *vf_index)
2580 {
2581 g_autofree virPCIDeviceAddress *pf_config_address = NULL;
2582 g_autofree char *pf_sysfs_device_path = NULL;
2583 g_autofree char *vfname = NULL;
2584 g_autofree char *vfPhysPortID = NULL;
2585
2586 if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
2587 return -1;
2588
2589 if (!pf_config_address)
2590 return -1;
2591
2592 if (virPCIDeviceAddressGetSysfsFile(pf_config_address,
2593 &pf_sysfs_device_path) < 0) {
2594 return -1;
2595 }
2596
2597 if (virPCIGetVirtualFunctionIndex(pf_sysfs_device_path,
2598 vf_sysfs_device_path, vf_index) < 0) {
2599 return -1;
2600 }
2601
2602 /* If the caller hasn't asked for a specific pfNetDevIdx, and VF
2603 * is bound to a netdev, learn that netdev's phys_port_id (if
2604 * available). This can be used to disambiguate when the PF has
2605 * multiple netdevs. If the VF isn't bound to a netdev, then we
2606 * return netdev[pfNetDevIdx] on the PF, which may or may not be
2607 * correct.
2608 */
2609 if (pfNetDevIdx == -1) {
2610 if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
2611 return -1;
2612
2613 if (vfname) {
2614 if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
2615 return -1;
2616 }
2617 pfNetDevIdx = 0;
2618 }
2619
2620 if (virPCIGetNetName(pf_sysfs_device_path,
2621 pfNetDevIdx, vfPhysPortID, pfname) < 0) {
2622 return -1;
2623 }
2624
2625 if (!*pfname) {
2626 /* this shouldn't be possible. A VF can't exist unless its
2627 * PF device is bound to a network driver
2628 */
2629 virReportError(VIR_ERR_INTERNAL_ERROR,
2630 _("The PF device for VF %s has no network device name"),
2631 vf_sysfs_device_path);
2632 return -1;
2633 }
2634
2635 return 0;
2636 }
2637
2638
2639 bool
virPCIDeviceHasVPD(virPCIDevice * dev)2640 virPCIDeviceHasVPD(virPCIDevice *dev)
2641 {
2642 g_autofree char *vpdPath = NULL;
2643
2644 vpdPath = virPCIFile(dev->name, "vpd");
2645 if (!virFileExists(vpdPath)) {
2646 VIR_INFO("Device VPD file does not exist %s", vpdPath);
2647 return false;
2648 } else if (!virFileIsRegular(vpdPath)) {
2649 VIR_WARN("VPD path does not point to a regular file %s", vpdPath);
2650 return false;
2651 }
2652 return true;
2653 }
2654
2655 /**
2656 * virPCIDeviceGetVPD:
2657 * @dev: a PCI device to get a PCI VPD for.
2658 *
2659 * Obtain a PCI device's Vital Product Data (VPD). VPD is optional in
2660 * both PCI Local Bus and PCIe specifications so there is no guarantee it
2661 * will be there for a particular device.
2662 *
2663 * Returns: a pointer to virPCIVPDResource which needs to be freed by the caller
2664 * or NULL if getting it failed for some reason (e.g. invalid format, I/O error).
2665 */
2666 virPCIVPDResource *
virPCIDeviceGetVPD(virPCIDevice * dev)2667 virPCIDeviceGetVPD(virPCIDevice *dev)
2668 {
2669 g_autofree char *vpdPath = NULL;
2670 int fd;
2671 g_autoptr(virPCIVPDResource) res = NULL;
2672
2673 vpdPath = virPCIFile(dev->name, "vpd");
2674 if (!virPCIDeviceHasVPD(dev)) {
2675 virReportError(VIR_ERR_INTERNAL_ERROR, _("Device %s does not have a VPD"),
2676 virPCIDeviceGetName(dev));
2677 return NULL;
2678 }
2679 if ((fd = open(vpdPath, O_RDONLY)) < 0) {
2680 virReportSystemError(-fd, _("Failed to open a VPD file '%s'"), vpdPath);
2681 return NULL;
2682 }
2683 res = virPCIVPDParse(fd);
2684
2685 if (VIR_CLOSE(fd) < 0) {
2686 virReportSystemError(errno, _("Unable to close the VPD file, fd: %d"), fd);
2687 return NULL;
2688 }
2689
2690 return g_steal_pointer(&res);
2691 }
2692
2693 #else
/* Message shared by the stub implementations in this non-Linux branch;
 * marked with N_() here and translated with _() where it is reported. */
static const char *unsupported = N_("not supported on non-linux platforms");
2695
2696 virPCIDeviceAddress *
virPCIGetDeviceAddressFromSysfsLink(const char * device_link G_GNUC_UNUSED)2697 virPCIGetDeviceAddressFromSysfsLink(const char *device_link G_GNUC_UNUSED)
2698 {
2699 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2700 return NULL;
2701 }
2702
2703
2704 int
virPCIGetPhysicalFunction(const char * vf_sysfs_path G_GNUC_UNUSED,virPCIDeviceAddress ** pf G_GNUC_UNUSED)2705 virPCIGetPhysicalFunction(const char *vf_sysfs_path G_GNUC_UNUSED,
2706 virPCIDeviceAddress **pf G_GNUC_UNUSED)
2707 {
2708 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2709 return -1;
2710 }
2711
2712 int
virPCIGetVirtualFunctionsFull(const char * sysfs_path G_GNUC_UNUSED,virPCIVirtualFunctionList ** vfs G_GNUC_UNUSED,const char * pfPhysPortID G_GNUC_UNUSED)2713 virPCIGetVirtualFunctionsFull(const char *sysfs_path G_GNUC_UNUSED,
2714 virPCIVirtualFunctionList **vfs G_GNUC_UNUSED,
2715 const char *pfPhysPortID G_GNUC_UNUSED)
2716 {
2717 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2718 return -1;
2719 }
2720
2721 int
virPCIIsVirtualFunction(const char * vf_sysfs_device_link G_GNUC_UNUSED)2722 virPCIIsVirtualFunction(const char *vf_sysfs_device_link G_GNUC_UNUSED)
2723 {
2724 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2725 return -1;
2726 }
2727
2728 int
virPCIGetVirtualFunctionIndex(const char * pf_sysfs_device_link G_GNUC_UNUSED,const char * vf_sysfs_device_link G_GNUC_UNUSED,int * vf_index G_GNUC_UNUSED)2729 virPCIGetVirtualFunctionIndex(const char *pf_sysfs_device_link G_GNUC_UNUSED,
2730 const char *vf_sysfs_device_link G_GNUC_UNUSED,
2731 int *vf_index G_GNUC_UNUSED)
2732 {
2733 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2734 return -1;
2735
2736 }
2737
2738
2739 int
virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddress * dev G_GNUC_UNUSED,char ** pci_sysfs_device_link G_GNUC_UNUSED)2740 virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddress *dev G_GNUC_UNUSED,
2741 char **pci_sysfs_device_link G_GNUC_UNUSED)
2742 {
2743 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2744 return -1;
2745 }
2746
2747 int
virPCIGetNetName(const char * device_link_sysfs_path G_GNUC_UNUSED,size_t idx G_GNUC_UNUSED,const char * physPortID G_GNUC_UNUSED,char ** netname G_GNUC_UNUSED)2748 virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
2749 size_t idx G_GNUC_UNUSED,
2750 const char *physPortID G_GNUC_UNUSED,
2751 char **netname G_GNUC_UNUSED)
2752 {
2753 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2754 return -1;
2755 }
2756
2757 int
virPCIGetVirtualFunctionInfo(const char * vf_sysfs_device_path G_GNUC_UNUSED,int pfNetDevIdx G_GNUC_UNUSED,char ** pfname G_GNUC_UNUSED,int * vf_index G_GNUC_UNUSED)2758 virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path G_GNUC_UNUSED,
2759 int pfNetDevIdx G_GNUC_UNUSED,
2760 char **pfname G_GNUC_UNUSED,
2761 int *vf_index G_GNUC_UNUSED)
2762 {
2763 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2764 return -1;
2765 }
2766
2767 bool
virPCIDeviceHasVPD(virPCIDevice * dev G_GNUC_UNUSED)2768 virPCIDeviceHasVPD(virPCIDevice *dev G_GNUC_UNUSED)
2769 {
2770 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2771 return NULL;
2772 }
2773
2774 virPCIVPDResource *
virPCIDeviceGetVPD(virPCIDevice * dev G_GNUC_UNUSED)2775 virPCIDeviceGetVPD(virPCIDevice *dev G_GNUC_UNUSED)
2776 {
2777 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
2778 return NULL;
2779 }
2780 #endif /* __linux__ */
2781
2782 int
virPCIDeviceIsPCIExpress(virPCIDevice * dev)2783 virPCIDeviceIsPCIExpress(virPCIDevice *dev)
2784 {
2785 int fd;
2786 int ret = -1;
2787
2788 if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
2789 return ret;
2790
2791 if (virPCIDeviceInit(dev, fd) < 0)
2792 goto cleanup;
2793
2794 ret = dev->is_pcie;
2795
2796 cleanup:
2797 virPCIDeviceConfigClose(dev, fd);
2798 return ret;
2799 }
2800
2801 int
virPCIDeviceHasPCIExpressLink(virPCIDevice * dev)2802 virPCIDeviceHasPCIExpressLink(virPCIDevice *dev)
2803 {
2804 int fd;
2805 int ret = -1;
2806 uint16_t cap, type;
2807
2808 if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
2809 return ret;
2810
2811 if (virPCIDeviceInit(dev, fd) < 0)
2812 goto cleanup;
2813
2814 if (dev->pcie_cap_pos == 0) {
2815 ret = 0;
2816 goto cleanup;
2817 }
2818
2819 cap = virPCIDeviceRead16(dev, fd, dev->pcie_cap_pos + PCI_CAP_FLAGS);
2820 type = (cap & PCI_EXP_FLAGS_TYPE) >> 4;
2821
2822 ret = type != PCI_EXP_TYPE_ROOT_INT_EP && type != PCI_EXP_TYPE_ROOT_EC;
2823
2824 cleanup:
2825 virPCIDeviceConfigClose(dev, fd);
2826 return ret;
2827 }
2828
2829 int
virPCIDeviceGetLinkCapSta(virPCIDevice * dev,int * cap_port,unsigned int * cap_speed,unsigned int * cap_width,unsigned int * sta_speed,unsigned int * sta_width)2830 virPCIDeviceGetLinkCapSta(virPCIDevice *dev,
2831 int *cap_port,
2832 unsigned int *cap_speed,
2833 unsigned int *cap_width,
2834 unsigned int *sta_speed,
2835 unsigned int *sta_width)
2836 {
2837 uint32_t t;
2838 int fd;
2839 int ret = -1;
2840
2841 if ((fd = virPCIDeviceConfigOpen(dev)) < 0)
2842 return ret;
2843
2844 if (virPCIDeviceInit(dev, fd) < 0)
2845 goto cleanup;
2846
2847 if (!dev->pcie_cap_pos) {
2848 virReportError(VIR_ERR_INTERNAL_ERROR,
2849 _("pci device %s is not a PCI-Express device"),
2850 dev->name);
2851 goto cleanup;
2852 }
2853
2854 t = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKCAP);
2855
2856 *cap_port = t >> 24;
2857 *cap_speed = t & PCI_EXP_LNKCAP_SPEED;
2858 *cap_width = (t & PCI_EXP_LNKCAP_WIDTH) >> 4;
2859
2860 t = virPCIDeviceRead16(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKSTA);
2861
2862 *sta_speed = t & PCI_EXP_LNKSTA_SPEED;
2863 *sta_width = (t & PCI_EXP_LNKSTA_WIDTH) >> 4;
2864 ret = 0;
2865
2866 cleanup:
2867 virPCIDeviceConfigClose(dev, fd);
2868 return ret;
2869 }
2870
2871
/**
 * virPCIGetHeaderType:
 * @dev: PCI device to query
 * @hdrType: filled with the header type, or -1 on failure
 *
 * Reads the PCI_HEADER_TYPE config byte, masks it with
 * PCI_HEADER_TYPE_MASK and rejects values not below
 * VIR_PCI_HEADER_LAST.
 *
 * Returns 0 on success, -1 on failure.
 */
int virPCIGetHeaderType(virPCIDevice *dev, int *hdrType)
{
    uint8_t raw;
    int cfgfd;

    *hdrType = -1;

    cfgfd = virPCIDeviceConfigOpen(dev);
    if (cfgfd < 0)
        return -1;

    raw = virPCIDeviceRead8(dev, cfgfd, PCI_HEADER_TYPE);
    virPCIDeviceConfigClose(dev, cfgfd);

    raw &= PCI_HEADER_TYPE_MASK;

    if (raw < VIR_PCI_HEADER_LAST) {
        *hdrType = raw;
        return 0;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unknown PCI header type '%d' for device '%s'"),
                   raw, dev->name);
    return -1;
}
2898
2899
2900 void
virPCIEDeviceInfoFree(virPCIEDeviceInfo * dev)2901 virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev)
2902 {
2903 if (!dev)
2904 return;
2905
2906 g_free(dev->link_cap);
2907 g_free(dev->link_sta);
2908 g_free(dev);
2909 }
2910
2911 void
virPCIDeviceAddressFree(virPCIDeviceAddress * address)2912 virPCIDeviceAddressFree(virPCIDeviceAddress *address)
2913 {
2914 g_free(address);
2915 }
2916