1 /**
2 * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED.
3 *
4 * See file LICENSE for terms.
5 */
6
7 #ifdef HAVE_CONFIG_H
8 # include "config.h"
9 #endif
10
11 #include <ucs/sys/topo.h>
12 #include <ucs/sys/string.h>
13 #include <ucs/type/status.h>
14 #include <stdio.h>
15 #include <ucs/datastruct/khash.h>
16 #include <ucs/type/spinlock.h>
17 #include <ucs/debug/log.h>
18 #include <ucs/debug/assert.h>
19 #include <limits.h>
20
/* Number of entries in the sys_dev -> bus id lookup array (bus_arr) */
#define UCS_TOPO_MAX_SYS_DEVICES 1024
/* Multiplied by the path distance to produce the latency reported by
 * ucs_topo_get_distance(); presumably expressed in seconds - TODO confirm */
#define UCS_TOPO_HOP_OVERHEAD 1E-7

/* Integer form of a bus id, used as the key of the bus_to_sys_dev hash */
typedef int64_t ucs_bus_id_bit_rep_t;
25
/*
 * Reverse lookup table: the system device index is used directly as the index
 * into bus_arr to retrieve the bus id it was registered with.
 */
typedef struct ucs_topo_sys_dev_to_bus_arr {
    ucs_sys_bus_id_t bus_arr[UCS_TOPO_MAX_SYS_DEVICES]; /* bus id per device index */
    unsigned count; /* number of registered entries; also the next index handed out */
} ucs_topo_sys_dev_to_bus_arr_t;
30
/* Hash map type "bus_to_sys_dev": 64-bit bus id representation -> system
 * device index */
KHASH_MAP_INIT_INT64(bus_to_sys_dev, ucs_sys_device_t);
32
/*
 * File-wide topology state: the forward map (bus id -> device index), the
 * reverse table (device index -> bus id) and the spinlock that
 * ucs_topo_find_device_by_bus_id() holds while updating both.
 */
typedef struct ucs_topo_global_ctx {
    khash_t(bus_to_sys_dev) bus_to_sys_dev_hash; /* bus id bits -> device index */
    ucs_spinlock_t lock; /* taken around hash/table updates */
    ucs_topo_sys_dev_to_bus_arr_t sys_dev_to_bus_lookup; /* device index -> bus id */
} ucs_topo_global_ctx_t;

/* Single instance, set up by ucs_topo_init() and torn down by
 * ucs_topo_cleanup() */
static ucs_topo_global_ctx_t ucs_topo_ctx;
40
ucs_topo_get_bus_id_bit_repr(const ucs_sys_bus_id_t * bus_id)41 static ucs_bus_id_bit_rep_t ucs_topo_get_bus_id_bit_repr(const ucs_sys_bus_id_t *bus_id)
42 {
43 return (((uint64_t)bus_id->domain << 24) |
44 ((uint64_t)bus_id->bus << 16) |
45 ((uint64_t)bus_id->slot << 8) |
46 (bus_id->function));
47 }
48
ucs_topo_init()49 void ucs_topo_init()
50 {
51 ucs_spinlock_init(&ucs_topo_ctx.lock, 0);
52 kh_init_inplace(bus_to_sys_dev, &ucs_topo_ctx.bus_to_sys_dev_hash);
53 ucs_topo_ctx.sys_dev_to_bus_lookup.count = 0;
54 }
55
ucs_topo_cleanup()56 void ucs_topo_cleanup()
57 {
58 ucs_status_t status;
59
60 kh_destroy_inplace(bus_to_sys_dev, &ucs_topo_ctx.bus_to_sys_dev_hash);
61
62 status = ucs_spinlock_destroy(&ucs_topo_ctx.lock);
63 if (status != UCS_OK) {
64 ucs_warn("ucs_recursive_spinlock_destroy() failed: %s",
65 ucs_status_string(status));
66 }
67 }
68
ucs_topo_find_device_by_bus_id(const ucs_sys_bus_id_t * bus_id,ucs_sys_device_t * sys_dev)69 ucs_status_t ucs_topo_find_device_by_bus_id(const ucs_sys_bus_id_t *bus_id,
70 ucs_sys_device_t *sys_dev)
71 {
72 khiter_t hash_it;
73 ucs_kh_put_t kh_put_status;
74 ucs_bus_id_bit_rep_t bus_id_bit_rep;
75
76 bus_id_bit_rep = ucs_topo_get_bus_id_bit_repr(bus_id);
77
78 ucs_spin_lock(&ucs_topo_ctx.lock);
79 hash_it = kh_put(bus_to_sys_dev /*name*/,
80 &ucs_topo_ctx.bus_to_sys_dev_hash /*pointer to hashmap*/,
81 bus_id_bit_rep /*key*/,
82 &kh_put_status);
83
84 if (kh_put_status == UCS_KH_PUT_KEY_PRESENT) {
85 *sys_dev = kh_value(&ucs_topo_ctx.bus_to_sys_dev_hash, hash_it);
86 ucs_debug("bus id %ld exists. sys_dev = %u", bus_id_bit_rep, *sys_dev);
87 } else if ((kh_put_status == UCS_KH_PUT_BUCKET_EMPTY) ||
88 (kh_put_status == UCS_KH_PUT_BUCKET_CLEAR)) {
89 *sys_dev = ucs_topo_ctx.sys_dev_to_bus_lookup.count;
90 ucs_assert(*sys_dev < UCS_TOPO_MAX_SYS_DEVICES);
91 kh_value(&ucs_topo_ctx.bus_to_sys_dev_hash, hash_it) = *sys_dev;
92 ucs_debug("bus id %ld doesn't exist. sys_dev = %u", bus_id_bit_rep,
93 *sys_dev);
94
95 ucs_topo_ctx.sys_dev_to_bus_lookup.bus_arr[*sys_dev] = *bus_id;
96 ucs_topo_ctx.sys_dev_to_bus_lookup.count++;
97 }
98
99 ucs_spin_unlock(&ucs_topo_ctx.lock);
100 return UCS_OK;
101 }
102
/**
 * Format the sysfs path of a PCI bus: "/sys/class/pci_bus/0000:<bus>", where
 * <bus> is printed as at least two lowercase hex digits. The "0000" part is a
 * fixed string and does not depend on any argument.
 *
 * @param [in]  bus   Bus number.
 * @param [out] path  Destination buffer. The write is not bounded, so the
 *                    caller must provide enough room for the whole string.
 */
static void ucs_topo_get_path_with_bus(unsigned bus, char *path)
{
    const char *pci_bus_root = "/sys/class/pci_bus";

    (void)sprintf(path, "%s/0000:%02x", pci_bus_root, bus);
}
109
ucs_topo_get_distance(ucs_sys_device_t device1,ucs_sys_device_t device2,ucs_sys_dev_distance_t * distance)110 ucs_status_t ucs_topo_get_distance(ucs_sys_device_t device1,
111 ucs_sys_device_t device2,
112 ucs_sys_dev_distance_t *distance)
113 {
114 char path1[PATH_MAX], path2[PATH_MAX];
115 unsigned bus1, bus2;
116 ssize_t path_distance;
117
118 if ((device1 == UCS_SYS_DEVICE_ID_UNKNOWN) ||
119 (device2 == UCS_SYS_DEVICE_ID_UNKNOWN) ||
120 (ucs_topo_ctx.sys_dev_to_bus_lookup.count < 2) ) {
121 return UCS_ERR_IO_ERROR;
122 }
123
124 if (device1 == device2) {
125 distance->latency = 0;
126 return UCS_OK;
127 }
128
129 ucs_assert(device1 < UCS_TOPO_MAX_SYS_DEVICES);
130 ucs_assert(device2 < UCS_TOPO_MAX_SYS_DEVICES);
131
132 bus1 = ucs_topo_ctx.sys_dev_to_bus_lookup.bus_arr[device1].bus;
133 bus2 = ucs_topo_ctx.sys_dev_to_bus_lookup.bus_arr[device2].bus;
134
135 ucs_topo_get_path_with_bus(bus1, path1);
136 ucs_topo_get_path_with_bus(bus2, path2);
137
138 path_distance = ucs_path_calc_distance(path1, path2);
139 if (path_distance < 0) {
140 return (ucs_status_t)path_distance;
141 }
142
143 distance->latency = UCS_TOPO_HOP_OVERHEAD * path_distance;
144
145 return UCS_OK;
146 }
147
148
/**
 * Print topology information to a stream.
 *
 * The body is currently empty: nothing is written and @a stream is not used.
 *
 * @param [in] stream  Output stream (unused).
 */
void ucs_topo_print_info(FILE *stream)
{
}
152