1 /**
2 * collectd - src/infiniband.c
3 * Copyright 2020 NVIDIA Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Luke Yeager <lyeager at nvidia.com>
25 **/
26
27 #include "collectd.h"
28
29 #include "plugin.h"
30 #include "utils/common/common.h"
31 #include "utils/ignorelist/ignorelist.h"
32
33 #if !KERNEL_LINUX
34 #error "No applicable input method."
35 #endif
36
37 #include <ctype.h>
38 #include <glob.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42
43 /* Configuration settings ****************************************************/
44
45 static const char *config_keys[] = {
46 "Port",
47 "IgnoreSelected",
48 };
49 static int config_keys_num = STATIC_ARRAY_SIZE(config_keys);
50 static ignorelist_t *ignorelist;
51
52 /* Listing ports *************************************************************/
53
// Collects the "state" file of every port of every device found under
// /sys/class/infiniband into `g`. Matches are left unsorted (GLOB_NOSORT).
//
// Returns whatever glob() returns (0 on success). The callers in this file
// release `g` with globfree().
static int ib_glob_ports(glob_t *g) {
  static const char pattern[] = "/sys/class/infiniband/*/ports/*/state";
  const int status = glob(pattern, GLOB_NOSORT, NULL, g);
  return status;
}
57
/* Positions (0-based) of the device and port names among the '/'-separated
 * components of a path produced by ib_glob_ports(). */
static const int device_tok_idx = 3, port_tok_idx = 5;

/*
 * Splits a globbed sysfs path in place and extracts the device and port
 * components.
 *
 * path   - writable path string; strtok_r() overwrites separators with NULs.
 * device - receives a pointer into `path` at the device name, or NULL.
 * port   - receives a pointer into `path` at the port name, or NULL.
 *
 * Returns 0 when both components were found, 1 otherwise.
 */
static int ib_parse_glob_port(char *path, char **device, char **port) {
  char *state = NULL;
  int idx = 0;

  *device = NULL;
  *port = NULL;

  for (char *piece = strtok_r(path, "/", &state); piece != NULL;
       piece = strtok_r(NULL, "/", &state), idx++) {
    if (idx == port_tok_idx) {
      /* The port is the last component of interest; stop tokenizing. */
      *port = piece;
      break;
    }
    if (idx == device_tok_idx)
      *device = piece;
  }

  if (*device == NULL || *port == NULL)
    return 1;
  return 0;
}
78
79 /* Core functions ************************************************************/
80
/*
 * Reads a single value from a per-port sysfs file.
 *
 * device, port - components used to build the path
 *                /sys/class/infiniband/<device>/ports/<port>/<filename>
 * filename     - file name relative to the port directory
 * ds_type      - DS_TYPE_* constant forwarded to parse_value_file()
 * dst          - receives the parsed value
 *
 * Returns 0 on success, 1 if the path could not be built (formatting error
 * or it would not fit in PATH_MAX) or parse_value_file() failed.
 */
static int ib_read_value_file(const char *device, const char *port,
                              const char *filename, int ds_type, value_t *dst) {
  char path[PATH_MAX];
  int len = snprintf(path, sizeof(path), "/sys/class/infiniband/%s/ports/%s/%s",
                     device, port, filename);

  /* snprintf() returns the untruncated length; refuse a truncated path
   * instead of reading some other (or no) file. */
  if (len < 0 || (size_t)len >= sizeof(path))
    return 1;

  if (parse_value_file(path, dst, ds_type) != 0)
    return 1;
  return 0;
}
91
92 /*
93 * Used to parse files like this:
94 * rate: "100 Gb/sec"
95 * state: "4: ACTIVE"
96 * phys_state: "5: LinkUp"
97 */
ib_read_value_file_num_only(const char * device,const char * port,const char * filename,int ds_type,value_t * dst)98 static int ib_read_value_file_num_only(const char *device, const char *port,
99 const char *filename, int ds_type,
100 value_t *dst) {
101 char path[PATH_MAX];
102 FILE *fh;
103 char buffer[256];
104
105 if (snprintf(path, PATH_MAX, "/sys/class/infiniband/%s/ports/%s/%s", device,
106 port, filename) < 0)
107 return 1;
108
109 // copied from parse_value_file()
110 fh = fopen(path, "r");
111 if (fh == NULL)
112 return 1;
113 if (fgets(buffer, sizeof(buffer), fh) == NULL) {
114 fclose(fh);
115 return 1;
116 }
117 fclose(fh);
118 strstripnewline(buffer);
119
120 // zero-out the first non-digit character
121 for (int i = 0; i < sizeof(buffer); i++) {
122 if (!isdigit(buffer[i])) {
123 buffer[i] = '\0';
124 break;
125 }
126 }
127
128 return parse_value(buffer, dst, ds_type);
129 }
130
/*
 * Dispatches `vc` values for one port.
 *
 * The value list is filed under plugin "infiniband" with the plugin instance
 * "<device>:<port>" and the given type / type instance, then handed to
 * plugin_dispatch_values().
 */
static void ib_submit(const char *device, const char *port, value_t *vs, int vc,
                      const char *type, const char *type_instance) {
  value_list_t list = VALUE_LIST_INIT;

  /* Identification of the metric. */
  sstrncpy(list.plugin, "infiniband", sizeof(list.plugin));
  snprintf(list.plugin_instance, sizeof(list.plugin_instance), "%s:%s", device,
           port);
  sstrncpy(list.type, type, sizeof(list.type));
  sstrncpy(list.type_instance, type_instance, sizeof(list.type_instance));

  /* Payload: the caller keeps ownership of the value array. */
  list.values_len = vc;
  list.values = vs;

  plugin_dispatch_values(&list);
}
143
144 /**
145 * For further reading on the available sysfs files, see:
146 * - Linux: ./Documentation/infiniband/sysfs.txt
147 *
148 * For further reading on the meaning of each counter, see the InfiniBand
149 * Architecture Specification, sections 14.2.5.6 and 16.1.3.5.
150 **/
ib_read_port(const char * device,const char * port)151 static int ib_read_port(const char *device, const char *port) {
152 value_t value, values[2];
153
154 /* PortInfo attributes */
155
156 // Port state (4 is "Active")
157 if (ib_read_value_file_num_only(device, port, "state", DS_TYPE_GAUGE,
158 &value) == 0)
159 ib_submit(device, port, &value, 1, "ib_state", "");
160 // Port physical state (5 is "LinkUp")
161 if (ib_read_value_file_num_only(device, port, "phys_state", DS_TYPE_GAUGE,
162 &value) == 0)
163 ib_submit(device, port, &value, 1, "ib_phys_state", "");
164 // Currently active extended link speed, in Gb/s
165 if (ib_read_value_file_num_only(device, port, "rate", DS_TYPE_GAUGE,
166 &value) == 0)
167 ib_submit(device, port, &value, 1, "ib_rate", "");
168 // Supported capabilities of this port
169 if (ib_read_value_file(device, port, "cap_mask", DS_TYPE_GAUGE, &value) == 0)
170 ib_submit(device, port, &value, 1, "ib_cap_mask", "");
171 // The base LID (local identifier) of this port
172 if (ib_read_value_file(device, port, "lid", DS_TYPE_GAUGE, &value) == 0)
173 ib_submit(device, port, &value, 1, "ib_lid", "");
174 // The number of low order bits of the LID to mask (for multipath)
175 if (ib_read_value_file(device, port, "lid_mask_count", DS_TYPE_GAUGE,
176 &value) == 0)
177 ib_submit(device, port, &value, 1, "ib_lid_mask_count", "");
178 // The LID of the master SM (subnet manager) that is managing this port
179 if (ib_read_value_file(device, port, "sm_lid", DS_TYPE_GAUGE, &value) == 0)
180 ib_submit(device, port, &value, 1, "ib_sm_lid", "");
181 // The administrative SL (service level) of the master SM that is managing
182 // this port
183 if (ib_read_value_file(device, port, "sm_sl", DS_TYPE_GAUGE, &value) == 0)
184 ib_submit(device, port, &value, 1, "ib_sm_sl", "");
185
186 /* PortCounters */
187
188 // Total number of data octets, divided by 4, received on all VLs at the port
189 if ((ib_read_value_file(device, port, "counters/port_rcv_data",
190 DS_TYPE_DERIVE, &values[0]) == 0) &&
191 (ib_read_value_file(device, port, "counters/port_xmit_data",
192 DS_TYPE_DERIVE, &values[1]) == 0)) {
193 values[0].derive *= 4;
194 values[1].derive *= 4;
195 ib_submit(device, port, values, 2, "ib_octets", "");
196 }
197 // Total number of packets, including packets containing errors, and excluding
198 // link packets, received from all VLs on the port
199 if ((ib_read_value_file(device, port, "counters/port_rcv_packets",
200 DS_TYPE_DERIVE, &values[0]) == 0) &&
201 (ib_read_value_file(device, port, "counters/port_xmit_packets",
202 DS_TYPE_DERIVE, &values[1]) == 0))
203 ib_submit(device, port, values, 2, "ib_packets", "total");
204 // Total number of packets containing an error that were received on the port
205 if (ib_read_value_file(device, port, "counters/port_rcv_errors",
206 DS_TYPE_DERIVE, &values[0]) == 0) {
207 values[1].derive = 0;
208 ib_submit(device, port, values, 2, "ib_packets", "errors");
209 }
210 // Total number of packets marked with the EBP delimiter received on the port.
211 if (ib_read_value_file(device, port,
212 "counters/port_rcv_remote_physical_errors",
213 DS_TYPE_DERIVE, &values[0]) == 0) {
214 values[1].derive = 0;
215 ib_submit(device, port, values, 2, "ib_packets", "remote_physical_errors");
216 }
217 // Total number of packets received on the port that were discarded because
218 // they could not be forwarded by the switch relay
219 if (ib_read_value_file(device, port, "counters/port_rcv_switch_relay_errors",
220 DS_TYPE_DERIVE, &values[0]) == 0) {
221 values[1].derive = 0;
222 ib_submit(device, port, values, 2, "ib_packets", "switch_relay_errors");
223 }
224 // Total number of outbound packets discarded by the port because the port is
225 // down or congested.
226 if (ib_read_value_file(device, port, "counters/port_xmit_discards",
227 DS_TYPE_DERIVE, &values[1]) == 0) {
228 values[0].derive = 0;
229 ib_submit(device, port, values, 2, "ib_packets", "discards");
230 }
231 // Total number of packets not transmitted from the switch physical port
232 // Total number of packets received on the switch physical port that are
233 // discarded
234 if ((ib_read_value_file(device, port, "counters/port_rcv_constraint_errors",
235 DS_TYPE_DERIVE, &values[0]) == 0) &&
236 (ib_read_value_file(device, port, "counters/port_xmit_constraint_errors",
237 DS_TYPE_DERIVE, &values[1]) == 0))
238 ib_submit(device, port, values, 2, "ib_packets", "constraint_errors");
239 // Number of incoming VL15 packets dropped due to resource limitations (e.g.,
240 // lack of buffers) in the port
241 if (ib_read_value_file(device, port, "counters/VL15_dropped", DS_TYPE_DERIVE,
242 &values[0]) == 0) {
243 values[1].derive = 0;
244 ib_submit(device, port, values, 2, "ib_packets", "vl15_dropped");
245 }
246 // Total number of times the Port Training state machine has successfully
247 // completed the link error recovery process.
248 if (ib_read_value_file(device, port, "counters/link_error_recovery",
249 DS_TYPE_DERIVE, &value) == 0)
250 ib_submit(device, port, &value, 1, "ib_link_error_recovery", "recovered");
251 // Total number of times the Port Training state machine has failed the link
252 // error recovery process and downed the link.
253 if (ib_read_value_file(device, port, "counters/link_downed", DS_TYPE_DERIVE,
254 &value) == 0)
255 ib_submit(device, port, &value, 1, "ib_link_error_recovery", "downed");
256 // Total number of minor link errors detected on one or more physical lanes.
257 if (ib_read_value_file(device, port, "counters/symbol_error", DS_TYPE_DERIVE,
258 &value) == 0)
259 ib_submit(device, port, &value, 1, "ib_errors", "symbol_errors");
260 // The number of times that the count of local physical errors exceeded the
261 // threshold specified by LocalPhyErrors
262 if (ib_read_value_file(device, port, "counters/local_link_integrity_errors",
263 DS_TYPE_DERIVE, &value) == 0)
264 ib_submit(device, port, &value, 1, "ib_errors",
265 "local_link_integrity_errors");
266 // The number of times that OverrunErrors consecutive flow control update
267 // periods occurred, each having at least one overrun error
268 if (ib_read_value_file(device, port,
269 "counters/excessive_buffer_overrun_errors",
270 DS_TYPE_DERIVE, &value) == 0)
271 ib_submit(device, port, &value, 1, "ib_errors",
272 "excessive_buffer_overrun_errors");
273 // The number of ticks during which the port selected by PortSelect had data
274 // to transmit but no data was sent during the entire tick
275 if (ib_read_value_file(device, port, "counters/port_xmit_wait",
276 DS_TYPE_DERIVE, &value) == 0)
277 ib_submit(device, port, &value, 1, "ib_xmit_wait", "");
278
279 /* PortCountersExtended */
280
281 if ((ib_read_value_file(device, port, "counters/unicast_rcv_packets",
282 DS_TYPE_DERIVE, &values[0]) == 0) &&
283 (ib_read_value_file(device, port, "counters/unicast_xmit_packets",
284 DS_TYPE_DERIVE, &values[1]) == 0))
285 ib_submit(device, port, values, 2, "ib_packets", "unicast");
286 if ((ib_read_value_file(device, port, "counters/multicast_rcv_packets",
287 DS_TYPE_DERIVE, &values[0]) == 0) &&
288 (ib_read_value_file(device, port, "counters/multicast_xmit_packets",
289 DS_TYPE_DERIVE, &values[1]) == 0))
290 ib_submit(device, port, values, 2, "ib_packets", "multicast");
291
292 return 0;
293 }
294
295 /* Plugin entrypoints ********************************************************/
296
infiniband_config(const char * key,const char * value)297 static int infiniband_config(const char *key, const char *value) {
298 if (ignorelist == NULL)
299 ignorelist = ignorelist_create(1);
300
301 if (strcasecmp(key, "Port") == 0) {
302 ignorelist_add(ignorelist, value);
303 } else if (strcasecmp(key, "IgnoreSelected") == 0) {
304 int invert = 1;
305 if (IS_TRUE(value))
306 invert = 0;
307 ignorelist_set_invert(ignorelist, invert);
308 } else {
309 return -1;
310 }
311 return 0;
312 }
313
/*
 * Init callback: probes sysfs once and unregisters the read callback when
 * the port glob does not succeed. Always returns 0.
 */
static int infiniband_init(void) {
  glob_t matches;
  const int status = ib_glob_ports(&matches);

  if (status != 0)
    plugin_unregister_read("infiniband"); // no ports found

  globfree(&matches);
  return 0;
}
323
infiniband_read(void)324 static int infiniband_read(void) {
325 int rc = 0;
326 glob_t g;
327 char port_name[255];
328
329 if (ib_glob_ports(&g) == 0) {
330 for (int i = 0; i < g.gl_pathc; ++i) {
331 char *device = NULL, *port = NULL;
332 if (ib_parse_glob_port(g.gl_pathv[i], &device, &port) == 0) {
333 snprintf(port_name, sizeof(port_name), "%s:%s", device, port);
334 if (ignorelist_match(ignorelist, port_name) == 0)
335 rc &= ib_read_port(device, port);
336 }
337 }
338 }
339
340 globfree(&g);
341 return rc;
342 }
343
module_register(void)344 void module_register(void) {
345 plugin_register_config("infiniband", infiniband_config, config_keys,
346 config_keys_num);
347 plugin_register_init("infiniband", infiniband_init);
348 plugin_register_read("infiniband", infiniband_read);
349 }
350