1 /**
2  * collectd - src/infiniband.c
3  * Copyright 2020 NVIDIA Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *   Luke Yeager <lyeager at nvidia.com>
25  **/
26 
27 #include "collectd.h"
28 
29 #include "plugin.h"
30 #include "utils/common/common.h"
31 #include "utils/ignorelist/ignorelist.h"
32 
33 #if !KERNEL_LINUX
34 #error "No applicable input method."
35 #endif
36 
37 #include <ctype.h>
38 #include <glob.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 
43 /* Configuration settings ****************************************************/
44 
45 static const char *config_keys[] = {
46     "Port",
47     "IgnoreSelected",
48 };
49 static int config_keys_num = STATIC_ARRAY_SIZE(config_keys);
50 static ignorelist_t *ignorelist;
51 
52 /* Listing ports *************************************************************/
53 
// Expands the sysfs pattern matching the "state" file of every InfiniBand
// port and stores the matches in *g.
//
// The return code of glob() is passed through unchanged (0 on success, a
// GLOB_* error code otherwise). GLOB_NOSORT is requested, so callers must not
// rely on the order of the matches. The caller releases *g with globfree().
static int ib_glob_ports(glob_t *g) {
  static const char state_pattern[] = "/sys/class/infiniband/*/ports/*/state";
  const int glob_flags = GLOB_NOSORT;

  return glob(state_pattern, glob_flags, NULL, g);
}
57 
/* Zero-based positions of the device and port components among the
 * '/'-separated components of a path produced by ib_glob_ports(). */
static const int device_tok_idx = 3, port_tok_idx = 5;

/*
 * Splits `path` in place and extracts its device and port components.
 *
 * path:   writable, NUL-terminated path; the separator following each
 *         consumed component is overwritten with '\0'.
 * device: receives a pointer into `path` at component device_tok_idx, or NULL
 *         if the path has too few components.
 * port:   receives a pointer into `path` at component port_tok_idx, or NULL
 *         if the path has too few components.
 *
 * Returns 0 when both components were found, 1 otherwise.
 */
static int ib_parse_glob_port(char *path, char **device, char **port) {
  char *cursor = path;

  *device = NULL;
  *port = NULL;

  /* Nothing after the port component is needed, so stop there. */
  for (int idx = 0; idx <= port_tok_idx; idx++) {
    /* Runs of separators count as a single separator. */
    cursor += strspn(cursor, "/");
    if (*cursor == '\0')
      break;

    char *component = cursor;
    cursor += strcspn(cursor, "/");
    if (*cursor != '\0')
      *cursor++ = '\0';

    if (idx == device_tok_idx)
      *device = component;
    else if (idx == port_tok_idx)
      *port = component;
  }

  if (*device == NULL || *port == NULL)
    return 1;
  return 0;
}
78 
79 /* Core functions ************************************************************/
80 
/*
 * Reads one value from the sysfs attribute
 * /sys/class/infiniband/<device>/ports/<port>/<filename>.
 *
 * device, port: path components identifying the port.
 * filename:     attribute name relative to the port directory (may contain
 *               a subdirectory such as "counters/...").
 * ds_type:      DS_TYPE_* constant handed to parse_value_file().
 * dst:          receives the parsed value on success.
 *
 * Returns 0 on success and 1 if the path could not be built or
 * parse_value_file() reported a failure.
 */
static int ib_read_value_file(const char *device, const char *port,
                              const char *filename, int ds_type, value_t *dst) {
  char path[PATH_MAX];
  int len = snprintf(path, sizeof(path), "/sys/class/infiniband/%s/ports/%s/%s",
                     device, port, filename);

  /* snprintf() signals truncation by returning the length it would have
   * needed; a clipped path names a different file, so reject it as well. */
  if (len < 0 || (size_t)len >= sizeof(path))
    return 1;

  if (parse_value_file(path, dst, ds_type) != 0)
    return 1;
  return 0;
}
91 
92 /*
93  * Used to parse files like this:
94  * rate:       "100 Gb/sec"
95  * state:      "4: ACTIVE"
96  * phys_state: "5: LinkUp"
97  */
ib_read_value_file_num_only(const char * device,const char * port,const char * filename,int ds_type,value_t * dst)98 static int ib_read_value_file_num_only(const char *device, const char *port,
99                                        const char *filename, int ds_type,
100                                        value_t *dst) {
101   char path[PATH_MAX];
102   FILE *fh;
103   char buffer[256];
104 
105   if (snprintf(path, PATH_MAX, "/sys/class/infiniband/%s/ports/%s/%s", device,
106                port, filename) < 0)
107     return 1;
108 
109   // copied from parse_value_file()
110   fh = fopen(path, "r");
111   if (fh == NULL)
112     return 1;
113   if (fgets(buffer, sizeof(buffer), fh) == NULL) {
114     fclose(fh);
115     return 1;
116   }
117   fclose(fh);
118   strstripnewline(buffer);
119 
120   // zero-out the first non-digit character
121   for (int i = 0; i < sizeof(buffer); i++) {
122     if (!isdigit(buffer[i])) {
123       buffer[i] = '\0';
124       break;
125     }
126   }
127 
128   return parse_value(buffer, dst, ds_type);
129 }
130 
/*
 * Dispatches one metric for a port.
 *
 * device, port:  combined into the plugin instance as "<device>:<port>".
 * vs, vc:        array of values and its length; the array is referenced, not
 *                copied, while plugin_dispatch_values() runs.
 * type:          collectd type name of the metric.
 * type_instance: type instance, "" when there is none.
 */
static void ib_submit(const char *device, const char *port, value_t *vs, int vc,
                      const char *type, const char *type_instance) {
  value_list_t metric = VALUE_LIST_INIT;

  /* Identify the metric ... */
  sstrncpy(metric.plugin, "infiniband", sizeof(metric.plugin));
  snprintf(metric.plugin_instance, sizeof(metric.plugin_instance), "%s:%s",
           device, port);
  sstrncpy(metric.type, type, sizeof(metric.type));
  sstrncpy(metric.type_instance, type_instance, sizeof(metric.type_instance));

  /* ... then attach the caller's values and hand it over. */
  metric.values = vs;
  metric.values_len = vc;

  plugin_dispatch_values(&metric);
}
143 
144 /**
145  * For further reading on the available sysfs files, see:
146  * - Linux: ./Documentation/infiniband/sysfs.txt
147  *
148  * For further reading on the meaning of each counter, see the InfiniBand
149  *   Architecture Specification, sections 14.2.5.6 and 16.1.3.5.
150  **/
ib_read_port(const char * device,const char * port)151 static int ib_read_port(const char *device, const char *port) {
152   value_t value, values[2];
153 
154   /* PortInfo attributes */
155 
156   // Port state (4 is "Active")
157   if (ib_read_value_file_num_only(device, port, "state", DS_TYPE_GAUGE,
158                                   &value) == 0)
159     ib_submit(device, port, &value, 1, "ib_state", "");
160   // Port physical state (5 is "LinkUp")
161   if (ib_read_value_file_num_only(device, port, "phys_state", DS_TYPE_GAUGE,
162                                   &value) == 0)
163     ib_submit(device, port, &value, 1, "ib_phys_state", "");
164   // Currently active extended link speed, in Gb/s
165   if (ib_read_value_file_num_only(device, port, "rate", DS_TYPE_GAUGE,
166                                   &value) == 0)
167     ib_submit(device, port, &value, 1, "ib_rate", "");
168   // Supported capabilities of this port
169   if (ib_read_value_file(device, port, "cap_mask", DS_TYPE_GAUGE, &value) == 0)
170     ib_submit(device, port, &value, 1, "ib_cap_mask", "");
171   // The base LID (local identifier) of this port
172   if (ib_read_value_file(device, port, "lid", DS_TYPE_GAUGE, &value) == 0)
173     ib_submit(device, port, &value, 1, "ib_lid", "");
174   // The number of low order bits of the LID to mask (for multipath)
175   if (ib_read_value_file(device, port, "lid_mask_count", DS_TYPE_GAUGE,
176                          &value) == 0)
177     ib_submit(device, port, &value, 1, "ib_lid_mask_count", "");
178   // The LID of the master SM (subnet manager) that is managing this port
179   if (ib_read_value_file(device, port, "sm_lid", DS_TYPE_GAUGE, &value) == 0)
180     ib_submit(device, port, &value, 1, "ib_sm_lid", "");
181   // The administrative SL (service level) of the master SM that is managing
182   //    this port
183   if (ib_read_value_file(device, port, "sm_sl", DS_TYPE_GAUGE, &value) == 0)
184     ib_submit(device, port, &value, 1, "ib_sm_sl", "");
185 
186   /* PortCounters */
187 
188   // Total number of data octets, divided by 4, received on all VLs at the port
189   if ((ib_read_value_file(device, port, "counters/port_rcv_data",
190                           DS_TYPE_DERIVE, &values[0]) == 0) &&
191       (ib_read_value_file(device, port, "counters/port_xmit_data",
192                           DS_TYPE_DERIVE, &values[1]) == 0)) {
193     values[0].derive *= 4;
194     values[1].derive *= 4;
195     ib_submit(device, port, values, 2, "ib_octets", "");
196   }
197   // Total number of packets, including packets containing errors, and excluding
198   //    link packets, received from all VLs on the port
199   if ((ib_read_value_file(device, port, "counters/port_rcv_packets",
200                           DS_TYPE_DERIVE, &values[0]) == 0) &&
201       (ib_read_value_file(device, port, "counters/port_xmit_packets",
202                           DS_TYPE_DERIVE, &values[1]) == 0))
203     ib_submit(device, port, values, 2, "ib_packets", "total");
204   // Total number of packets containing an error that were received on the port
205   if (ib_read_value_file(device, port, "counters/port_rcv_errors",
206                          DS_TYPE_DERIVE, &values[0]) == 0) {
207     values[1].derive = 0;
208     ib_submit(device, port, values, 2, "ib_packets", "errors");
209   }
210   // Total number of packets marked with the EBP delimiter received on the port.
211   if (ib_read_value_file(device, port,
212                          "counters/port_rcv_remote_physical_errors",
213                          DS_TYPE_DERIVE, &values[0]) == 0) {
214     values[1].derive = 0;
215     ib_submit(device, port, values, 2, "ib_packets", "remote_physical_errors");
216   }
217   // Total number of packets received on the port that were discarded because
218   //    they could not be forwarded by the switch relay
219   if (ib_read_value_file(device, port, "counters/port_rcv_switch_relay_errors",
220                          DS_TYPE_DERIVE, &values[0]) == 0) {
221     values[1].derive = 0;
222     ib_submit(device, port, values, 2, "ib_packets", "switch_relay_errors");
223   }
224   // Total number of outbound packets discarded by the port because the port is
225   //    down or congested.
226   if (ib_read_value_file(device, port, "counters/port_xmit_discards",
227                          DS_TYPE_DERIVE, &values[1]) == 0) {
228     values[0].derive = 0;
229     ib_submit(device, port, values, 2, "ib_packets", "discards");
230   }
231   // Total number of packets not transmitted from the switch physical port
232   // Total number of packets received on the switch physical port that are
233   //    discarded
234   if ((ib_read_value_file(device, port, "counters/port_rcv_constraint_errors",
235                           DS_TYPE_DERIVE, &values[0]) == 0) &&
236       (ib_read_value_file(device, port, "counters/port_xmit_constraint_errors",
237                           DS_TYPE_DERIVE, &values[1]) == 0))
238     ib_submit(device, port, values, 2, "ib_packets", "constraint_errors");
239   // Number of incoming VL15 packets dropped due to resource limitations (e.g.,
240   //    lack of buffers) in the port
241   if (ib_read_value_file(device, port, "counters/VL15_dropped", DS_TYPE_DERIVE,
242                          &values[0]) == 0) {
243     values[1].derive = 0;
244     ib_submit(device, port, values, 2, "ib_packets", "vl15_dropped");
245   }
246   // Total number of times the Port Training state machine has successfully
247   //    completed the link error recovery process.
248   if (ib_read_value_file(device, port, "counters/link_error_recovery",
249                          DS_TYPE_DERIVE, &value) == 0)
250     ib_submit(device, port, &value, 1, "ib_link_error_recovery", "recovered");
251   // Total number of times the Port Training state machine has failed the link
252   //    error recovery process and downed the link.
253   if (ib_read_value_file(device, port, "counters/link_downed", DS_TYPE_DERIVE,
254                          &value) == 0)
255     ib_submit(device, port, &value, 1, "ib_link_error_recovery", "downed");
256   // Total number of minor link errors detected on one or more physical lanes.
257   if (ib_read_value_file(device, port, "counters/symbol_error", DS_TYPE_DERIVE,
258                          &value) == 0)
259     ib_submit(device, port, &value, 1, "ib_errors", "symbol_errors");
260   // The number of times that the count of local physical errors exceeded the
261   //    threshold specified by LocalPhyErrors
262   if (ib_read_value_file(device, port, "counters/local_link_integrity_errors",
263                          DS_TYPE_DERIVE, &value) == 0)
264     ib_submit(device, port, &value, 1, "ib_errors",
265               "local_link_integrity_errors");
266   // The number of times that OverrunErrors consecutive flow control update
267   //    periods occurred, each having at least one overrun error
268   if (ib_read_value_file(device, port,
269                          "counters/excessive_buffer_overrun_errors",
270                          DS_TYPE_DERIVE, &value) == 0)
271     ib_submit(device, port, &value, 1, "ib_errors",
272               "excessive_buffer_overrun_errors");
273   // The number of ticks during which the port selected by PortSelect had data
274   //    to transmit but no data was sent during the entire tick
275   if (ib_read_value_file(device, port, "counters/port_xmit_wait",
276                          DS_TYPE_DERIVE, &value) == 0)
277     ib_submit(device, port, &value, 1, "ib_xmit_wait", "");
278 
279   /* PortCountersExtended */
280 
281   if ((ib_read_value_file(device, port, "counters/unicast_rcv_packets",
282                           DS_TYPE_DERIVE, &values[0]) == 0) &&
283       (ib_read_value_file(device, port, "counters/unicast_xmit_packets",
284                           DS_TYPE_DERIVE, &values[1]) == 0))
285     ib_submit(device, port, values, 2, "ib_packets", "unicast");
286   if ((ib_read_value_file(device, port, "counters/multicast_rcv_packets",
287                           DS_TYPE_DERIVE, &values[0]) == 0) &&
288       (ib_read_value_file(device, port, "counters/multicast_xmit_packets",
289                           DS_TYPE_DERIVE, &values[1]) == 0))
290     ib_submit(device, port, values, 2, "ib_packets", "multicast");
291 
292   return 0;
293 }
294 
295 /* Plugin entrypoints ********************************************************/
296 
infiniband_config(const char * key,const char * value)297 static int infiniband_config(const char *key, const char *value) {
298   if (ignorelist == NULL)
299     ignorelist = ignorelist_create(1);
300 
301   if (strcasecmp(key, "Port") == 0) {
302     ignorelist_add(ignorelist, value);
303   } else if (strcasecmp(key, "IgnoreSelected") == 0) {
304     int invert = 1;
305     if (IS_TRUE(value))
306       invert = 0;
307     ignorelist_set_invert(ignorelist, invert);
308   } else {
309     return -1;
310   }
311   return 0;
312 }
313 
/*
 * Init callback: probes sysfs once for InfiniBand ports and unregisters the
 * plugin's read callback when the probe fails (typically because there are no
 * ports), so the plugin is not polled for nothing. Always returns 0.
 */
static int infiniband_init(void) {
  glob_t matches;
  const int probe_rc = ib_glob_ports(&matches);

  if (probe_rc != 0)
    plugin_unregister_read("infiniband");

  /* Only the outcome of the probe matters here, not the matched paths. */
  globfree(&matches);
  return 0;
}
323 
infiniband_read(void)324 static int infiniband_read(void) {
325   int rc = 0;
326   glob_t g;
327   char port_name[255];
328 
329   if (ib_glob_ports(&g) == 0) {
330     for (int i = 0; i < g.gl_pathc; ++i) {
331       char *device = NULL, *port = NULL;
332       if (ib_parse_glob_port(g.gl_pathv[i], &device, &port) == 0) {
333         snprintf(port_name, sizeof(port_name), "%s:%s", device, port);
334         if (ignorelist_match(ignorelist, port_name) == 0)
335           rc &= ib_read_port(device, port);
336       }
337     }
338   }
339 
340   globfree(&g);
341   return rc;
342 }
343 
module_register(void)344 void module_register(void) {
345   plugin_register_config("infiniband", infiniband_config, config_keys,
346                          config_keys_num);
347   plugin_register_init("infiniband", infiniband_init);
348   plugin_register_read("infiniband", infiniband_read);
349 }
350