1 /* Copyright (c) 2021, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
3 
4 /**
5  * @file relay_metrics.c
6  * @brief Relay metrics exposed through the MetricsPort
7  **/
8 
9 #define RELAY_METRICS_ENTRY_PRIVATE
10 
11 #include "orconfig.h"
12 
13 #include "core/or/or.h"
14 #include "core/or/relay.h"
15 
16 #include "lib/malloc/malloc.h"
17 #include "lib/container/smartlist.h"
18 #include "lib/metrics/metrics_store.h"
19 #include "lib/log/util_bug.h"
20 
21 #include "feature/relay/relay_metrics.h"
22 #include "feature/stats/rephist.h"
23 
24 #include <event2/dns.h>
25 
/** Forward declarations of the per-metric fill functions referenced by the
 * base_metrics table, listed in the same order as the table entries. */
static void fill_oom_values(void);
static void fill_onionskins_values(void);
static void fill_socket_values(void);
static void fill_global_bw_limit_values(void);
static void fill_dns_query_values(void);
static void fill_dns_error_values(void);
static void fill_tcp_exhaustion_values(void);
34 
35 /** The base metrics that is a static array of metrics added to the metrics
36  * store.
37  *
38  * The key member MUST be also the index of the entry in the array. */
39 static const relay_metrics_entry_t base_metrics[] =
40 {
41   {
42     .key = RELAY_METRICS_NUM_OOM_BYTES,
43     .type = METRICS_TYPE_COUNTER,
44     .name = METRICS_NAME(relay_load_oom_bytes_total),
45     .help = "Total number of bytes the OOM has freed by subsystem",
46     .fill_fn = fill_oom_values,
47   },
48   {
49     .key = RELAY_METRICS_NUM_ONIONSKINS,
50     .type = METRICS_TYPE_COUNTER,
51     .name = METRICS_NAME(relay_load_onionskins_total),
52     .help = "Total number of onionskins handled",
53     .fill_fn = fill_onionskins_values,
54   },
55   {
56     .key = RELAY_METRICS_NUM_SOCKETS,
57     .type = METRICS_TYPE_GAUGE,
58     .name = METRICS_NAME(relay_load_socket_total),
59     .help = "Total number of sockets",
60     .fill_fn = fill_socket_values,
61   },
62   {
63     .key = RELAY_METRICS_NUM_GLOBAL_RW_LIMIT,
64     .type = METRICS_TYPE_COUNTER,
65     .name = METRICS_NAME(relay_load_global_rate_limit_reached_total),
66     .help = "Total number of global connection bucket limit reached",
67     .fill_fn = fill_global_bw_limit_values,
68   },
69   {
70     .key = RELAY_METRICS_NUM_DNS,
71     .type = METRICS_TYPE_COUNTER,
72     .name = METRICS_NAME(relay_exit_dns_query_total),
73     .help = "Total number of DNS queries done by this relay",
74     .fill_fn = fill_dns_query_values,
75   },
76   {
77     .key = RELAY_METRICS_NUM_DNS_ERRORS,
78     .type = METRICS_TYPE_COUNTER,
79     .name = METRICS_NAME(relay_exit_dns_error_total),
80     .help = "Total number of DNS errors encountered by this relay",
81     .fill_fn = fill_dns_error_values,
82   },
83   {
84     .key = RELAY_METRICS_NUM_TCP_EXHAUSTION,
85     .type = METRICS_TYPE_COUNTER,
86     .name = METRICS_NAME(relay_load_tcp_exhaustion_total),
87     .help = "Total number of times we ran out of TCP ports",
88     .fill_fn = fill_tcp_exhaustion_values,
89   },
90 };
91 static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics);
92 
93 /** The only and single store of all the relay metrics. */
94 static metrics_store_t *the_store;
95 
96 /** Helper function to convert an handshake type into a string. */
97 static inline const char *
handshake_type_to_str(const uint16_t type)98 handshake_type_to_str(const uint16_t type)
99 {
100   switch (type) {
101     case ONION_HANDSHAKE_TYPE_TAP:
102       return "tap";
103     case ONION_HANDSHAKE_TYPE_FAST:
104       return "fast";
105     case ONION_HANDSHAKE_TYPE_NTOR:
106       return "ntor";
107     default:
108       // LCOV_EXCL_START
109       tor_assert_unreached();
110       // LCOV_EXCL_STOP
111   }
112 }
113 
114 /** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
115 static void
fill_tcp_exhaustion_values(void)116 fill_tcp_exhaustion_values(void)
117 {
118   metrics_store_entry_t *sentry;
119   const relay_metrics_entry_t *rentry =
120     &base_metrics[RELAY_METRICS_NUM_TCP_EXHAUSTION];
121 
122   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
123                              rentry->help);
124   metrics_store_entry_update(sentry, rep_hist_get_n_tcp_exhaustion());
125 }
126 
/* NOTE: Disable the record type label until libevent is fixed (#40490).
 * Everything inside the #if 0 below is compiled out; it is only used by the
 * equally disabled per-record-type loops in the DNS fill functions. */
#if 0
/** Helper array containing mapping for the name of the different DNS records
 * and their corresponding libevent values. */
static struct dns_type {
  const char *name;
  uint8_t type;
} dns_types[] = {
  { .name = "A",    .type = DNS_IPv4_A     },
  { .name = "PTR",  .type = DNS_PTR        },
  { .name = "AAAA", .type = DNS_IPv6_AAAA  },
};
static const size_t num_dns_types = ARRAY_LENGTH(dns_types);
#endif
141 
142 /** Fill function for the RELAY_METRICS_NUM_DNS_ERRORS metrics. */
143 static void
fill_dns_error_values(void)144 fill_dns_error_values(void)
145 {
146   metrics_store_entry_t *sentry;
147   const relay_metrics_entry_t *rentry =
148     &base_metrics[RELAY_METRICS_NUM_DNS_ERRORS];
149 
150   /* Helper array to map libeven DNS errors to their names and so we can
151    * iterate over this array to add all metrics. */
152   static struct dns_error {
153     const char *name;
154     uint8_t key;
155   } errors[] = {
156     { .name = "success",      .key = DNS_ERR_NONE         },
157     { .name = "format",       .key = DNS_ERR_FORMAT       },
158     { .name = "serverfailed", .key = DNS_ERR_SERVERFAILED },
159     { .name = "notexist",     .key = DNS_ERR_NOTEXIST     },
160     { .name = "notimpl",      .key = DNS_ERR_NOTIMPL      },
161     { .name = "refused",      .key = DNS_ERR_REFUSED      },
162     { .name = "truncated",    .key = DNS_ERR_TRUNCATED    },
163     { .name = "unknown",      .key = DNS_ERR_UNKNOWN      },
164     { .name = "timeout",      .key = DNS_ERR_TIMEOUT      },
165     { .name = "shutdown",     .key = DNS_ERR_SHUTDOWN     },
166     { .name = "cancel",       .key = DNS_ERR_CANCEL       },
167     { .name = "nodata",       .key = DNS_ERR_NODATA       },
168   };
169   static const size_t num_errors = ARRAY_LENGTH(errors);
170 
171   /* NOTE: Disable the record type label until libevent is fixed. */
172 #if 0
173   for (size_t i = 0; i < num_dns_types; i++) {
174     /* Dup the label because metrics_format_label() returns a pointer to a
175      * string on the stack and we need that label for all metrics. */
176     char *record_label =
177       tor_strdup(metrics_format_label("record", dns_types[i].name));
178 
179     for (size_t j = 0; j < num_errors; j++) {
180       sentry = metrics_store_add(the_store, rentry->type, rentry->name,
181                                  rentry->help);
182       metrics_store_entry_add_label(sentry, record_label);
183       metrics_store_entry_add_label(sentry,
184               metrics_format_label("reason", errors[j].name));
185       metrics_store_entry_update(sentry,
186               rep_hist_get_n_dns_error(dns_types[i].type, errors[j].key));
187     }
188     tor_free(record_label);
189   }
190 #endif
191 
192   /* Put in the DNS errors, unfortunately not per-type for now. */
193   for (size_t j = 0; j < num_errors; j++) {
194     sentry = metrics_store_add(the_store, rentry->type, rentry->name,
195                                rentry->help);
196     metrics_store_entry_add_label(sentry,
197             metrics_format_label("reason", errors[j].name));
198     metrics_store_entry_update(sentry,
199             rep_hist_get_n_dns_error(0, errors[j].key));
200   }
201 }
202 
203 /** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
204 static void
fill_dns_query_values(void)205 fill_dns_query_values(void)
206 {
207   metrics_store_entry_t *sentry;
208   const relay_metrics_entry_t *rentry =
209     &base_metrics[RELAY_METRICS_NUM_DNS];
210 
211     /* NOTE: Disable the record type label until libevent is fixed (#40490). */
212 #if 0
213   for (size_t i = 0; i < num_dns_types; i++) {
214     /* Dup the label because metrics_format_label() returns a pointer to a
215      * string on the stack and we need that label for all metrics. */
216     char *record_label =
217       tor_strdup(metrics_format_label("record", dns_types[i].name));
218     sentry = metrics_store_add(the_store, rentry->type, rentry->name,
219                                rentry->help);
220     metrics_store_entry_add_label(sentry, record_label);
221     metrics_store_entry_update(sentry,
222                                rep_hist_get_n_dns_request(dns_types[i].type));
223     tor_free(record_label);
224   }
225 #endif
226 
227   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
228                              rentry->help);
229   metrics_store_entry_update(sentry, rep_hist_get_n_dns_request(0));
230 }
231 
232 /** Fill function for the RELAY_METRICS_NUM_GLOBAL_RW_LIMIT metrics. */
233 static void
fill_global_bw_limit_values(void)234 fill_global_bw_limit_values(void)
235 {
236   metrics_store_entry_t *sentry;
237   const relay_metrics_entry_t *rentry =
238     &base_metrics[RELAY_METRICS_NUM_GLOBAL_RW_LIMIT];
239 
240   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
241                              rentry->help);
242   metrics_store_entry_add_label(sentry,
243                                 metrics_format_label("side", "read"));
244   metrics_store_entry_update(sentry, rep_hist_get_n_read_limit_reached());
245 
246   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
247                              rentry->help);
248   metrics_store_entry_add_label(sentry,
249                                 metrics_format_label("side", "write"));
250   metrics_store_entry_update(sentry, rep_hist_get_n_write_limit_reached());
251 }
252 
253 /** Fill function for the RELAY_METRICS_NUM_SOCKETS metrics. */
254 static void
fill_socket_values(void)255 fill_socket_values(void)
256 {
257   metrics_store_entry_t *sentry;
258   const relay_metrics_entry_t *rentry =
259     &base_metrics[RELAY_METRICS_NUM_SOCKETS];
260 
261   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
262                              rentry->help);
263   metrics_store_entry_add_label(sentry,
264                                 metrics_format_label("state", "opened"));
265   metrics_store_entry_update(sentry, get_n_open_sockets());
266 
267   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
268                              rentry->help);
269   metrics_store_entry_update(sentry, get_max_sockets());
270 }
271 
272 /** Fill function for the RELAY_METRICS_NUM_ONIONSKINS metrics. */
273 static void
fill_onionskins_values(void)274 fill_onionskins_values(void)
275 {
276   metrics_store_entry_t *sentry;
277   const relay_metrics_entry_t *rentry =
278     &base_metrics[RELAY_METRICS_NUM_ONIONSKINS];
279 
280   for (uint16_t t = 0; t <= MAX_ONION_HANDSHAKE_TYPE; t++) {
281     /* Dup the label because metrics_format_label() returns a pointer to a
282      * string on the stack and we need that label for all metrics. */
283     char *type_label =
284       tor_strdup(metrics_format_label("type", handshake_type_to_str(t)));
285     sentry = metrics_store_add(the_store, rentry->type, rentry->name,
286                                rentry->help);
287     metrics_store_entry_add_label(sentry, type_label);
288     metrics_store_entry_add_label(sentry,
289                         metrics_format_label("action", "processed"));
290     metrics_store_entry_update(sentry,
291                                rep_hist_get_circuit_n_handshake_assigned(t));
292 
293     sentry = metrics_store_add(the_store, rentry->type, rentry->name,
294                                rentry->help);
295     metrics_store_entry_add_label(sentry, type_label);
296     metrics_store_entry_add_label(sentry,
297                         metrics_format_label("action", "dropped"));
298     metrics_store_entry_update(sentry,
299                                rep_hist_get_circuit_n_handshake_dropped(t));
300     tor_free(type_label);
301   }
302 }
303 
304 /** Fill function for the RELAY_METRICS_NUM_OOM_BYTES metrics. */
305 static void
fill_oom_values(void)306 fill_oom_values(void)
307 {
308   metrics_store_entry_t *sentry;
309   const relay_metrics_entry_t *rentry =
310     &base_metrics[RELAY_METRICS_NUM_OOM_BYTES];
311 
312   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
313                              rentry->help);
314   metrics_store_entry_add_label(sentry,
315                                 metrics_format_label("subsys", "cell"));
316   metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_cell);
317 
318   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
319                              rentry->help);
320   metrics_store_entry_add_label(sentry,
321                                 metrics_format_label("subsys", "dns"));
322   metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_dns);
323 
324   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
325                              rentry->help);
326   metrics_store_entry_add_label(sentry,
327                                 metrics_format_label("subsys", "geoip"));
328   metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_geoip);
329 
330   sentry = metrics_store_add(the_store, rentry->type, rentry->name,
331                              rentry->help);
332   metrics_store_entry_add_label(sentry,
333                                 metrics_format_label("subsys", "hsdir"));
334   metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_hsdir);
335 }
336 
337 /** Reset the global store and fill it with all the metrics from base_metrics
338  * and their associated values.
339  *
340  * To pull this off, every metrics has a "fill" function that is called and in
341  * charge of adding the metrics to the store, appropriate labels and finally
342  * updating the value to report. */
343 static void
fill_store(void)344 fill_store(void)
345 {
346   /* Reset the current store, we are about to fill it with all the things. */
347   metrics_store_reset(the_store);
348 
349   /* Call the fill function for each metrics. */
350   for (size_t i = 0; i < num_base_metrics; i++) {
351     if (BUG(!base_metrics[i].fill_fn)) {
352       continue;
353     }
354     base_metrics[i].fill_fn();
355   }
356 }
357 
358 /** Return a list of all the relay metrics stores. This is the
359  * function attached to the .get_metrics() member of the subsys_t. */
360 const smartlist_t *
relay_metrics_get_stores(void)361 relay_metrics_get_stores(void)
362 {
363   /* We can't have the caller to free the returned list so keep it static,
364    * simply update it. */
365   static smartlist_t *stores_list = NULL;
366 
367   /* We dynamically fill the store with all the metrics upon a request. The
368    * reason for this is because the exposed metrics of a relay are often
369    * internal counters in the fast path and thus we fetch the value when a
370    * metrics port request arrives instead of keeping a local metrics store of
371    * those values. */
372   fill_store();
373 
374   if (!stores_list) {
375     stores_list = smartlist_new();
376     smartlist_add(stores_list, the_store);
377   }
378 
379   return stores_list;
380 }
381 
382 /** Initialize the relay metrics. */
383 void
relay_metrics_init(void)384 relay_metrics_init(void)
385 {
386   if (BUG(the_store)) {
387     return;
388   }
389   the_store = metrics_store_new();
390 }
391 
392 /** Free the relay metrics. */
393 void
relay_metrics_free(void)394 relay_metrics_free(void)
395 {
396   if (!the_store) {
397     return;
398   }
399   /* NULL is set with this call. */
400   metrics_store_free(the_store);
401 }
402