1 /* Copyright (c) 2021, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
3
4 /**
5 * @file relay_metrics.c
6 * @brief Relay metrics exposed through the MetricsPort
7 **/
8
9 #define RELAY_METRICS_ENTRY_PRIVATE
10
11 #include "orconfig.h"
12
13 #include "core/or/or.h"
14 #include "core/or/relay.h"
15
16 #include "lib/malloc/malloc.h"
17 #include "lib/container/smartlist.h"
18 #include "lib/metrics/metrics_store.h"
19 #include "lib/log/util_bug.h"
20
21 #include "feature/relay/relay_metrics.h"
22 #include "feature/stats/rephist.h"
23
24 #include <event2/dns.h>
25
/** Forward declarations of the per-metric fill callbacks referenced by the
 * base_metrics table below, listed in the same order as the table entries. */
static void fill_oom_values(void);
static void fill_onionskins_values(void);
static void fill_socket_values(void);
static void fill_global_bw_limit_values(void);
static void fill_dns_query_values(void);
static void fill_dns_error_values(void);
static void fill_tcp_exhaustion_values(void);
34
35 /** The base metrics that is a static array of metrics added to the metrics
36 * store.
37 *
38 * The key member MUST be also the index of the entry in the array. */
39 static const relay_metrics_entry_t base_metrics[] =
40 {
41 {
42 .key = RELAY_METRICS_NUM_OOM_BYTES,
43 .type = METRICS_TYPE_COUNTER,
44 .name = METRICS_NAME(relay_load_oom_bytes_total),
45 .help = "Total number of bytes the OOM has freed by subsystem",
46 .fill_fn = fill_oom_values,
47 },
48 {
49 .key = RELAY_METRICS_NUM_ONIONSKINS,
50 .type = METRICS_TYPE_COUNTER,
51 .name = METRICS_NAME(relay_load_onionskins_total),
52 .help = "Total number of onionskins handled",
53 .fill_fn = fill_onionskins_values,
54 },
55 {
56 .key = RELAY_METRICS_NUM_SOCKETS,
57 .type = METRICS_TYPE_GAUGE,
58 .name = METRICS_NAME(relay_load_socket_total),
59 .help = "Total number of sockets",
60 .fill_fn = fill_socket_values,
61 },
62 {
63 .key = RELAY_METRICS_NUM_GLOBAL_RW_LIMIT,
64 .type = METRICS_TYPE_COUNTER,
65 .name = METRICS_NAME(relay_load_global_rate_limit_reached_total),
66 .help = "Total number of global connection bucket limit reached",
67 .fill_fn = fill_global_bw_limit_values,
68 },
69 {
70 .key = RELAY_METRICS_NUM_DNS,
71 .type = METRICS_TYPE_COUNTER,
72 .name = METRICS_NAME(relay_exit_dns_query_total),
73 .help = "Total number of DNS queries done by this relay",
74 .fill_fn = fill_dns_query_values,
75 },
76 {
77 .key = RELAY_METRICS_NUM_DNS_ERRORS,
78 .type = METRICS_TYPE_COUNTER,
79 .name = METRICS_NAME(relay_exit_dns_error_total),
80 .help = "Total number of DNS errors encountered by this relay",
81 .fill_fn = fill_dns_error_values,
82 },
83 {
84 .key = RELAY_METRICS_NUM_TCP_EXHAUSTION,
85 .type = METRICS_TYPE_COUNTER,
86 .name = METRICS_NAME(relay_load_tcp_exhaustion_total),
87 .help = "Total number of times we ran out of TCP ports",
88 .fill_fn = fill_tcp_exhaustion_values,
89 },
90 };
91 static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics);
92
93 /** The only and single store of all the relay metrics. */
94 static metrics_store_t *the_store;
95
96 /** Helper function to convert an handshake type into a string. */
97 static inline const char *
handshake_type_to_str(const uint16_t type)98 handshake_type_to_str(const uint16_t type)
99 {
100 switch (type) {
101 case ONION_HANDSHAKE_TYPE_TAP:
102 return "tap";
103 case ONION_HANDSHAKE_TYPE_FAST:
104 return "fast";
105 case ONION_HANDSHAKE_TYPE_NTOR:
106 return "ntor";
107 default:
108 // LCOV_EXCL_START
109 tor_assert_unreached();
110 // LCOV_EXCL_STOP
111 }
112 }
113
114 /** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
115 static void
fill_tcp_exhaustion_values(void)116 fill_tcp_exhaustion_values(void)
117 {
118 metrics_store_entry_t *sentry;
119 const relay_metrics_entry_t *rentry =
120 &base_metrics[RELAY_METRICS_NUM_TCP_EXHAUSTION];
121
122 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
123 rentry->help);
124 metrics_store_entry_update(sentry, rep_hist_get_n_tcp_exhaustion());
125 }
126
/* NOTE: The record type label is disabled until libevent is fixed, so this
 * table is compiled out together with the loops that walk it. */
#if 0
/** One DNS record type: its label name and the matching libevent value. */
struct dns_type {
  const char *name;
  uint8_t type;
};

/** Helper array mapping the name of each DNS record type to its
 * corresponding libevent value. */
static struct dns_type dns_types[] = {
  { .name = "A",    .type = DNS_IPv4_A },
  { .name = "PTR",  .type = DNS_PTR },
  { .name = "AAAA", .type = DNS_IPv6_AAAA },
};
static const size_t num_dns_types = ARRAY_LENGTH(dns_types);
#endif /* 0 */
141
142 /** Fill function for the RELAY_METRICS_NUM_DNS_ERRORS metrics. */
143 static void
fill_dns_error_values(void)144 fill_dns_error_values(void)
145 {
146 metrics_store_entry_t *sentry;
147 const relay_metrics_entry_t *rentry =
148 &base_metrics[RELAY_METRICS_NUM_DNS_ERRORS];
149
150 /* Helper array to map libeven DNS errors to their names and so we can
151 * iterate over this array to add all metrics. */
152 static struct dns_error {
153 const char *name;
154 uint8_t key;
155 } errors[] = {
156 { .name = "success", .key = DNS_ERR_NONE },
157 { .name = "format", .key = DNS_ERR_FORMAT },
158 { .name = "serverfailed", .key = DNS_ERR_SERVERFAILED },
159 { .name = "notexist", .key = DNS_ERR_NOTEXIST },
160 { .name = "notimpl", .key = DNS_ERR_NOTIMPL },
161 { .name = "refused", .key = DNS_ERR_REFUSED },
162 { .name = "truncated", .key = DNS_ERR_TRUNCATED },
163 { .name = "unknown", .key = DNS_ERR_UNKNOWN },
164 { .name = "timeout", .key = DNS_ERR_TIMEOUT },
165 { .name = "shutdown", .key = DNS_ERR_SHUTDOWN },
166 { .name = "cancel", .key = DNS_ERR_CANCEL },
167 { .name = "nodata", .key = DNS_ERR_NODATA },
168 };
169 static const size_t num_errors = ARRAY_LENGTH(errors);
170
171 /* NOTE: Disable the record type label until libevent is fixed. */
172 #if 0
173 for (size_t i = 0; i < num_dns_types; i++) {
174 /* Dup the label because metrics_format_label() returns a pointer to a
175 * string on the stack and we need that label for all metrics. */
176 char *record_label =
177 tor_strdup(metrics_format_label("record", dns_types[i].name));
178
179 for (size_t j = 0; j < num_errors; j++) {
180 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
181 rentry->help);
182 metrics_store_entry_add_label(sentry, record_label);
183 metrics_store_entry_add_label(sentry,
184 metrics_format_label("reason", errors[j].name));
185 metrics_store_entry_update(sentry,
186 rep_hist_get_n_dns_error(dns_types[i].type, errors[j].key));
187 }
188 tor_free(record_label);
189 }
190 #endif
191
192 /* Put in the DNS errors, unfortunately not per-type for now. */
193 for (size_t j = 0; j < num_errors; j++) {
194 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
195 rentry->help);
196 metrics_store_entry_add_label(sentry,
197 metrics_format_label("reason", errors[j].name));
198 metrics_store_entry_update(sentry,
199 rep_hist_get_n_dns_error(0, errors[j].key));
200 }
201 }
202
203 /** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
204 static void
fill_dns_query_values(void)205 fill_dns_query_values(void)
206 {
207 metrics_store_entry_t *sentry;
208 const relay_metrics_entry_t *rentry =
209 &base_metrics[RELAY_METRICS_NUM_DNS];
210
211 /* NOTE: Disable the record type label until libevent is fixed (#40490). */
212 #if 0
213 for (size_t i = 0; i < num_dns_types; i++) {
214 /* Dup the label because metrics_format_label() returns a pointer to a
215 * string on the stack and we need that label for all metrics. */
216 char *record_label =
217 tor_strdup(metrics_format_label("record", dns_types[i].name));
218 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
219 rentry->help);
220 metrics_store_entry_add_label(sentry, record_label);
221 metrics_store_entry_update(sentry,
222 rep_hist_get_n_dns_request(dns_types[i].type));
223 tor_free(record_label);
224 }
225 #endif
226
227 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
228 rentry->help);
229 metrics_store_entry_update(sentry, rep_hist_get_n_dns_request(0));
230 }
231
232 /** Fill function for the RELAY_METRICS_NUM_GLOBAL_RW_LIMIT metrics. */
233 static void
fill_global_bw_limit_values(void)234 fill_global_bw_limit_values(void)
235 {
236 metrics_store_entry_t *sentry;
237 const relay_metrics_entry_t *rentry =
238 &base_metrics[RELAY_METRICS_NUM_GLOBAL_RW_LIMIT];
239
240 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
241 rentry->help);
242 metrics_store_entry_add_label(sentry,
243 metrics_format_label("side", "read"));
244 metrics_store_entry_update(sentry, rep_hist_get_n_read_limit_reached());
245
246 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
247 rentry->help);
248 metrics_store_entry_add_label(sentry,
249 metrics_format_label("side", "write"));
250 metrics_store_entry_update(sentry, rep_hist_get_n_write_limit_reached());
251 }
252
253 /** Fill function for the RELAY_METRICS_NUM_SOCKETS metrics. */
254 static void
fill_socket_values(void)255 fill_socket_values(void)
256 {
257 metrics_store_entry_t *sentry;
258 const relay_metrics_entry_t *rentry =
259 &base_metrics[RELAY_METRICS_NUM_SOCKETS];
260
261 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
262 rentry->help);
263 metrics_store_entry_add_label(sentry,
264 metrics_format_label("state", "opened"));
265 metrics_store_entry_update(sentry, get_n_open_sockets());
266
267 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
268 rentry->help);
269 metrics_store_entry_update(sentry, get_max_sockets());
270 }
271
272 /** Fill function for the RELAY_METRICS_NUM_ONIONSKINS metrics. */
273 static void
fill_onionskins_values(void)274 fill_onionskins_values(void)
275 {
276 metrics_store_entry_t *sentry;
277 const relay_metrics_entry_t *rentry =
278 &base_metrics[RELAY_METRICS_NUM_ONIONSKINS];
279
280 for (uint16_t t = 0; t <= MAX_ONION_HANDSHAKE_TYPE; t++) {
281 /* Dup the label because metrics_format_label() returns a pointer to a
282 * string on the stack and we need that label for all metrics. */
283 char *type_label =
284 tor_strdup(metrics_format_label("type", handshake_type_to_str(t)));
285 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
286 rentry->help);
287 metrics_store_entry_add_label(sentry, type_label);
288 metrics_store_entry_add_label(sentry,
289 metrics_format_label("action", "processed"));
290 metrics_store_entry_update(sentry,
291 rep_hist_get_circuit_n_handshake_assigned(t));
292
293 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
294 rentry->help);
295 metrics_store_entry_add_label(sentry, type_label);
296 metrics_store_entry_add_label(sentry,
297 metrics_format_label("action", "dropped"));
298 metrics_store_entry_update(sentry,
299 rep_hist_get_circuit_n_handshake_dropped(t));
300 tor_free(type_label);
301 }
302 }
303
304 /** Fill function for the RELAY_METRICS_NUM_OOM_BYTES metrics. */
305 static void
fill_oom_values(void)306 fill_oom_values(void)
307 {
308 metrics_store_entry_t *sentry;
309 const relay_metrics_entry_t *rentry =
310 &base_metrics[RELAY_METRICS_NUM_OOM_BYTES];
311
312 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
313 rentry->help);
314 metrics_store_entry_add_label(sentry,
315 metrics_format_label("subsys", "cell"));
316 metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_cell);
317
318 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
319 rentry->help);
320 metrics_store_entry_add_label(sentry,
321 metrics_format_label("subsys", "dns"));
322 metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_dns);
323
324 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
325 rentry->help);
326 metrics_store_entry_add_label(sentry,
327 metrics_format_label("subsys", "geoip"));
328 metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_geoip);
329
330 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
331 rentry->help);
332 metrics_store_entry_add_label(sentry,
333 metrics_format_label("subsys", "hsdir"));
334 metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_hsdir);
335 }
336
337 /** Reset the global store and fill it with all the metrics from base_metrics
338 * and their associated values.
339 *
340 * To pull this off, every metrics has a "fill" function that is called and in
341 * charge of adding the metrics to the store, appropriate labels and finally
342 * updating the value to report. */
343 static void
fill_store(void)344 fill_store(void)
345 {
346 /* Reset the current store, we are about to fill it with all the things. */
347 metrics_store_reset(the_store);
348
349 /* Call the fill function for each metrics. */
350 for (size_t i = 0; i < num_base_metrics; i++) {
351 if (BUG(!base_metrics[i].fill_fn)) {
352 continue;
353 }
354 base_metrics[i].fill_fn();
355 }
356 }
357
358 /** Return a list of all the relay metrics stores. This is the
359 * function attached to the .get_metrics() member of the subsys_t. */
360 const smartlist_t *
relay_metrics_get_stores(void)361 relay_metrics_get_stores(void)
362 {
363 /* We can't have the caller to free the returned list so keep it static,
364 * simply update it. */
365 static smartlist_t *stores_list = NULL;
366
367 /* We dynamically fill the store with all the metrics upon a request. The
368 * reason for this is because the exposed metrics of a relay are often
369 * internal counters in the fast path and thus we fetch the value when a
370 * metrics port request arrives instead of keeping a local metrics store of
371 * those values. */
372 fill_store();
373
374 if (!stores_list) {
375 stores_list = smartlist_new();
376 smartlist_add(stores_list, the_store);
377 }
378
379 return stores_list;
380 }
381
382 /** Initialize the relay metrics. */
383 void
relay_metrics_init(void)384 relay_metrics_init(void)
385 {
386 if (BUG(the_store)) {
387 return;
388 }
389 the_store = metrics_store_new();
390 }
391
392 /** Free the relay metrics. */
393 void
relay_metrics_free(void)394 relay_metrics_free(void)
395 {
396 if (!the_store) {
397 return;
398 }
399 /* NULL is set with this call. */
400 metrics_store_free(the_store);
401 }
402