1  /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2021 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  * Public apis related to metric collection and reporting
25  */
26 
27 /* lws_metrics public part */
28 
/*
 * Unsigned 64-bit scalar used for metric measurement values, eg, the
 * aggregated sum / min / max members of lws_metric_pub_t
 */
typedef uint64_t u_mt_t;
30 
/* Metric behaviour flags, one bit each so they can be OR-ed together */
enum {
	LWSMTFL_REPORT_OUTLIERS				= 0x01, /* bit 0 */
	/**< keep track of outliers and report them internally */
	LWSMTFL_REPORT_OOB				= 0x02, /* bit 1 */
	/**< events are reported at the time they happen */
	LWSMTFL_REPORT_INACTIVITY_AT_PERIODIC		= 0x04, /* bit 2 */
	/**< at the periodic cb, explicitly report externally that there was
	 * no activity; by default a period with no events is not reported */
	LWSMTFL_REPORT_MEAN				= 0x08, /* bit 3 */
	/**< average / min / max are meaningful; without this only the sum
	 * is meaningful */
	LWSMTFL_REPORT_ONLY_GO				= 0x10, /* bit 4 */
	/**< the no-go pieces are invalid */
	LWSMTFL_REPORT_DUTY_WALLCLOCK_US		= 0x20, /* bit 5 */
	/**< the aggregate is compared to wallclock us to get a duty cycle */
	LWSMTFL_REPORT_HIST				= 0x40, /* bit 6 */
	/**< the metric is a histogram (if unset, sum / mean aggregation) */
};
48 
49 /*
50  * lws_metrics_tag allows your object to accumulate OpenMetrics-style
51  * descriptive tags before accounting for it with a metrics object at the end.
52  *
53  * Tags should represent low entropy information that is likely to repeat
54  * identically, so, eg, http method name, not eg, latency in us which is
55  * unlikely to be seen the same twice.
56  *
57  * Tags are just a list of name=value pairs, used for qualifying the final
58  * metrics entry with decorations in additional dimensions.  For example,
59  * rather than keep individual metrics on methods, scheme, mountpoint, result
60  * code, you can keep metrics on http transactions only, and qualify the
61  * transaction metrics entries with tags that can be queried on the metrics
62  * backend to get the finer-grained information.
63  *
64  * http_srv{code="404",mount="/",method="GET",scheme="http"} 3
65  *
66  * For OpenMetrics the tags are converted to a { list } and appended to the base
67  * metrics name before using with actual metrics objects, the same set of tags
68  * on different transactions resolve to the same qualification string.
69  */
70 
71 typedef struct lws_metrics_tag {
72 	lws_dll2_t	list;
73 
74 	const char	*name; /* tag, intended to be in .rodata, not copied */
75 	/* overallocated value */
76 } lws_metrics_tag_t;
77 
78 LWS_EXTERN LWS_VISIBLE int
79 lws_metrics_tag_add(lws_dll2_owner_t *owner, const char *name, const char *val);
80 
/*
 * wsi flavour of tag add: besides adding the tag, the tag value is also
 * appended to the lifecycle tag that is used when logging the wsi identity.
 *
 * Builds without LWS_WITH_SYS_METRICS get a stub that expands to nothing, so
 * the arguments are not evaluated and there is no result to test there.
 */
#if !defined(LWS_WITH_SYS_METRICS)
#define lws_metrics_tag_wsi_add(_a, _b, _c)
#else
LWS_EXTERN LWS_VISIBLE int
lws_metrics_tag_wsi_add(struct lws *wsi, const char *name, const char *val);
#endif
91 
/*
 * Secure Streams flavour of tag add: besides adding the tag, the tag value is
 * also appended to the lifecycle tag that is used when logging the ss
 * identity.
 *
 * Only exists at all when LWS_WITH_SECURE_STREAMS is set; if metrics are
 * additionally disabled, it is a stub that expands to nothing.
 */
#if defined(LWS_WITH_SECURE_STREAMS) && defined(LWS_WITH_SYS_METRICS)
LWS_EXTERN LWS_VISIBLE int
lws_metrics_tag_ss_add(struct lws_ss_handle *ss, const char *name, const char *val);
#elif defined(LWS_WITH_SECURE_STREAMS)
#define lws_metrics_tag_ss_add(_a, _b, _c)
#endif
104 
105 LWS_EXTERN LWS_VISIBLE void
106 lws_metrics_tags_destroy(lws_dll2_owner_t *owner);
107 
108 LWS_EXTERN LWS_VISIBLE size_t
109 lws_metrics_tags_serialize(lws_dll2_owner_t *owner, char *buf, size_t len);
110 
111 LWS_EXTERN LWS_VISIBLE const char *
112 lws_metrics_tag_get(lws_dll2_owner_t *owner, const char *name);
113 
114 /* histogram bucket */
115 
/* One named histogram bucket, chained into a singly-linked list */
typedef struct lws_metric_bucket {
	struct lws_metric_bucket	*next;
	/**< following bucket in the list */
	uint64_t			count;
	/**< count held by this bucket */

	/*
	 * Overallocated directly after the struct: one length byte, then the
	 * bucket name and its NUL
	 */
} lws_metric_bucket_t;

/*
 * Accessors for the overallocated area that follows a bucket: the first byte
 * past the struct is the name length, the name string starts one byte later
 */
#define lws_metric_bucket_name_len(_b) (*(uint8_t *)((_b) + 1))
#define lws_metric_bucket_name(_b) ((const char *)((_b) + 1) + 1)
126 
127 /*
 * These represent persistent local event measurements.  They may aggregate
 * a large number of events in between external dumps of summaries of the
 * period covered, in two different ways
131  *
132  * 1) aggregation by sum or mean, to absorb multiple scalar readings
133  *
134  *  - go / no-go ratio counting
135  *  - mean averaging for, eg, latencies
136  *  - min / max for averaged values
137  *  - period the stats covers
138  *
139  * 2) aggregation by histogram, to absorb a range of outcomes that may occur
140  *    multiple times
141  *
142  *  - add named buckets to histogram
143  *  - bucket has a 64-bit count
144  *  - bumping a bucket just increments the count if already exists, else adds
145  *    a new one with count set to 1
146  *
147  * The same type with a union covers both cases.
148  *
149  * The lws_system ops api that hooks lws_metrics up to a metrics backend is
150  * given a pointer to these according to the related policy, eg, hourly, or
151  * every event passed straight through.
152  */
153 
154 typedef struct lws_metric_pub {
155 	const char		*name;
156 	/**< eg, "n.cn.dns", "vh.myendpoint" */
157 	void			*backend_opaque;
158 	/**< ignored by lws, backend handler completely owns it */
159 
160 	lws_usec_t		us_first;
161 	/**< us time metric started collecting, reset to us_dumped at dump */
162 	lws_usec_t		us_last;
163 	/**< 0, or us time last event, reset to 0 at last dump */
164 	lws_usec_t		us_dumped;
165 	/**< 0 if never, else us time of last dump to external api */
166 
167 	/* scope of data in .u is "since last dump" --> */
168 
169 	union {
170 		/* aggregation, by sum or mean */
171 
172 		struct {
173 			u_mt_t			sum[2];
174 			/**< go, no-go summed for mean or plan sum */
175 			u_mt_t			min;
176 			/**< smallest individual measurement */
177 			u_mt_t			max;
178 			/**< largest individual measurement */
179 
180 			uint32_t		count[2];
181 			/**< go, no-go count of measurements in sum */
182 		} agg;
183 
184 		/* histogram with dynamic named buckets */
185 
186 		struct {
187 			lws_metric_bucket_t	*head;
188 			/**< first bucket in our bucket list */
189 
190 			uint64_t		total_count;
191 			/**< total count in all of our buckets */
192 			uint32_t		list_size;
193 			/**< number of buckets in our bucket list */
194 		} hist;
195 	} u;
196 
197 	uint8_t			flags;
198 
199 } lws_metric_pub_t;
200 
201 LWS_EXTERN LWS_VISIBLE void
202 lws_metrics_hist_bump_priv_tagged(lws_metric_pub_t *mt, lws_dll2_owner_t *tow,
203 				  lws_dll2_owner_t *tow2);
204 
205 
206 /*
207  * Calipers are a helper struct for implementing "hanging latency" detection,
208  * where setting the start time and finding the end time may happen in more than
209  * one place.
210  *
211  * There are convenience wrappers to eliminate caliper definitions and code
212  * cleanly if WITH_SYS_METRICS is disabled for the build.
213  */
214 
215 struct lws_metric;
216 
217 typedef struct lws_metric_caliper {
218 	struct lws_dll2_owner	mtags_owner; /**< collect tags here during
219 					      * caliper lifetime */
220 	struct lws_metric	*mt; /**< NULL == inactive */
221 	lws_usec_t		us_start;
222 } lws_metric_caliper_t;
223 
/*
 * Caliper / histogram convenience wrappers.
 *
 * With LWS_WITH_SYS_METRICS they expand to real code; without it every one of
 * them expands to nothing, so call sites need no preprocessor conditionals.
 *
 * The statement-like wrappers are each a single brace-enclosed compound
 * statement and their arguments are parenthesized, so expression arguments
 * (eg, a ?: expression for pwsi) are not regrouped by the expansion and none
 * of them leaves a bare `if` behind that a following `else` could bind to.
 */
#if defined(LWS_WITH_SYS_METRICS)

/* declare a caliper member / variable; the expansion brings its own ';' */
#define lws_metrics_caliper_compose(_name) \
		lws_metric_caliper_t _name;

/*
 * Bind metric _mt to caliper _name and stamp the start time.  If the caliper
 * already has a metric bound, that is logged and assert(0) fires first.
 */
#define lws_metrics_caliper_bind(_name, _mt) \
	{ if ((_name).mt) { \
		lwsl_err("caliper: overwrite %s\n", \
				lws_metrics_priv_to_pub((_name).mt)->name); \
		assert(0); } \
	  (_name).mt = (_mt); (_name).us_start = lws_now_usecs(); }

/* define a caliper variable already bound to _mt and started now */
#define lws_metrics_caliper_declare(_name, _mt) \
	lws_metric_caliper_t _name = { .mt = (_mt), .us_start = lws_now_usecs() }

/*
 * If the caliper was started (us_start nonzero), report the elapsed us since
 * then to its metric as a go / no-go event; in all cases the caliper is then
 * reset via lws_metrics_caliper_done()
 */
#define lws_metrics_caliper_report(_name, _go_nogo) \
	{ if ((_name).us_start) { \
		lws_metric_event((_name).mt, (_go_nogo), \
				 (u_mt_t)(lws_now_usecs() - \
						(_name).us_start)); \
	  } \
	  lws_metrics_caliper_done(_name); }

/*
 * If the caliper has a metric bound, bump its histogram using the caliper's
 * tags plus, when pwsi is non-NULL, the tags on pwsi's cal_conn caliper, then
 * reset the caliper.  Nothing at all happens for an unbound caliper.
 */
#define lws_metrics_caliper_report_hist(_name, pwsi) \
	{ if ((_name).mt) { \
		lws_metrics_hist_bump_priv_tagged( \
			lws_metrics_priv_to_pub((_name).mt), \
			&(_name).mtags_owner, \
			(pwsi) ? &((pwsi)->cal_conn.mtags_owner) : NULL); \
		lws_metrics_caliper_done(_name); \
	  } }

/* abandon the measurement: reset the caliper without reporting anything */
#define lws_metrics_caliper_cancel(_name) { lws_metrics_caliper_done(_name); }

/* bump bucket _name of a histogram metric given by its public part */
#define lws_metrics_hist_bump(_mt, _name) \
		lws_metrics_hist_bump_(_mt, _name)

/* as lws_metrics_hist_bump(), but _mt is the private metric object */
#define lws_metrics_hist_bump_priv(_mt, _name) \
		lws_metrics_hist_bump_(lws_metrics_priv_to_pub(_mt), _name)

/* reset the caliper to inactive and destroy the tags it collected */
#define lws_metrics_caliper_done(_name) { \
		(_name).us_start = 0; (_name).mt = NULL; \
		lws_metrics_tags_destroy(&(_name).mtags_owner); }

#else

/* metrics disabled: all of the wrappers compile away to nothing */
#define lws_metrics_caliper_compose(_name)
#define lws_metrics_caliper_bind(_name, _mt)
#define lws_metrics_caliper_declare(_name, _mt)
#define lws_metrics_caliper_report(_name, _go_nogo)
#define lws_metrics_caliper_report_hist(_name, pwsi)
#define lws_metrics_caliper_cancel(_name)
#define lws_metrics_hist_bump(_mt, _name)
#define lws_metrics_hist_bump_priv(_mt, _name)
#define lws_metrics_caliper_done(_name)

#endif
265 
266 /**
267  * lws_metrics_format() - helper to format a metrics object for logging
268  *
269  * \param pub: public part of metrics object
270  * \param buf: output buffer to place string in
271  * \param len: available length of \p buf
272  *
273  * Helper for describing the state of a metrics object as a human-readable
274  * string, accounting for how its flags indicate what it contains.  This is not
275  * how you would report metrics, but during development it can be useful to
276  * log them inbetween possibily long report intervals.
277  *
278  * It uses the metric's flags to adapt the format shown appropriately, eg,
279  * as a histogram if LWSMTFL_REPORT_HIST etc
280  */
281 LWS_EXTERN LWS_VISIBLE int
282 lws_metrics_format(lws_metric_pub_t *pub, lws_metric_bucket_t **sub,
283 		   char *buf, size_t len);
284 
285 /**
286  * lws_metrics_hist_bump() - add or increment histogram bucket
287  *
288  * \param pub: public part of metrics object
289  * \param name: bucket name to increment
290  *
291  * Either increment the count of an existing bucket of the right name in the
292  * metrics object, or add a new bucket of the given name and set its count to 1.
293  *
294  * The metrics object must have been created with flag LWSMTFL_REPORT_HIST
295  *
296  * Normally, you will actually use the preprocessor wrapper
297  * lws_metrics_hist_bump() defined above, since this automatically takes care of
298  * removing itself from the build if WITH_SYS_METRICS is not defined, without
299  * needing any preprocessor conditionals.
300  */
301 LWS_EXTERN LWS_VISIBLE int
302 lws_metrics_hist_bump_(lws_metric_pub_t *pub, const char *name);
303 
304 LWS_VISIBLE LWS_EXTERN int
305 lws_metrics_foreach(struct lws_context *ctx, void *user,
306 		    int (*cb)(lws_metric_pub_t *pub, void *user));
307 
308 LWS_VISIBLE LWS_EXTERN int
309 lws_metrics_hist_bump_describe_wsi(struct lws *wsi, lws_metric_pub_t *pub,
310 				   const char *name);
311 
/* What kind of event a measurement relates to */
enum {
	LMT_NORMAL	= 0,
	/**< related to successful events */
	LMT_OUTLIER	= 1,
	/**< related to successful events that were outside of bounds */

	LMT_FAIL	= 2,
	/**< related to failed events */

	LMT_COUNT	= 3,
	/**< how many kinds are listed above, always last */
};
320 
/* Why a report is being made */
typedef enum lws_metric_rpt {
	LMR_PERIODIC	= 0,
	/**< the report is being made on a schedule */
	LMR_OUTLIER	= 1,
	/**< the report is about the last outlier */
} lws_metric_rpt_kind_t;
325 
/*
 * Go / no-go result selectors, eg, for the _go_nogo argument of
 * lws_metrics_caliper_report().
 *
 * NOTE(review): presumably these are also the indexes into the go / no-go
 * sum[] and count[] pairs of lws_metric_pub_t -- confirm.
 */
#define METRES_GO	0
#define METRES_NOGO	1
328 
329 
330