xref: /linux/kernel/irq/timings.c (revision 99bfce5d)
1 /*
2  * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  */
9 #include <linux/kernel.h>
10 #include <linux/percpu.h>
11 #include <linux/slab.h>
12 #include <linux/static_key.h>
13 #include <linux/interrupt.h>
14 #include <linux/idr.h>
15 #include <linux/irq.h>
16 #include <linux/math64.h>
17 
18 #include <trace/events/irq.h>
19 
20 #include "internals.h"
21 
/* Global on/off switch for irq timings tracking (patched via static branch). */
DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);

/* Per-CPU circular buffer of encoded (irq, timestamp) samples. */
DEFINE_PER_CPU(struct irq_timings, irq_timings);

/*
 * Per-interrupt, per-CPU online statistics. One instance per irq is
 * allocated with alloc_percpu() and stored in the irqt_stats idr.
 */
struct irqt_stat {
	u64	next_evt;	/* predicted timestamp of the next event (ns) */
	u64	last_ts;	/* timestamp of the last recorded occurrence */
	u64	variance;	/* running sum for the online variance; the
				 * consumer shifts it by IRQ_TIMINGS_SHIFT */
	u32	avg;		/* online average of the interval, in ns */
	u32	nr_samples;	/* number of samples since the last reset */
	int	anomalies;	/* consecutive out-of-distribution intervals */
	int	valid;		/* non-zero when the stats are usable for
				 * prediction (see irq_timings_next_event) */
};

/* Maps an irq number to its per-CPU struct irqt_stat. */
static DEFINE_IDR(irqt_stats);
37 
/**
 * irq_timings_enable - turn on interrupt timings tracking
 *
 * Enables the irq_timing_enabled static branch, activating the
 * timings recording paths guarded by it.
 */
void irq_timings_enable(void)
{
	static_branch_enable(&irq_timing_enabled);
}
42 
/**
 * irq_timings_disable - turn off interrupt timings tracking
 *
 * Disables the irq_timing_enabled static branch, deactivating the
 * timings recording paths guarded by it.
 */
void irq_timings_disable(void)
{
	static_branch_disable(&irq_timing_enabled);
}
47 
/**
 * irqs_update - update the irq timing statistics with a new timestamp
 *
 * @irqs: an irqt_stat struct pointer
 * @ts: the new timestamp
 *
 * The statistics are computed online, in other words, the code is
 * designed to compute the statistics on a stream of values rather
 * than doing multiple passes on the values to compute the average,
 * then the variance. The integer division introduces a loss of
 * precision but with an acceptable error margin regarding the results
 * we would have with the double floating precision: we are dealing
 * with nanosec, so big numbers, consequently the mantissa is
 * negligible, especially when converting the time in usec
 * afterwards.
 *
 * The computation happens at idle time. When the CPU is not idle, the
 * interrupts' timestamps are stored in the circular buffer, when the
 * CPU goes idle and this routine is called, all the buffer's values
 * are injected in the statistical model continuing to extend the
 * statistics from the previous busy-idle cycle.
 *
 * The observations showed a device will trigger a burst of periodic
 * interrupts followed by one or two peaks of longer time, for
 * instance when a SD card device flushes its cache, then the periodic
 * intervals occur again. A one second inactivity period resets the
 * stats, that gives us the certitude the statistical values won't
 * exceed 1x10^9, thus the computation won't overflow.
 *
 * Basically, the purpose of the algorithm is to watch the periodic
 * interrupts and eliminate the peaks.
 *
 * An interrupt is considered periodically stable if the interval of
 * its occurrences follow the normal distribution, thus the values
 * comply with:
 *
 *      avg - 3 x stddev < value < avg + 3 x stddev
 *
 * Which can be simplified to:
 *
 *      -3 x stddev < value - avg < 3 x stddev
 *
 *      abs(value - avg) < 3 x stddev
 *
 * In order to save a costly square root computation, we use the
 * variance. For the record, stddev = sqrt(variance). The equation
 * above becomes:
 *
 *      abs(value - avg) < 3 x sqrt(variance)
 *
 * And finally we square it:
 *
 *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
 *
 *      (value - avg) x (value - avg) < 9 x variance
 *
 * Statistically speaking, any values out of this interval is
 * considered as an anomaly. However, a normal distribution appears
 * when the number of samples is 30 (it is the rule of thumb in
 * statistics, cf. "30 samples" on Internet). After too many
 * consecutive anomalies the statistics are reset.
 */
static void irqs_update(struct irqt_stat *irqs, u64 ts)
{
	u64 old_ts = irqs->last_ts;
	u64 variance = 0;
	u64 interval;
	s64 diff;

	/*
	 * The timestamps are absolute time values, we need to compute
	 * the timing interval between two interrupts.
	 */
	irqs->last_ts = ts;

	/*
	 * The interval type is u64 in order to deal with the same
	 * type in our computation, that prevents subtle issues with
	 * overflow, sign and division.
	 */
	interval = ts - old_ts;

	/*
	 * The interrupt triggered more than one second apart, that
	 * ends the sequence as predictable for our purpose. In this
	 * case, assume we have the beginning of a sequence and the
	 * timestamp is the first value. As it is impossible to
	 * predict anything at this point, return.
	 *
	 * Note the first timestamp of the sequence will always fall
	 * in this test because the old_ts is zero. That is what we
	 * want as we need another timestamp to compute an interval.
	 */
	if (interval >= NSEC_PER_SEC) {
		memset(irqs, 0, sizeof(*irqs));
		irqs->last_ts = ts;
		return;
	}

	/*
	 * Pre-compute the delta with the average as the result is
	 * used several times in this function.
	 */
	diff = interval - irqs->avg;

	/*
	 * Increment the number of samples.
	 */
	irqs->nr_samples++;

	/*
	 * Online variance divided by the number of elements if there
	 * is more than one sample.  Normally the formula is division
	 * by nr_samples - 1 but we assume the number of element will be
	 * more than 32 and dividing by 32 instead of 31 is enough
	 * precise.
	 */
	if (likely(irqs->nr_samples > 1))
		variance = irqs->variance >> IRQ_TIMINGS_SHIFT;

	/*
	 * The rule of thumb in statistics for the normal distribution
	 * is having at least 30 samples in order to have the model to
	 * apply. Values outside the interval are considered as an
	 * anomaly.
	 */
	if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
		/*
		 * Too many consecutive anomalies: reset the stats as
		 * they are no longer stable enough. Because of the
		 * post-increment, the reset actually fires on the
		 * fourth consecutive anomaly (counter values 0..3
		 * pass through first).
		 *
		 * NOTE(review): an anomalous sample below the reset
		 * threshold still falls through and updates avg /
		 * variance below, i.e. it is not discarded — confirm
		 * this is intentional.
		 */
		if (irqs->anomalies++ >= 3) {
			memset(irqs, 0, sizeof(*irqs));
			irqs->last_ts = ts;
			return;
		}
	} else {
		/*
		 * The anomalies must be consecutive, so at this
		 * point, we reset the anomalies counter.
		 */
		irqs->anomalies = 0;
	}

	/*
	 * The interrupt is considered stable enough to try to predict
	 * the next event on it.
	 */
	irqs->valid = 1;

	/*
	 * Online average algorithm:
	 *
	 *  new_average = average + ((value - average) / count)
	 *
	 * Note the code divides by the fixed 2^IRQ_TIMINGS_SHIFT
	 * instead of the actual count, consistent with the ">= 32
	 * samples" assumption documented above.
	 *
	 * The variance computation depends on the new average
	 * to be computed here first.
	 */
	irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);

	/*
	 * Online variance algorithm:
	 *
	 *  new_variance = variance + (value - average) x (value - new_average)
	 *
	 * Warning: irqs->avg is updated with the line above, hence
	 * 'interval - irqs->avg' is no longer equal to 'diff'
	 */
	irqs->variance = irqs->variance + (diff * (interval - irqs->avg));

	/*
	 * Update the next event: predicted as one average interval
	 * after the current timestamp.
	 */
	irqs->next_evt = ts + irqs->avg;
}
225 
/**
 * irq_timings_next_event - Return when the next event is supposed to arrive
 *
 * @now: the current timestamp (ns), used as the floor for the estimate
 *
 * During the last busy cycle, the number of interrupts is incremented
 * and stored in the irq_timings structure. This information is
 * necessary to:
 *
 * - know if the index in the table wrapped up:
 *
 *      If more than the array size interrupts happened during the
 *      last busy/idle cycle, the index wrapped up and we have to
 *      begin with the next element in the array which is the last one
 *      in the sequence, otherwise it is at the index 0.
 *
 * - have an indication of the interrupts activity on this CPU
 *   (eg. irq/sec)
 *
 * The values are 'consumed' after inserting in the statistical model,
 * thus the count is reinitialized.
 *
 * The array of values **must** be browsed in the time direction, the
 * timestamp must increase between an element and the next one.
 *
 * Returns a nanosec time based estimation of the earliest interrupt,
 * U64_MAX otherwise.
 */
u64 irq_timings_next_event(u64 now)
{
	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
	struct irqt_stat *irqs;
	struct irqt_stat __percpu *s;
	u64 ts, next_evt = U64_MAX;
	int i, irq = 0;

	/*
	 * This function must be called with the local irq disabled in
	 * order to prevent the timings circular buffer to be updated
	 * while we are reading it.
	 */
	lockdep_assert_irqs_disabled();

	/*
	 * Number of elements in the circular buffer: If it happens it
	 * was flushed before, then the number of elements could be
	 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
	 * otherwise the array size is used as we wrapped. The index
	 * begins from zero when we did not wrap. That could be done
	 * in a nicer way with the proper circular array structure
	 * type but with the cost of extra computation in the
	 * interrupt handler hot path. We choose efficiency.
	 *
	 * NOTE(review): when the buffer did not wrap (count <
	 * IRQ_TIMINGS_SIZE), i starts at 'count & IRQ_TIMINGS_MASK',
	 * which equals 'count', not 0 as the comment above claims —
	 * confirm against irq_timing_encode/decode whether the slots
	 * actually consumed are the intended ones.
	 *
	 * Inject measured irq/timestamp to the statistical model
	 * while decrementing the counter because we consume the data
	 * from our circular buffer.
	 */
	for (i = irqts->count & IRQ_TIMINGS_MASK,
		     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
	     irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {

		irq = irq_timing_decode(irqts->values[i], &ts);

		/* Only feed the model for irqs with an allocated stat. */
		s = idr_find(&irqt_stats, irq);
		if (s) {
			irqs = this_cpu_ptr(s);
			irqs_update(irqs, ts);
		}
	}

	/*
	 * Look in the list of interrupts' statistics, the earliest
	 * next event.
	 */
	idr_for_each_entry(&irqt_stats, s, i) {

		irqs = this_cpu_ptr(s);

		if (!irqs->valid)
			continue;

		/*
		 * A prediction already in the past means the event is
		 * imminent: clamp the answer to 'now' and stop
		 * searching, nothing can be earlier.
		 */
		if (irqs->next_evt <= now) {
			irq = i;
			next_evt = now;

			/*
			 * This interrupt mustn't be used in the future
			 * until new events occur and update the
			 * statistics.
			 */
			irqs->valid = 0;
			break;
		}

		/* Keep the earliest future prediction seen so far. */
		if (irqs->next_evt < next_evt) {
			irq = i;
			next_evt = irqs->next_evt;
		}
	}

	return next_evt;
}
326 
327 void irq_timings_free(int irq)
328 {
329 	struct irqt_stat __percpu *s;
330 
331 	s = idr_find(&irqt_stats, irq);
332 	if (s) {
333 		free_percpu(s);
334 		idr_remove(&irqt_stats, irq);
335 	}
336 }
337 
338 int irq_timings_alloc(int irq)
339 {
340 	struct irqt_stat __percpu *s;
341 	int id;
342 
343 	/*
344 	 * Some platforms can have the same private interrupt per cpu,
345 	 * so this function may be be called several times with the
346 	 * same interrupt number. Just bail out in case the per cpu
347 	 * stat structure is already allocated.
348 	 */
349 	s = idr_find(&irqt_stats, irq);
350 	if (s)
351 		return 0;
352 
353 	s = alloc_percpu(*s);
354 	if (!s)
355 		return -ENOMEM;
356 
357 	idr_preload(GFP_KERNEL);
358 	id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
359 	idr_preload_end();
360 
361 	if (id < 0) {
362 		free_percpu(s);
363 		return id;
364 	}
365 
366 	return 0;
367 }
368