1 /* 2 * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 * 8 */ 9 #include <linux/kernel.h> 10 #include <linux/percpu.h> 11 #include <linux/slab.h> 12 #include <linux/static_key.h> 13 #include <linux/interrupt.h> 14 #include <linux/idr.h> 15 #include <linux/irq.h> 16 #include <linux/math64.h> 17 18 #include <trace/events/irq.h> 19 20 #include "internals.h" 21 22 DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); 23 24 DEFINE_PER_CPU(struct irq_timings, irq_timings); 25 26 struct irqt_stat { 27 u64 next_evt; 28 u64 last_ts; 29 u64 variance; 30 u32 avg; 31 u32 nr_samples; 32 int anomalies; 33 int valid; 34 }; 35 36 static DEFINE_IDR(irqt_stats); 37 38 void irq_timings_enable(void) 39 { 40 static_branch_enable(&irq_timing_enabled); 41 } 42 43 void irq_timings_disable(void) 44 { 45 static_branch_disable(&irq_timing_enabled); 46 } 47 48 /** 49 * irqs_update - update the irq timing statistics with a new timestamp 50 * 51 * @irqs: an irqt_stat struct pointer 52 * @ts: the new timestamp 53 * 54 * The statistics are computed online, in other words, the code is 55 * designed to compute the statistics on a stream of values rather 56 * than doing multiple passes on the values to compute the average, 57 * then the variance. The integer division introduces a loss of 58 * precision but with an acceptable error margin regarding the results 59 * we would have with the double floating precision: we are dealing 60 * with nanosec, so big numbers, consequently the mantisse is 61 * negligeable, especially when converting the time in usec 62 * afterwards. 63 * 64 * The computation happens at idle time. When the CPU is not idle, the 65 * interrupts' timestamps are stored in the circular buffer, when the 66 * CPU goes idle and this routine is called, all the buffer's values 67 * are injected in the statistical model continuying to extend the 68 * statistics from the previous busy-idle cycle. 69 * 70 * The observations showed a device will trigger a burst of periodic 71 * interrupts followed by one or two peaks of longer time, for 72 * instance when a SD card device flushes its cache, then the periodic 73 * intervals occur again. A one second inactivity period resets the 74 * stats, that gives us the certitude the statistical values won't 75 * exceed 1x10^9, thus the computation won't overflow. 76 * 77 * Basically, the purpose of the algorithm is to watch the periodic 78 * interrupts and eliminate the peaks. 79 * 80 * An interrupt is considered periodically stable if the interval of 81 * its occurences follow the normal distribution, thus the values 82 * comply with: 83 * 84 * avg - 3 x stddev < value < avg + 3 x stddev 85 * 86 * Which can be simplified to: 87 * 88 * -3 x stddev < value - avg < 3 x stddev 89 * 90 * abs(value - avg) < 3 x stddev 91 * 92 * In order to save a costly square root computation, we use the 93 * variance. For the record, stddev = sqrt(variance). The equation 94 * above becomes: 95 * 96 * abs(value - avg) < 3 x sqrt(variance) 97 * 98 * And finally we square it: 99 * 100 * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 101 * 102 * (value - avg) x (value - avg) < 9 x variance 103 * 104 * Statistically speaking, any values out of this interval is 105 * considered as an anomaly and is discarded. However, a normal 106 * distribution appears when the number of samples is 30 (it is the 107 * rule of thumb in statistics, cf. "30 samples" on Internet). When 108 * there are three consecutive anomalies, the statistics are resetted. 109 * 110 */ 111 static void irqs_update(struct irqt_stat *irqs, u64 ts) 112 { 113 u64 old_ts = irqs->last_ts; 114 u64 variance = 0; 115 u64 interval; 116 s64 diff; 117 118 /* 119 * The timestamps are absolute time values, we need to compute 120 * the timing interval between two interrupts. 121 */ 122 irqs->last_ts = ts; 123 124 /* 125 * The interval type is u64 in order to deal with the same 126 * type in our computation, that prevent mindfuck issues with 127 * overflow, sign and division. 128 */ 129 interval = ts - old_ts; 130 131 /* 132 * The interrupt triggered more than one second apart, that 133 * ends the sequence as predictible for our purpose. In this 134 * case, assume we have the beginning of a sequence and the 135 * timestamp is the first value. As it is impossible to 136 * predict anything at this point, return. 137 * 138 * Note the first timestamp of the sequence will always fall 139 * in this test because the old_ts is zero. That is what we 140 * want as we need another timestamp to compute an interval. 141 */ 142 if (interval >= NSEC_PER_SEC) { 143 memset(irqs, 0, sizeof(*irqs)); 144 irqs->last_ts = ts; 145 return; 146 } 147 148 /* 149 * Pre-compute the delta with the average as the result is 150 * used several times in this function. 151 */ 152 diff = interval - irqs->avg; 153 154 /* 155 * Increment the number of samples. 156 */ 157 irqs->nr_samples++; 158 159 /* 160 * Online variance divided by the number of elements if there 161 * is more than one sample. Normally the formula is division 162 * by nr_samples - 1 but we assume the number of element will be 163 * more than 32 and dividing by 32 instead of 31 is enough 164 * precise. 165 */ 166 if (likely(irqs->nr_samples > 1)) 167 variance = irqs->variance >> IRQ_TIMINGS_SHIFT; 168 169 /* 170 * The rule of thumb in statistics for the normal distribution 171 * is having at least 30 samples in order to have the model to 172 * apply. Values outside the interval are considered as an 173 * anomaly. 174 */ 175 if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { 176 /* 177 * After three consecutive anomalies, we reset the 178 * stats as it is no longer stable enough. 179 */ 180 if (irqs->anomalies++ >= 3) { 181 memset(irqs, 0, sizeof(*irqs)); 182 irqs->last_ts = ts; 183 return; 184 } 185 } else { 186 /* 187 * The anomalies must be consecutives, so at this 188 * point, we reset the anomalies counter. 189 */ 190 irqs->anomalies = 0; 191 } 192 193 /* 194 * The interrupt is considered stable enough to try to predict 195 * the next event on it. 196 */ 197 irqs->valid = 1; 198 199 /* 200 * Online average algorithm: 201 * 202 * new_average = average + ((value - average) / count) 203 * 204 * The variance computation depends on the new average 205 * to be computed here first. 206 * 207 */ 208 irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); 209 210 /* 211 * Online variance algorithm: 212 * 213 * new_variance = variance + (value - average) x (value - new_average) 214 * 215 * Warning: irqs->avg is updated with the line above, hence 216 * 'interval - irqs->avg' is no longer equal to 'diff' 217 */ 218 irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); 219 220 /* 221 * Update the next event 222 */ 223 irqs->next_evt = ts + irqs->avg; 224 } 225 226 /** 227 * irq_timings_next_event - Return when the next event is supposed to arrive 228 * 229 * During the last busy cycle, the number of interrupts is incremented 230 * and stored in the irq_timings structure. This information is 231 * necessary to: 232 * 233 * - know if the index in the table wrapped up: 234 * 235 * If more than the array size interrupts happened during the 236 * last busy/idle cycle, the index wrapped up and we have to 237 * begin with the next element in the array which is the last one 238 * in the sequence, otherwise it is a the index 0. 239 * 240 * - have an indication of the interrupts activity on this CPU 241 * (eg. irq/sec) 242 * 243 * The values are 'consumed' after inserting in the statistical model, 244 * thus the count is reinitialized. 245 * 246 * The array of values **must** be browsed in the time direction, the 247 * timestamp must increase between an element and the next one. 248 * 249 * Returns a nanosec time based estimation of the earliest interrupt, 250 * U64_MAX otherwise. 251 */ 252 u64 irq_timings_next_event(u64 now) 253 { 254 struct irq_timings *irqts = this_cpu_ptr(&irq_timings); 255 struct irqt_stat *irqs; 256 struct irqt_stat __percpu *s; 257 u64 ts, next_evt = U64_MAX; 258 int i, irq = 0; 259 260 /* 261 * This function must be called with the local irq disabled in 262 * order to prevent the timings circular buffer to be updated 263 * while we are reading it. 264 */ 265 lockdep_assert_irqs_disabled(); 266 267 /* 268 * Number of elements in the circular buffer: If it happens it 269 * was flushed before, then the number of elements could be 270 * smaller than IRQ_TIMINGS_SIZE, so the count is used, 271 * otherwise the array size is used as we wrapped. The index 272 * begins from zero when we did not wrap. That could be done 273 * in a nicer way with the proper circular array structure 274 * type but with the cost of extra computation in the 275 * interrupt handler hot path. We choose efficiency. 276 * 277 * Inject measured irq/timestamp to the statistical model 278 * while decrementing the counter because we consume the data 279 * from our circular buffer. 280 */ 281 for (i = irqts->count & IRQ_TIMINGS_MASK, 282 irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); 283 irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { 284 285 irq = irq_timing_decode(irqts->values[i], &ts); 286 287 s = idr_find(&irqt_stats, irq); 288 if (s) { 289 irqs = this_cpu_ptr(s); 290 irqs_update(irqs, ts); 291 } 292 } 293 294 /* 295 * Look in the list of interrupts' statistics, the earliest 296 * next event. 297 */ 298 idr_for_each_entry(&irqt_stats, s, i) { 299 300 irqs = this_cpu_ptr(s); 301 302 if (!irqs->valid) 303 continue; 304 305 if (irqs->next_evt <= now) { 306 irq = i; 307 next_evt = now; 308 309 /* 310 * This interrupt mustn't use in the future 311 * until new events occur and update the 312 * statistics. 313 */ 314 irqs->valid = 0; 315 break; 316 } 317 318 if (irqs->next_evt < next_evt) { 319 irq = i; 320 next_evt = irqs->next_evt; 321 } 322 } 323 324 return next_evt; 325 } 326 327 void irq_timings_free(int irq) 328 { 329 struct irqt_stat __percpu *s; 330 331 s = idr_find(&irqt_stats, irq); 332 if (s) { 333 free_percpu(s); 334 idr_remove(&irqt_stats, irq); 335 } 336 } 337 338 int irq_timings_alloc(int irq) 339 { 340 struct irqt_stat __percpu *s; 341 int id; 342 343 /* 344 * Some platforms can have the same private interrupt per cpu, 345 * so this function may be be called several times with the 346 * same interrupt number. Just bail out in case the per cpu 347 * stat structure is already allocated. 348 */ 349 s = idr_find(&irqt_stats, irq); 350 if (s) 351 return 0; 352 353 s = alloc_percpu(*s); 354 if (!s) 355 return -ENOMEM; 356 357 idr_preload(GFP_KERNEL); 358 id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); 359 idr_preload_end(); 360 361 if (id < 0) { 362 free_percpu(s); 363 return id; 364 } 365 366 return 0; 367 } 368