xref: /dragonfly/usr.sbin/dntpd/client.c (revision 0ac6bf9d)
1 /*
2  * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/usr.sbin/dntpd/client.c,v 1.9 2005/04/26 23:50:23 dillon Exp $
35  */
36 
37 #include "defs.h"
38 
39 void
40 client_init(void)
41 {
42 }
43 
44 int
45 client_main(struct server_info **info_ary, int count)
46 {
47     struct server_info *best_off;
48     struct server_info *best_freq;
49     double last_freq;
50     double freq;
51     double offset;
52     int i;
53     int calc_offset_correction;
54 
55     last_freq = 0.0;
56 
57     for (;;) {
58 	/*
59 	 * Subtract the interval from poll_sleep and poll the client
60 	 * if it reaches 0.
61 	 *
62 	 * Because we do not compensate for offset corrections which are
63 	 * in progress, we cannot accumulate data for an offset correction
64 	 * while a prior correction is still being worked through by the
65 	 * system.
66 	 */
67 	calc_offset_correction = !sysntp_offset_correction_is_running();
68 	for (i = 0; i < count; ++i)
69 	    client_poll(info_ary[i], min_sleep_opt, calc_offset_correction);
70 
71 	/*
72 	 * Find the best client (or synthesize one).  A different client
73 	 * can be chosen for frequency and offset.  Note in particular
74 	 * that offset counters and averaging code gets reset when an
75 	 * offset correction is made (otherwise the averaging history will
76 	 * cause later corrections to overshoot).
77 	 *
78 	 * The regression used to calculate the frequency is a much
79 	 * longer-term entity and is NOT reset, so it is still possible
80 	 * for the offset correction code to make minor adjustments to
81 	 * the frequency if it so desires.
82 	 *
83 	 * client_check may replace the server_info pointer with a new
84 	 * one.
85 	 */
86 	best_off = NULL;
87 	best_freq = NULL;
88 	for (i = 0; i < count; ++i)
89 	    client_check(&info_ary[i], &best_off, &best_freq);
90 
91 	/*
92 	 * Offset correction.
93 	 */
94 	if (best_off) {
95 	    offset = best_off->lin_sumoffset / best_off->lin_countoffset;
96 	    lin_resetalloffsets(info_ary, count);
97 	    if (offset < -COURSE_OFFSET_CORRECTION_LIMIT ||
98 		offset > COURSE_OFFSET_CORRECTION_LIMIT ||
99 		quickset_opt
100 	    ) {
101 		freq = sysntp_correct_course_offset(offset);
102 		quickset_opt = 0;
103 	    } else {
104 		freq = sysntp_correct_offset(offset);
105 	    }
106 	} else {
107 	    freq = 0.0;
108 	}
109 
110 	/*
111 	 * Frequency correction (throw away minor freq adjusts from the
112 	 * offset code if we can't do a frequency correction here).  Do
113 	 * not reissue if it hasn't changed from the last issued correction.
114 	 */
115 	if (best_freq) {
116 	    freq += best_freq->lin_cache_freq;
117 	    if (last_freq != freq) {
118 		sysntp_correct_freq(freq);
119 		last_freq = freq;
120 	    }
121 	}
122 
123 	/*
124 	 * This function is responsible for managing the polling mode and
125 	 * figures out how long we should sleep.
126 	 */
127 	for (i = 0; i < count; ++i)
128 	    client_manage_polling_mode(info_ary[i]);
129 
130 	/*
131 	 * Polling loop sleep.
132 	 */
133 	usleep(min_sleep_opt * 1000000 + random() % 500000);
134     }
135 }
136 
137 void
138 client_poll(server_info_t info, int poll_interval, int calc_offset_correction)
139 {
140     struct timeval rtv;
141     struct timeval ltv;
142     struct timeval lbtv;
143     double offset;
144 
145     /*
146      * By default we always poll.  If the polling interval comes under
147      * active management the poll_sleep will be non-zero.
148      */
149     if (info->poll_sleep > poll_interval) {
150 	info->poll_sleep -= poll_interval;
151 	return;
152     }
153     info->poll_sleep = 0;
154 
155     logdebug(4, "%s: poll, ", info->target);
156     if (udp_ntptimereq(info->fd, &rtv, &ltv, &lbtv) < 0) {
157 	++info->poll_failed;
158 	logdebug(4, "no response (%d failures in a row)\n",
159 		info->poll_failed);
160 	if (info->poll_failed == POLL_FAIL_RESET) {
161 	    if (info->lin_count != 0) {
162 		logdebug(4, "%s: resetting regression due to failures\n",
163 			info->target);
164 	    }
165 	    lin_reset(info);
166 	}
167 	return;
168     }
169 
170     /*
171      * Successful query.  Update polling info for the polling mode manager.
172      */
173     ++info->poll_count;
174     info->poll_failed = 0;
175 
176     /*
177      * Figure out the offset (the difference between the reported
178      * time and our current time) for linear regression purposes.
179      */
180     offset = tv_delta_double(&rtv, &ltv);
181 
182     while (info) {
183 	/*
184 	 * Linear regression
185 	 */
186 	if (debug_level >= 4) {
187 	    struct tm *tp;
188 	    char buf[64];
189 	    time_t t;
190 
191 	    t = rtv.tv_sec;
192 	    tp = localtime(&t);
193 	    strftime(buf, sizeof(buf), "%d-%b-%Y %H:%M:%S", tp);
194 	    logdebug(4, "%s.%03ld ", buf, rtv.tv_usec / 1000);
195 	}
196 	lin_regress(info, &ltv, &lbtv, offset, calc_offset_correction);
197 	info = info->altinfo;
198 	if (info && debug_level >= 4) {
199 	    logdebug(4, "%*.*s: poll, ",
200 		(int)strlen(info->target),
201 		(int)strlen(info->target), "(alt)");
202 	}
203     }
204 }
205 
206 /*
207  * Find the best client (or synthesize a fake info structure to return).
208  * We can find separate best clients for offset and frequency.
209  */
210 void
211 client_check(struct server_info **checkp,
212 	     struct server_info **best_off,
213 	     struct server_info **best_freq)
214 {
215     struct server_info *check = *checkp;
216     struct server_info *info;
217 
218     /*
219      * Start an alternate linear regression once our current one
220      * has passed a certain point.
221      */
222     if (check->lin_count >= LIN_RESTART / 2 && check->altinfo == NULL) {
223 	info = malloc(sizeof(*info));
224 	assert(info != NULL);
225 	/* note: check->altinfo is NULL as of the bcopy */
226 	bcopy(check, info, sizeof(*info));
227 	check->altinfo = info;
228 	lin_reset(info);
229     }
230 
231     /*
232      * Replace our current linear regression with the alternate once
233      * the current one has hit its limit (beyond a certain point the
234      * linear regression starts to work against us, preventing us from
235      * reacting to changing conditions).
236      *
237      * Report any significant change in the offset or ppm.
238      */
239     if (check->lin_count >= LIN_RESTART) {
240 	if ((info = check->altinfo) && info->lin_count >= LIN_RESTART / 2) {
241 	    double freq_diff;
242 
243 	    freq_diff = info->lin_cache_freq - check->lin_cache_freq;
244 	    logdebug(4, "%s: Switching to alternate, Frequence "
245 		    "difference is %6.3f ppm\n",
246 		    info->target, freq_diff * 1.0E+6);
247 	    *checkp = info;
248 	    free(check);
249 	    check = info;
250 	}
251     }
252 
253     /*
254      * BEST CLIENT FOR FREQUENCY CORRECTION:
255      *
256      *	8 samples and a correllation > 0.99, or
257      * 16 samples and a correllation > 0.96
258      */
259     info = *best_freq;
260     if ((check->lin_count >= 8 && fabs(check->lin_cache_corr) >= 0.99) ||
261 	(check->lin_count >= 16 && fabs(check->lin_cache_corr) >= 0.96)
262     ) {
263 	if (info == NULL ||
264 	    fabs(check->lin_cache_corr) > fabs(info->lin_cache_corr)
265 	) {
266 	    info = check;
267 	    *best_freq = info;
268 	}
269 
270     }
271 
272     /*
273      * BEST CLIENT FOR OFFSET CORRECTION:
274      *
275      * Use the standard-deviation and require at least 4 samples.  An
276      * offset correction is valid if the standard deviation is less then
277      * the average offset divided by 4.
278      */
279     info = *best_off;
280     if (check->lin_countoffset >= 4 &&
281 	check->lin_cache_stddev < fabs(check->lin_sumoffset / check->lin_countoffset / 4)) {
282 	if (info == NULL ||
283 	    fabs(check->lin_cache_stddev) < fabs(info->lin_cache_stddev)
284 	) {
285 	    info = check;
286 	    *best_off = info;
287 	}
288     }
289 }
290 
291 /*
292  * Actively manage the polling interval.  Note that the poll_* fields are
293  * always transfered to the alternate regression when the check code replaces
294  * the current regression with a new one.
295  *
296  * This routine is called from the main loop for each base info structure.
297  * The polling mode applies to all alternates so we do not have to iterate
298  * through the alt's.
299  */
300 void
301 client_manage_polling_mode(struct server_info *info)
302 {
303     /*
304      * If too many query failures occured go into a failure-recovery state.
305      * If we were in startup when we failed, go into the second failure
306      * state so a recovery returns us back to startup mode.
307      */
308     if (info->poll_failed >= POLL_FAIL_RESET &&
309 	info->poll_mode != POLL_FAILED_1 &&
310 	info->poll_mode != POLL_FAILED_2
311     ) {
312 	logdebug(2, "%s: polling mode moving to a FAILED state.\n",
313 		info->target);
314 	if (info->poll_mode != POLL_STARTUP)
315 	    info->poll_mode = POLL_FAILED_1;
316 	else
317 	    info->poll_mode = POLL_FAILED_2;
318 	info->poll_count = 0;
319     }
320 
321     /*
322      * Standard polling mode progression
323      */
324     switch(info->poll_mode) {
325     case POLL_FIXED:
326 	info->poll_mode = POLL_STARTUP;
327 	info->poll_count = 0;
328 	logdebug(2, "%s: polling mode INIT->STARTUP.\n", info->target);
329 	/* fall through */
330     case POLL_STARTUP:
331 	if (info->poll_count < POLL_STARTUP_MAX) {
332 	    if (info->poll_sleep == 0)
333 		info->poll_sleep = min_sleep_opt;
334 	    break;
335 	}
336 	info->poll_mode = POLL_ACQUIRE;
337 	info->poll_count = 0;
338 	logdebug(2, "%s: polling mode STARTUP->ACQUIRE.\n", info->target);
339 	/* fall through */
340     case POLL_ACQUIRE:
341 	/*
342 	 * Acquisition mode using the nominal timeout.  We do not shift
343 	 * to maintainance mode unless the correllation is at least 0.90
344 	 */
345 	if (info->poll_count < POLL_ACQUIRE_MAX ||
346 	    info->lin_count < 8 ||
347 	    fabs(info->lin_cache_corr) < 0.85
348 	) {
349 	    if (info->poll_count >= POLL_ACQUIRE_MAX &&
350 		info->lin_count == LIN_RESTART - 2
351 	    ) {
352 		logdebug(2,
353 		    "%s: WARNING: Unable to shift this source to "
354 		    "maintainance mode.  Target correllation is aweful.\n",
355 		    info->target);
356 	    }
357 	    if (info->poll_sleep == 0)
358 		info->poll_sleep = nom_sleep_opt;
359 	    break;
360 	}
361 	info->poll_mode = POLL_MAINTAIN;
362 	info->poll_count = 0;
363 	logdebug(2, "%s: polling mode ACQUIRE->MAINTAIN.\n", info->target);
364 	/* fall through */
365     case POLL_MAINTAIN:
366 	if (info->lin_count >= LIN_RESTART / 2 &&
367 	    fabs(info->lin_cache_corr) < 0.70
368 	) {
369 	    logdebug(2,
370 		"%s: polling mode MAINTAIN->ACQUIRE.  Unable to maintain\n"
371 		"the maintainance mode because the correllation went"
372 		" bad!\n", info->target);
373 	    info->poll_mode = POLL_ACQUIRE;
374 	    info->poll_count = 0;
375 	    break;
376 	}
377 	if (info->poll_sleep == 0)
378 	    info->poll_sleep = max_sleep_opt;
379 	/* do nothing */
380 	break;
381     case POLL_FAILED_1:
382 	/*
383 	 * We have failed recently. If we recover return to the acquisition
384 	 * state.
385 	 *
386 	 * poll_count does not increment while we are failed.  poll_failed
387 	 * does increment (but gets zero'd once we recover).
388 	 */
389 	if (info->poll_count != 0) {
390 	    logdebug(2, "%s: polling mode FAILED1->ACQUIRE.\n", info->target);
391 	    info->poll_mode = POLL_ACQUIRE;
392 	    /* do not reset poll_count */
393 	    break;
394 	}
395 	if (info->poll_failed >= POLL_RECOVERY_RESTART)
396 	    info->poll_mode = POLL_FAILED_2;
397 	if (info->poll_sleep == 0)
398 	    info->poll_sleep = nom_sleep_opt;
399 	break;
400     case POLL_FAILED_2:
401 	/*
402 	 * We have been failed for a very long time, or we failed while
403 	 * in startup.  If we recover we have to go back into startup.
404 	 */
405 	if (info->poll_count != 0) {
406 	    logdebug(2, "%s: polling mode FAILED2->STARTUP.\n", info->target);
407 	    info->poll_mode = POLL_STARTUP;
408 	    break;
409 	}
410 	if (info->poll_sleep == 0)
411 	    info->poll_sleep = nom_sleep_opt;
412 	break;
413     }
414 }
415 
416 /*
417  * Linear regression.
418  *
419  *	ltv	local time as of when the offset error was calculated between
420  *		local time and remote time.
421  *
422  *	lbtv	base time as of when local time was obtained.  Used to
423  *		calculate the cumulative corrections made to the system's
424  *		real time clock so we can de-correct the offset for the
425  *		linear regression.
426  *
427  * X is the time axis, in seconds.
428  * Y is the uncorrected offset, in seconds.
429  */
430 void
431 lin_regress(server_info_t info, struct timeval *ltv, struct timeval *lbtv,
432 	    double offset, int calc_offset_correction)
433 {
434     double time_axis;
435     double uncorrected_offset;
436 
437     /*
438      * De-correcting the offset:
439      *
440      *	The passed offset is (our_real_time - remote_real_time).  To remove
441      *  corrections from our_real_time we take the difference in the basetime
442      *  (new_base_time - old_base_time) and subtract that from the offset.
443      *  That is, if the basetime goesup, the uncorrected offset goes down.
444      */
445     if (info->lin_count == 0) {
446 	info->lin_tv = *ltv;
447 	info->lin_btv = *lbtv;
448 	time_axis = 0;
449 	uncorrected_offset = offset;
450     } else {
451 	time_axis = tv_delta_double(&info->lin_tv, ltv);
452 	uncorrected_offset = offset - tv_delta_double(&info->lin_btv, lbtv);
453     }
454 
455     /*
456      * We have to use the uncorrected offset for frequency calculations.
457      */
458     ++info->lin_count;
459     info->lin_sumx += time_axis;
460     info->lin_sumx2 += time_axis * time_axis;
461     info->lin_sumy += uncorrected_offset;
462     info->lin_sumy2 += uncorrected_offset * uncorrected_offset;
463     info->lin_sumxy += time_axis * uncorrected_offset;
464 
465     /*
466      * We have to use the corrected offset for offset calculations.
467      */
468     if (calc_offset_correction) {
469 	++info->lin_countoffset;
470 	info->lin_sumoffset += offset;
471 	info->lin_sumoffset2 += offset * offset;
472     }
473 
474     /*
475      * Calculate various derived values.   This gets us slope, y-intercept,
476      * and correllation from the linear regression.
477      */
478     if (info->lin_count > 1) {
479 	info->lin_cache_slope =
480 	 (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
481 	 (info->lin_count * info->lin_sumx2 - info->lin_sumx * info->lin_sumx);
482 
483 	info->lin_cache_yint =
484 	 (info->lin_sumy - info->lin_cache_slope * info->lin_sumx) /
485 	 (info->lin_count);
486 
487 	info->lin_cache_corr =
488 	 (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
489 	 sqrt((info->lin_count * info->lin_sumx2 -
490 		      info->lin_sumx * info->lin_sumx) *
491 	     (info->lin_count * info->lin_sumy2 -
492 		      info->lin_sumy * info->lin_sumy)
493 	 );
494     }
495 
496     /*
497      * Calculate more derived values.  This gets us the standard-deviation
498      * of offsets.  The standard deviation approximately means that 68%
499      * of the samples fall within the calculated stddev of the mean.
500      */
501     if (info->lin_countoffset > 1) {
502 	 info->lin_cache_stddev =
503 	     sqrt((info->lin_sumoffset2 -
504 		 ((info->lin_sumoffset * info->lin_sumoffset /
505 		   info->lin_countoffset))) /
506 	         (info->lin_countoffset - 1.0));
507     }
508 
509     /*
510      * Save the most recent offset, we might use it in the future.
511      * Save the frequency correction (we might scale the slope later so
512      * we have a separate field for the actual frequency correction in
513      * seconds per second).
514      */
515     info->lin_cache_offset = offset;
516     info->lin_cache_freq = info->lin_cache_slope;
517 
518     if (debug_level >= 4) {
519 	logdebug(4, "iter=%2d time=%7.3f off=%.6f uoff=%.6f",
520 	    (int)info->lin_count,
521 	    time_axis, offset, uncorrected_offset);
522 	if (info->lin_count > 1) {
523 	    logdebug(4, " slope %7.6f"
524 			    " yint %3.2f corr %7.6f freq_ppm %4.2f",
525 		info->lin_cache_slope,
526 		info->lin_cache_yint,
527 		info->lin_cache_corr,
528 		info->lin_cache_freq * 1000000.0);
529 	}
530 	if (info->lin_countoffset > 1) {
531 	    logdebug(4, " stddev %7.6f", info->lin_cache_stddev);
532 	} else if (calc_offset_correction == 0) {
533 	    /* cannot calculate offset correction due to prior correction */
534 	    logdebug(4, " offset_ignored");
535 	}
536 	logdebug(4, "\n");
537     }
538 }
539 
540 /*
541  * Reset the linear regression data.  The info structure will not again be
542  * a candidate for frequency or offset correction until sufficient data
543  * has been accumulated to make a decision.
544  */
545 void
546 lin_reset(server_info_t info)
547 {
548     server_info_t scan;
549 
550     info->lin_count = 0;
551     info->lin_sumx = 0;
552     info->lin_sumy = 0;
553     info->lin_sumxy = 0;
554     info->lin_sumx2 = 0;
555     info->lin_sumy2 = 0;
556 
557     info->lin_countoffset = 0;
558     info->lin_sumoffset = 0;
559     info->lin_sumoffset2 = 0;
560 
561     info->lin_cache_slope = 0;
562     info->lin_cache_yint = 0;
563     info->lin_cache_corr = 0;
564     info->lin_cache_offset = 0;
565     info->lin_cache_freq = 0;
566 
567     /*
568      * Destroy any additional alternative regressions.
569      */
570     while ((scan = info->altinfo) != NULL) {
571 	info->altinfo = scan->altinfo;
572 	free(scan);
573     }
574 }
575 
576 /*
577  * Sometimes we want to clean out the offset calculations without
578  * destroying the linear regression used to figure out the frequency
579  * correction.  This usually occurs whenever we issue an offset
580  * adjustment to the system, which invalidates any offset data accumulated
581  * up to that point.
582  */
583 void
584 lin_resetalloffsets(struct server_info **info_ary, int count)
585 {
586     server_info_t info;
587     int i;
588 
589     for (i = 0; i < count; ++i) {
590 	for (info = info_ary[i]; info; info = info->altinfo)
591 	    lin_resetoffsets(info);
592     }
593 }
594 
595 void
596 lin_resetoffsets(server_info_t info)
597 {
598     info->lin_countoffset = 0;
599     info->lin_sumoffset = 0;
600     info->lin_sumoffset2 = 0;
601 }
602 
603