/* xref: /dragonfly/usr.sbin/dntpd/client.c (revision 1d9b37b0) */
/*
 * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

35 #include "defs.h"
36 
37 static int client_insane(struct server_info **, int, server_info_t);
38 
/*
 * Client initialization hook.  Currently a no-op.
 */
void
client_init(void)
{
}

44 void
client_main(struct server_info ** info_ary,int count)45 client_main(struct server_info **info_ary, int count)
46 {
47     struct server_info *best_off;
48     struct server_info *best_freq;
49     double last_freq;
50     double freq;
51     double offset;
52     int calc_offset_correction;
53     int didreconnect;
54     int i;
55     int insane;
56 
57     last_freq = 0.0;
58 
59     for (;;) {
60 	/*
61 	 * Subtract the interval from poll_sleep and poll the client
62 	 * if it reaches 0.
63 	 *
64 	 * Because we do not compensate for offset corrections which are
65 	 * in progress, we cannot accumulate data for an offset correction
66 	 * while a prior correction is still being worked through by the
67 	 * system.
68 	 */
69 	calc_offset_correction = !sysntp_offset_correction_is_running();
70 	for (i = 0; i < count; ++i)
71 	    client_poll(info_ary[i], min_sleep_opt, calc_offset_correction);
72 
73 	/*
74 	 * Find the best client (or synthesize one).  A different client
75 	 * can be chosen for frequency and offset.  Note in particular
76 	 * that offset counters and averaging code gets reset when an
77 	 * offset correction is made (otherwise the averaging history will
78 	 * cause later corrections to overshoot).
79 	 *
80 	 * The regression used to calculate the frequency is a much
81 	 * longer-term entity and is NOT reset, so it is still possible
82 	 * for the offset correction code to make minor adjustments to
83 	 * the frequency if it so desires.
84 	 *
85 	 * client_check may replace the server_info pointer with a new
86 	 * one.
87 	 */
88 	best_off = NULL;
89 	best_freq = NULL;
90 	for (i = 0; i < count; ++i)
91 	    client_check(&info_ary[i], &best_off, &best_freq);
92 
93 	/*
94 	 * Check for server insanity.  In large NNTP pools some servers
95 	 * may just be dead wrong, but report that they are right.
96 	 */
97 	if (best_off) {
98 	    insane = client_insane(info_ary, count, best_off);
99 	    if (insane > 0) {
100 		/*
101 		 * best_off meets the quorum requirements and is good
102 		 * (keep best_off)
103 		 */
104 		best_off->server_insane = 0;
105 	    } else if (insane == 0) {
106 		/*
107 		 * best_off is probably good, but we do not have enough
108 		 * servers reporting yet to meet the quorum requirements.
109 		 */
110 		best_off = NULL;
111 	    } else {
112 		/*
113 		 * best_off is ugly, mark the server as being insane for
114 		 * 60 minutes.
115 		 */
116 		best_off->server_insane = 60 * 60;
117 		logdebuginfo(best_off, 1,
118 			     "excessive offset deviation, mapping out\n");
119 		best_off = NULL;
120 	    }
121 	}
122 
123 	/*
124 	 * Offset correction.
125 	 */
126 	if (best_off) {
127 	    offset = best_off->lin_sumoffset / best_off->lin_countoffset;
128 	    lin_resetalloffsets(info_ary, count);
129 	    if (offset < -COURSE_OFFSET_CORRECTION_LIMIT ||
130 		offset > COURSE_OFFSET_CORRECTION_LIMIT ||
131 		quickset_opt
132 	    ) {
133 		freq = sysntp_correct_course_offset(offset);
134 		quickset_opt = 0;
135 	    } else {
136 		freq = sysntp_correct_offset(offset);
137 	    }
138 	} else {
139 	    freq = 0.0;
140 	}
141 
142 	/*
143 	 * Frequency correction (throw away minor freq adjusts from the
144 	 * offset code if we can't do a frequency correction here).  Do
145 	 * not reissue if it hasn't changed from the last issued correction.
146 	 */
147 	if (best_freq) {
148 	    freq += best_freq->lin_cache_freq;
149 	    if (last_freq != freq) {
150 		sysntp_correct_freq(freq);
151 		last_freq = freq;
152 	    }
153 	}
154 
155 	/*
156 	 * This function is responsible for managing the polling mode and
157 	 * figures out how long we should sleep.
158 	 */
159 	didreconnect = 0;
160 	for (i = 0; i < count; ++i)
161 	    client_manage_polling_mode(info_ary[i], &didreconnect);
162 	if (didreconnect)
163 	    client_check_duplicate_ips(info_ary, count);
164 
165 	/*
166 	 * Polling loop sleep.
167 	 */
168 	usleep(min_sleep_opt * 1000000 + random() % 500000);
169     }
170 }
171 
172 void
client_poll(server_info_t info,int poll_interval,int calc_offset_correction)173 client_poll(server_info_t info, int poll_interval, int calc_offset_correction)
174 {
175     struct timeval rtv;
176     struct timeval ltv;
177     struct timeval lbtv;
178     double offset;
179 
180     /*
181      * Adjust the insane-server countdown
182      */
183     if (info->server_insane > poll_interval)
184 	info->server_insane -= poll_interval;
185     else
186 	info->server_insane = 0;
187 
188     /*
189      * By default we always poll.  If the polling interval comes under
190      * active management the poll_sleep will be non-zero.
191      */
192     if (info->poll_sleep > poll_interval) {
193 	info->poll_sleep -= poll_interval;
194 	return;
195     }
196     info->poll_sleep = 0;
197 
198     /*
199      * If the client isn't open don't mess with the poll_failed count
200      * or anything else.  We are left in the init or startup phase.
201      */
202     if (info->fd < 0) {
203 	if (info->poll_failed < 0x7FFFFFFF)
204 	    ++info->poll_failed;
205 	return;
206     }
207 
208     logdebuginfo(info, 4, "poll, ");
209     if (udp_ntptimereq(info->fd, &rtv, &ltv, &lbtv) < 0) {
210 	++info->poll_failed;
211 	logdebug(4, "no response (%d failures in a row)\n", info->poll_failed);
212 	if (info->poll_failed == POLL_FAIL_RESET) {
213 	    if (info->lin_count != 0) {
214 		logdebuginfo(info, 4, "resetting regression due to failures\n");
215 	    }
216 	    lin_reset(info);
217 	}
218 	return;
219     }
220 
221     /*
222      * Successful query.  Update polling info for the polling mode manager.
223      */
224     ++info->poll_count;
225     info->poll_failed = 0;
226 
227     /*
228      * Figure out the offset (the difference between the reported
229      * time and our current time) for linear regression purposes.
230      */
231     offset = tv_delta_double(&rtv, &ltv);
232 
233     while (info) {
234 	/*
235 	 * Linear regression
236 	 */
237 	if (debug_level >= 4) {
238 	    struct tm *tp;
239 	    char buf[64];
240 	    time_t t;
241 
242 	    t = rtv.tv_sec;
243 	    tp = localtime(&t);
244 	    strftime(buf, sizeof(buf), "%d-%b-%Y %H:%M:%S", tp);
245 	    logdebug(4, "%s.%03ld ", buf, rtv.tv_usec / 1000);
246 	}
247 	lin_regress(info, &ltv, &lbtv, offset, calc_offset_correction);
248 	info = info->altinfo;
249 	if (info && debug_level >= 4) {
250 	    logdebug(4, "%*.*s: poll, ",
251 		(int)strlen(info->target),
252 		(int)strlen(info->target), "(alt)");
253 	}
254     }
255 }
256 
257 /*
258  * Find the best client (or synthesize a fake info structure to return).
259  * We can find separate best clients for offset and frequency.
260  */
261 void
client_check(struct server_info ** checkp,struct server_info ** best_off,struct server_info ** best_freq)262 client_check(struct server_info **checkp,
263 	     struct server_info **best_off,
264 	     struct server_info **best_freq)
265 {
266     struct server_info *check = *checkp;
267     struct server_info *info;
268     int min_samples;
269 
270     /*
271      * Start an alternate linear regression once our current one
272      * has passed a certain point.
273      */
274     if (check->lin_count >= LIN_RESTART / 2 && check->altinfo == NULL) {
275 	info = malloc(sizeof(*info));
276 	assert(info != NULL);
277 	/* note: check->altinfo is NULL as of the bcopy */
278 	bcopy(check, info, sizeof(*info));
279 	check->altinfo = info;
280 	lin_reset(info);
281     }
282 
283     /*
284      * Replace our current linear regression with the alternate once
285      * the current one has hit its limit (beyond a certain point the
286      * linear regression starts to work against us, preventing us from
287      * reacting to changing conditions).
288      *
289      * Report any significant change in the offset or ppm.
290      */
291     if (check->lin_count >= LIN_RESTART) {
292 	if ((info = check->altinfo) && info->lin_count >= LIN_RESTART / 2) {
293 	    double freq_diff;
294 
295 	    freq_diff = info->lin_cache_freq - check->lin_cache_freq;
296 	    logdebuginfo(info, 4, "Switching to alternate, Frequency "
297 			 "difference is %6.3f ppm\n",
298 			 freq_diff * 1.0E+6);
299 	    *checkp = info;
300 	    free(check);
301 	    check = info;
302 	}
303     }
304 
305     /*
306      * BEST CLIENT FOR FREQUENCY CORRECTION:
307      *
308      * Frequency corrections get better the longer the time separation
309      * between samples.
310      *
311      *	8 samples and a correlation > 0.99, or
312      * 16 samples and a correlation > 0.96
313      */
314     info = *best_freq;
315     if ((check->lin_count >= 8 && fabs(check->lin_cache_corr) >= 0.99) ||
316 	(check->lin_count >= 16 && fabs(check->lin_cache_corr) >= 0.96)
317     ) {
318 	if (info == NULL ||
319 	    fabs(check->lin_cache_corr) > fabs(info->lin_cache_corr)
320 	) {
321 	    info = check;
322 	    *best_freq = info;
323 	}
324 
325     }
326 
327     /*
328      * BEST CLIENT FOR OFFSET CORRECTION:
329      *
330      * Use the standard-deviation and require at least 4 samples.  An
331      * offset correction is valid if the standard deviation is less then
332      * the average offset divided by 4.
333      *
334      * If we are in maintainance mode, require 8 samples instead of 4.
335      * Offset corrections get better with more samples.  This reduces
336      * ping-pong effects that can occur with a small number of samples.
337      *
338      * Servers marked as being insane are not allowed
339      */
340     info = *best_off;
341     if (info && info->poll_mode == POLL_MAINTAIN)
342 	min_samples = 8;
343     else
344 	min_samples = 4;
345     if (check->lin_countoffset >= min_samples &&
346 	(check->lin_cache_stddev <
347 	 fabs(check->lin_sumoffset / check->lin_countoffset / 4)) &&
348 	check->server_insane == 0
349      ) {
350 	if (info == NULL ||
351 	    fabs(check->lin_cache_stddev) < fabs(info->lin_cache_stddev)
352 	) {
353 	    info = check;
354 	    *best_off = info;
355 	}
356     }
357 }
358 
359 /*
360  * Actively manage the polling interval.  Note that the poll_* fields are
361  * always transfered to the alternate regression when the check code replaces
362  * the current regression with a new one.
363  *
364  * This routine is called from the main loop for each base info structure.
365  * The polling mode applies to all alternates so we do not have to iterate
366  * through the alt's.
367  */
368 void
client_manage_polling_mode(struct server_info * info,int * didreconnect)369 client_manage_polling_mode(struct server_info *info, int *didreconnect)
370 {
371     /*
372      * Permanently failed servers are ignored.
373      */
374     if (info->server_state == -2)
375 	return;
376 
377     /*
378      * Our polling interval has not yet passed.
379      */
380     if (info->poll_sleep)
381 	return;
382 
383     /*
384      * Standard polling mode progression
385      */
386     switch(info->poll_mode) {
387     case POLL_FIXED:
388 	/*
389 	 * Initial state after connect or when a reconnect is required.
390 	 */
391 	if (info->fd < 0) {
392 	    logdebuginfo(info, 2, "polling mode INIT, relookup & reconnect\n");
393 	    reconnect_server(info);
394 	    *didreconnect = 1;
395 	    if (info->fd < 0) {
396 		if (info->poll_failed >= POLL_RECOVERY_RESTART * 5)
397 		    info->poll_sleep = max_sleep_opt;
398 		else if (info->poll_failed >= POLL_RECOVERY_RESTART)
399 		    info->poll_sleep = nom_sleep_opt;
400 		else
401 		    info->poll_sleep = min_sleep_opt;
402 		break;
403 	    }
404 
405 	    /*
406 	     * Transition the server to the DNS lookup successful state.
407 	     * Note that the server state does not transition out of
408 	     * lookup successful if we relookup after a packet failure
409 	     * so the message is printed only once, usually.
410 	     */
411 	    client_setserverstate(info, 0, "DNS lookup success");
412 
413 	    /*
414 	     * If we've failed many times switch to the startup state but
415 	     * do not fall through into it.  break the switch and a single
416 	     * poll will be made after the nominal polling interval.
417 	     */
418 	    if (info->poll_failed >= POLL_RECOVERY_RESTART * 5) {
419 		logdebuginfo(info, 2, "polling mode INIT->STARTUP (very slow)\n");
420 		info->poll_mode = POLL_STARTUP;
421 		info->poll_sleep = max_sleep_opt;
422 		info->poll_count = 0;
423 		break;
424 	    } else if (info->poll_failed >= POLL_RECOVERY_RESTART) {
425 		logdebuginfo(info, 2, "polling mode INIT->STARTUP (slow)\n");
426 		info->poll_mode = POLL_STARTUP;
427 		info->poll_count = 0;
428 		break;
429 	    }
430 	}
431 
432 	/*
433 	 * Fall through to the startup state.
434 	 */
435 	info->poll_mode = POLL_STARTUP;
436 	logdebuginfo(info, 2, "polling mode INIT->STARTUP (normal)\n");
437 	/* fall through */
438     case POLL_STARTUP:
439 	/*
440 	 * Transition to a FAILED state if too many poll failures occured.
441 	 */
442 	if (info->poll_failed >= POLL_FAIL_RESET) {
443 	    logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
444 	    info->poll_mode = POLL_FAILED;
445 	    info->poll_count = 0;
446 	    break;
447 	}
448 
449 	/*
450 	 * Transition the server to operational.  Do a number of minimum
451 	 * interval polls to try to get a good offset calculation quickly.
452 	 */
453 	if (info->poll_count)
454 	    client_setserverstate(info, 1, "connected ok");
455 	if (info->poll_count < POLL_STARTUP_MAX) {
456 	    info->poll_sleep = min_sleep_opt;
457 	    break;
458 	}
459 
460 	/*
461 	 * Once we've got our polls fall through to aquisition mode to
462 	 * do aquisition processing.
463 	 */
464 	info->poll_mode = POLL_ACQUIRE;
465 	info->poll_count = 0;
466 	logdebuginfo(info, 2, "polling mode STARTUP->ACQUIRE\n");
467 	/* fall through */
468     case POLL_ACQUIRE:
469 	/*
470 	 * Transition to a FAILED state if too many poll failures occured.
471 	 */
472 	if (info->poll_failed >= POLL_FAIL_RESET) {
473 	    logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
474 	    info->poll_mode = POLL_FAILED;
475 	    info->poll_count = 0;
476 	    break;
477 	}
478 
479 	/*
480 	 * Acquisition mode using the nominal timeout.  We do not shift
481 	 * to maintainance mode unless the correlation is at least 0.90
482 	 */
483 	if (info->poll_count < POLL_ACQUIRE_MAX ||
484 	    info->lin_count < 8 ||
485 	    fabs(info->lin_cache_corr) < 0.85
486 	) {
487 	    if (info->poll_count >= POLL_ACQUIRE_MAX &&
488 		info->lin_count == LIN_RESTART - 2
489 	    ) {
490 		logdebuginfo(info, 2,
491 		    "WARNING: Unable to shift this source to "
492 		    "maintenance mode.  Target correlation is awful\n");
493 	    }
494 	    break;
495 	}
496 	info->poll_mode = POLL_MAINTAIN;
497 	info->poll_count = 0;
498 	logdebuginfo(info, 2, "polling mode ACQUIRE->MAINTAIN\n");
499 	/* fall through */
500     case POLL_MAINTAIN:
501 	/*
502 	 * Transition to a FAILED state if too many poll failures occured.
503 	 */
504 	if (info->poll_failed >= POLL_FAIL_RESET) {
505 	    logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
506 	    info->poll_mode = POLL_FAILED;
507 	    info->poll_count = 0;
508 	    break;
509 	}
510 
511 	/*
512 	 * Maintaince mode, max polling interval.
513 	 *
514 	 * Transition back to acquisition mode if we are unable to maintain
515 	 * this mode due to the correlation going bad.
516 	 */
517 	if (info->lin_count >= LIN_RESTART / 2 &&
518 	    fabs(info->lin_cache_corr) < 0.70
519 	) {
520 	    logdebuginfo(info, 2,
521 		"polling mode MAINTAIN->ACQUIRE.  Unable to maintain\n"
522 		"the maintenance mode because the correlation went"
523 		" bad!\n");
524 	    info->poll_mode = POLL_ACQUIRE;
525 	    info->poll_count = 0;
526 	    break;
527 	}
528 	info->poll_sleep = max_sleep_opt;
529 	break;
530     case POLL_FAILED:
531 	/*
532 	 * We have a communications failure.  A late recovery is possible
533 	 * if we enter this state with a good poll.
534 	 */
535 	if (info->poll_count != 0) {
536 	    logdebuginfo(info, 2, "polling mode FAILED->ACQUIRE\n");
537 	    if (info->poll_failed >= POLL_FAIL_RESET)
538 		info->poll_mode = POLL_STARTUP;
539 	    else
540 		info->poll_mode = POLL_ACQUIRE;
541 	    /* do not reset poll_count */
542 	    break;
543 	}
544 
545 	/*
546 	 * If we have been failed too long, disconnect from the server
547 	 * and start us all over again.  Note that the failed count is not
548 	 * reset to 0.
549 	 */
550 	if (info->poll_failed >= POLL_RECOVERY_RESTART) {
551 	    logdebuginfo(info, 2, "polling mode FAILED->INIT\n");
552 	    client_setserverstate(info, 0, "FAILED");
553 	    disconnect_server(info);
554 	    info->poll_mode = POLL_FIXED;
555 	    break;
556 	}
557 	break;
558     }
559 
560     /*
561      * If the above state machine has not set a polling interval, set a
562      * nominal polling interval.
563      */
564     if (info->poll_sleep == 0)
565 	info->poll_sleep = nom_sleep_opt;
566 }
567 
568 /*
569  * Look for duplicate IP addresses.  This is done very inoften, so we do
570  * not use a particularly efficient algorithm.
571  *
572  * Only reconnect a client which has not done its initial poll.
573  */
574 void
client_check_duplicate_ips(struct server_info ** info_ary,int count)575 client_check_duplicate_ips(struct server_info **info_ary, int count)
576 {
577     server_info_t info1;
578     server_info_t info2;
579     int tries;
580     int i;
581     int j;
582 
583     for (i = 0; i < count; ++i) {
584 	info1 = info_ary[i];
585 	if (info1->fd < 0 || info1->server_state != 0)
586 	    continue;
587 	for (tries = 0; tries < 10; ++tries) {
588 	    for (j = 0; j < count; ++j) {
589 		info2 = info_ary[j];
590 		if (i == j || info2->fd < 0)
591 		    continue;
592 		if (info1->fd < 0 || /* info1 was lost in previous reconnect */
593 		    strcmp(info1->ipstr, info2->ipstr) == 0) {
594 		    reconnect_server(info1);
595 		    break;
596 		}
597 	    }
598 	    if (j == count)
599 		break;
600 	}
601 	if (tries == 10) {
602 	    disconnect_server(info1);
603 	    client_setserverstate(info1, -2,
604 				  "permanently disabling duplicate server");
605 	}
606     }
607 }
608 
609 /*
610  * Calculate whether the server pointed to by *bestp is insane or not.
611  * For some reason some servers in e.g. the ntp pool are sometimes an hour
612  * off.  If we have at least three servers in the pool require that a
613  * quorum agree that the current best server's offset is reasonable.
614  *
615  * Allow +/- 0.5 seconds of error for now (settable with option).
616  *
617  * Returns -1 if insane, 0 if not enough samples, and 1 if ok
618  */
619 static
620 int
client_insane(struct server_info ** info_ary,int count,server_info_t best)621 client_insane(struct server_info **info_ary, int count, server_info_t best)
622 {
623     server_info_t info;
624     double best_offset;
625     double info_offset;
626     int good;
627     int bad;
628     int skip;
629     int quorum;
630     int i;
631 
632     /*
633      * If only one ntp server we cannot check to see if it is insane
634      */
635     if (count < 2)
636 	    return(1);
637     best_offset = best->lin_sumoffset / best->lin_countoffset;
638 
639     /*
640      * Calculated the quorum.  Do not count permanently failed servers
641      * in the calculation.
642      *
643      * adjusted count	quorum
644      *   2		  2
645      *   3		  2
646      *   4		  3
647      *   5		  3
648      */
649     quorum = count;
650     for (i = 0; i < count; ++i) {
651 	info = info_ary[i];
652 	if (info->server_state == -2)
653 	    --quorum;
654     }
655 
656     quorum = quorum / 2 + 1;
657     good = 0;
658     bad = 0;
659     skip = 0;
660 
661     /*
662      * Find the good, the bad, and the ugly.  We need at least four samples
663      * and a stddev within the deviation being checked to count a server
664      * in the calculation.
665      */
666     for (i = 0; i < count; ++i) {
667 	info = info_ary[i];
668 	if (info->lin_countoffset < 4 ||
669 	    info->lin_cache_stddev > insane_deviation
670 	) {
671 	    ++skip;
672 	    continue;
673 	}
674 
675 	info_offset = info->lin_sumoffset / info->lin_countoffset;
676 	info_offset -= best_offset;
677 	if (info_offset < -insane_deviation || info_offset > insane_deviation)
678 		++bad;
679 	else
680 		++good;
681     }
682 
683     /*
684      * Did we meet our quorum?
685      */
686     logdebuginfo(best, 5, "insanecheck good=%d bad=%d skip=%d "
687 			  "quorum=%d (allowed=%-+8.6f)\n",
688 		 good, bad, skip, quorum, insane_deviation);
689     if (good >= quorum)
690 	return(1);
691     if (good + skip >= quorum)
692 	return(0);
693     return(-1);
694 }
695 
696 /*
697  * Linear regression.
698  *
699  *	ltv	local time as of when the offset error was calculated between
700  *		local time and remote time.
701  *
702  *	lbtv	base time as of when local time was obtained.  Used to
703  *		calculate the cumulative corrections made to the system's
704  *		real time clock so we can de-correct the offset for the
705  *		linear regression.
706  *
707  * X is the time axis, in seconds.
708  * Y is the uncorrected offset, in seconds.
709  */
710 void
lin_regress(server_info_t info,struct timeval * ltv,struct timeval * lbtv,double offset,int calc_offset_correction)711 lin_regress(server_info_t info, struct timeval *ltv, struct timeval *lbtv,
712 	    double offset, int calc_offset_correction)
713 {
714     double time_axis;
715     double uncorrected_offset;
716 
717     /*
718      * De-correcting the offset:
719      *
720      *	The passed offset is (our_real_time - remote_real_time).  To remove
721      *  corrections from our_real_time we take the difference in the basetime
722      *  (new_base_time - old_base_time) and subtract that from the offset.
723      *  That is, if the basetime goesup, the uncorrected offset goes down.
724      */
725     if (info->lin_count == 0) {
726 	info->lin_tv = *ltv;
727 	info->lin_btv = *lbtv;
728 	time_axis = 0;
729 	uncorrected_offset = offset;
730     } else {
731 	time_axis = tv_delta_double(&info->lin_tv, ltv);
732 	uncorrected_offset = offset - tv_delta_double(&info->lin_btv, lbtv);
733     }
734 
735     /*
736      * We have to use the uncorrected offset for frequency calculations.
737      */
738     ++info->lin_count;
739     info->lin_sumx += time_axis;
740     info->lin_sumx2 += time_axis * time_axis;
741     info->lin_sumy += uncorrected_offset;
742     info->lin_sumy2 += uncorrected_offset * uncorrected_offset;
743     info->lin_sumxy += time_axis * uncorrected_offset;
744 
745     /*
746      * We have to use the corrected offset for offset calculations.
747      */
748     if (calc_offset_correction) {
749 	++info->lin_countoffset;
750 	info->lin_sumoffset += offset;
751 	info->lin_sumoffset2 += offset * offset;
752     }
753 
754     /*
755      * Calculate various derived values.   This gets us slope, y-intercept,
756      * and correlation from the linear regression.
757      */
758     if (info->lin_count > 1) {
759 	info->lin_cache_slope =
760 	 (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
761 	 (info->lin_count * info->lin_sumx2 - info->lin_sumx * info->lin_sumx);
762 
763 	info->lin_cache_yint =
764 	 (info->lin_sumy - info->lin_cache_slope * info->lin_sumx) /
765 	 (info->lin_count);
766 
767 	info->lin_cache_corr =
768 	 (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
769 	 sqrt((info->lin_count * info->lin_sumx2 -
770 		      info->lin_sumx * info->lin_sumx) *
771 	     (info->lin_count * info->lin_sumy2 -
772 		      info->lin_sumy * info->lin_sumy)
773 	 );
774     }
775 
776     /*
777      * Calculate more derived values.  This gets us the standard-deviation
778      * of offsets.  The standard deviation approximately means that 68%
779      * of the samples fall within the calculated stddev of the mean.
780      */
781     if (info->lin_countoffset > 1) {
782 	 info->lin_cache_stddev =
783 	     sqrt((info->lin_sumoffset2 -
784 		 ((info->lin_sumoffset * info->lin_sumoffset /
785 		   info->lin_countoffset))) /
786 	         (info->lin_countoffset - 1.0));
787     }
788 
789     /*
790      * Save the most recent offset, we might use it in the future.
791      * Save the frequency correction (we might scale the slope later so
792      * we have a separate field for the actual frequency correction in
793      * seconds per second).
794      */
795     info->lin_cache_offset = offset;
796     info->lin_cache_freq = info->lin_cache_slope;
797 
798     if (debug_level >= 4) {
799 	logdebuginfo(info, 4, "iter=%2d time=%7.3f off=%+.6f uoff=%+.6f",
800 	    (int)info->lin_count,
801 	    time_axis, offset, uncorrected_offset);
802 	if (info->lin_count > 1) {
803 	    logdebug(4, " slope %+7.6f"
804 			    " yint %+3.2f corr %+7.6f freq_ppm %+4.2f",
805 		info->lin_cache_slope,
806 		info->lin_cache_yint,
807 		info->lin_cache_corr,
808 		info->lin_cache_freq * 1000000.0);
809 	}
810 	if (info->lin_countoffset > 1) {
811 	    logdebug(4, " stddev %7.6f", info->lin_cache_stddev);
812 	} else if (calc_offset_correction == 0) {
813 	    /* cannot calculate offset correction due to prior correction */
814 	    logdebug(4, " offset_ignored");
815 	}
816 	logdebug(4, "\n");
817     }
818 }
819 
820 /*
821  * Reset the linear regression data.  The info structure will not again be
822  * a candidate for frequency or offset correction until sufficient data
823  * has been accumulated to make a decision.
824  */
825 void
lin_reset(server_info_t info)826 lin_reset(server_info_t info)
827 {
828     server_info_t scan;
829 
830     info->lin_count = 0;
831     info->lin_sumx = 0;
832     info->lin_sumy = 0;
833     info->lin_sumxy = 0;
834     info->lin_sumx2 = 0;
835     info->lin_sumy2 = 0;
836 
837     info->lin_countoffset = 0;
838     info->lin_sumoffset = 0;
839     info->lin_sumoffset2 = 0;
840 
841     info->lin_cache_slope = 0;
842     info->lin_cache_yint = 0;
843     info->lin_cache_corr = 0;
844     info->lin_cache_offset = 0;
845     info->lin_cache_freq = 0;
846 
847     /*
848      * Destroy any additional alternative regressions.
849      */
850     while ((scan = info->altinfo) != NULL) {
851 	info->altinfo = scan->altinfo;
852 	free(scan);
853     }
854 }
855 
856 /*
857  * Sometimes we want to clean out the offset calculations without
858  * destroying the linear regression used to figure out the frequency
859  * correction.  This usually occurs whenever we issue an offset
860  * adjustment to the system, which invalidates any offset data accumulated
861  * up to that point.
862  */
863 void
lin_resetalloffsets(struct server_info ** info_ary,int count)864 lin_resetalloffsets(struct server_info **info_ary, int count)
865 {
866     server_info_t info;
867     int i;
868 
869     for (i = 0; i < count; ++i) {
870 	for (info = info_ary[i]; info; info = info->altinfo)
871 	    lin_resetoffsets(info);
872     }
873 }
874 
875 void
lin_resetoffsets(server_info_t info)876 lin_resetoffsets(server_info_t info)
877 {
878     info->lin_countoffset = 0;
879     info->lin_sumoffset = 0;
880     info->lin_sumoffset2 = 0;
881 }
882 
883 void
client_setserverstate(server_info_t info,int state,const char * str)884 client_setserverstate(server_info_t info, int state, const char *str)
885 {
886     if (info->server_state != state) {
887         info->server_state = state;
888 	logdebuginfo(info, 1, "%s\n", str);
889     }
890 }
891 
892