1 /* 2 * Copyright (c) 2005 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/usr.sbin/dntpd/client.c,v 1.9 2005/04/26 23:50:23 dillon Exp $ 35 */ 36 37 #include "defs.h" 38 39 void 40 client_init(void) 41 { 42 } 43 44 int 45 client_main(struct server_info **info_ary, int count) 46 { 47 struct server_info *best_off; 48 struct server_info *best_freq; 49 double last_freq; 50 double freq; 51 double offset; 52 int i; 53 int calc_offset_correction; 54 55 last_freq = 0.0; 56 57 for (;;) { 58 /* 59 * Subtract the interval from poll_sleep and poll the client 60 * if it reaches 0. 61 * 62 * Because we do not compensate for offset corrections which are 63 * in progress, we cannot accumulate data for an offset correction 64 * while a prior correction is still being worked through by the 65 * system. 66 */ 67 calc_offset_correction = !sysntp_offset_correction_is_running(); 68 for (i = 0; i < count; ++i) 69 client_poll(info_ary[i], min_sleep_opt, calc_offset_correction); 70 71 /* 72 * Find the best client (or synthesize one). A different client 73 * can be chosen for frequency and offset. Note in particular 74 * that offset counters and averaging code gets reset when an 75 * offset correction is made (otherwise the averaging history will 76 * cause later corrections to overshoot). 77 * 78 * The regression used to calculate the frequency is a much 79 * longer-term entity and is NOT reset, so it is still possible 80 * for the offset correction code to make minor adjustments to 81 * the frequency if it so desires. 82 * 83 * client_check may replace the server_info pointer with a new 84 * one. 85 */ 86 best_off = NULL; 87 best_freq = NULL; 88 for (i = 0; i < count; ++i) 89 client_check(&info_ary[i], &best_off, &best_freq); 90 91 /* 92 * Offset correction. 93 */ 94 if (best_off) { 95 offset = best_off->lin_sumoffset / best_off->lin_countoffset; 96 lin_resetalloffsets(info_ary, count); 97 if (offset < -COURSE_OFFSET_CORRECTION_LIMIT || 98 offset > COURSE_OFFSET_CORRECTION_LIMIT || 99 quickset_opt 100 ) { 101 freq = sysntp_correct_course_offset(offset); 102 quickset_opt = 0; 103 } else { 104 freq = sysntp_correct_offset(offset); 105 } 106 } else { 107 freq = 0.0; 108 } 109 110 /* 111 * Frequency correction (throw away minor freq adjusts from the 112 * offset code if we can't do a frequency correction here). Do 113 * not reissue if it hasn't changed from the last issued correction. 114 */ 115 if (best_freq) { 116 freq += best_freq->lin_cache_freq; 117 if (last_freq != freq) { 118 sysntp_correct_freq(freq); 119 last_freq = freq; 120 } 121 } 122 123 /* 124 * This function is responsible for managing the polling mode and 125 * figures out how long we should sleep. 126 */ 127 for (i = 0; i < count; ++i) 128 client_manage_polling_mode(info_ary[i]); 129 130 /* 131 * Polling loop sleep. 132 */ 133 usleep(min_sleep_opt * 1000000 + random() % 500000); 134 } 135 } 136 137 void 138 client_poll(server_info_t info, int poll_interval, int calc_offset_correction) 139 { 140 struct timeval rtv; 141 struct timeval ltv; 142 struct timeval lbtv; 143 double offset; 144 145 /* 146 * By default we always poll. If the polling interval comes under 147 * active management the poll_sleep will be non-zero. 148 */ 149 if (info->poll_sleep > poll_interval) { 150 info->poll_sleep -= poll_interval; 151 return; 152 } 153 info->poll_sleep = 0; 154 155 logdebug(4, "%s: poll, ", info->target); 156 if (udp_ntptimereq(info->fd, &rtv, <v, &lbtv) < 0) { 157 ++info->poll_failed; 158 logdebug(4, "no response (%d failures in a row)\n", 159 info->poll_failed); 160 if (info->poll_failed == POLL_FAIL_RESET) { 161 if (info->lin_count != 0) { 162 logdebug(4, "%s: resetting regression due to failures\n", 163 info->target); 164 } 165 lin_reset(info); 166 } 167 return; 168 } 169 170 /* 171 * Successful query. Update polling info for the polling mode manager. 172 */ 173 ++info->poll_count; 174 info->poll_failed = 0; 175 176 /* 177 * Figure out the offset (the difference between the reported 178 * time and our current time) for linear regression purposes. 179 */ 180 offset = tv_delta_double(&rtv, <v); 181 182 while (info) { 183 /* 184 * Linear regression 185 */ 186 if (debug_level >= 4) { 187 struct tm *tp; 188 char buf[64]; 189 time_t t; 190 191 t = rtv.tv_sec; 192 tp = localtime(&t); 193 strftime(buf, sizeof(buf), "%d-%b-%Y %H:%M:%S", tp); 194 logdebug(4, "%s.%03ld ", buf, rtv.tv_usec / 1000); 195 } 196 lin_regress(info, <v, &lbtv, offset, calc_offset_correction); 197 info = info->altinfo; 198 if (info && debug_level >= 4) { 199 logdebug(4, "%*.*s: poll, ", 200 (int)strlen(info->target), 201 (int)strlen(info->target), "(alt)"); 202 } 203 } 204 } 205 206 /* 207 * Find the best client (or synthesize a fake info structure to return). 208 * We can find separate best clients for offset and frequency. 209 */ 210 void 211 client_check(struct server_info **checkp, 212 struct server_info **best_off, 213 struct server_info **best_freq) 214 { 215 struct server_info *check = *checkp; 216 struct server_info *info; 217 218 /* 219 * Start an alternate linear regression once our current one 220 * has passed a certain point. 221 */ 222 if (check->lin_count >= LIN_RESTART / 2 && check->altinfo == NULL) { 223 info = malloc(sizeof(*info)); 224 assert(info != NULL); 225 /* note: check->altinfo is NULL as of the bcopy */ 226 bcopy(check, info, sizeof(*info)); 227 check->altinfo = info; 228 lin_reset(info); 229 } 230 231 /* 232 * Replace our current linear regression with the alternate once 233 * the current one has hit its limit (beyond a certain point the 234 * linear regression starts to work against us, preventing us from 235 * reacting to changing conditions). 236 * 237 * Report any significant change in the offset or ppm. 238 */ 239 if (check->lin_count >= LIN_RESTART) { 240 if ((info = check->altinfo) && info->lin_count >= LIN_RESTART / 2) { 241 double freq_diff; 242 243 freq_diff = info->lin_cache_freq - check->lin_cache_freq; 244 logdebug(4, "%s: Switching to alternate, Frequence " 245 "difference is %6.3f ppm\n", 246 info->target, freq_diff * 1.0E+6); 247 *checkp = info; 248 free(check); 249 check = info; 250 } 251 } 252 253 /* 254 * BEST CLIENT FOR FREQUENCY CORRECTION: 255 * 256 * 8 samples and a correllation > 0.99, or 257 * 16 samples and a correllation > 0.96 258 */ 259 info = *best_freq; 260 if ((check->lin_count >= 8 && fabs(check->lin_cache_corr) >= 0.99) || 261 (check->lin_count >= 16 && fabs(check->lin_cache_corr) >= 0.96) 262 ) { 263 if (info == NULL || 264 fabs(check->lin_cache_corr) > fabs(info->lin_cache_corr) 265 ) { 266 info = check; 267 *best_freq = info; 268 } 269 270 } 271 272 /* 273 * BEST CLIENT FOR OFFSET CORRECTION: 274 * 275 * Use the standard-deviation and require at least 4 samples. An 276 * offset correction is valid if the standard deviation is less then 277 * the average offset divided by 4. 278 */ 279 info = *best_off; 280 if (check->lin_countoffset >= 4 && 281 check->lin_cache_stddev < fabs(check->lin_sumoffset / check->lin_countoffset / 4)) { 282 if (info == NULL || 283 fabs(check->lin_cache_stddev) < fabs(info->lin_cache_stddev) 284 ) { 285 info = check; 286 *best_off = info; 287 } 288 } 289 } 290 291 /* 292 * Actively manage the polling interval. Note that the poll_* fields are 293 * always transfered to the alternate regression when the check code replaces 294 * the current regression with a new one. 295 * 296 * This routine is called from the main loop for each base info structure. 297 * The polling mode applies to all alternates so we do not have to iterate 298 * through the alt's. 299 */ 300 void 301 client_manage_polling_mode(struct server_info *info) 302 { 303 /* 304 * If too many query failures occured go into a failure-recovery state. 305 * If we were in startup when we failed, go into the second failure 306 * state so a recovery returns us back to startup mode. 307 */ 308 if (info->poll_failed >= POLL_FAIL_RESET && 309 info->poll_mode != POLL_FAILED_1 && 310 info->poll_mode != POLL_FAILED_2 311 ) { 312 logdebug(2, "%s: polling mode moving to a FAILED state.\n", 313 info->target); 314 if (info->poll_mode != POLL_STARTUP) 315 info->poll_mode = POLL_FAILED_1; 316 else 317 info->poll_mode = POLL_FAILED_2; 318 info->poll_count = 0; 319 } 320 321 /* 322 * Standard polling mode progression 323 */ 324 switch(info->poll_mode) { 325 case POLL_FIXED: 326 info->poll_mode = POLL_STARTUP; 327 info->poll_count = 0; 328 logdebug(2, "%s: polling mode INIT->STARTUP.\n", info->target); 329 /* fall through */ 330 case POLL_STARTUP: 331 if (info->poll_count < POLL_STARTUP_MAX) { 332 if (info->poll_sleep == 0) 333 info->poll_sleep = min_sleep_opt; 334 break; 335 } 336 info->poll_mode = POLL_ACQUIRE; 337 info->poll_count = 0; 338 logdebug(2, "%s: polling mode STARTUP->ACQUIRE.\n", info->target); 339 /* fall through */ 340 case POLL_ACQUIRE: 341 /* 342 * Acquisition mode using the nominal timeout. We do not shift 343 * to maintainance mode unless the correllation is at least 0.90 344 */ 345 if (info->poll_count < POLL_ACQUIRE_MAX || 346 info->lin_count < 8 || 347 fabs(info->lin_cache_corr) < 0.85 348 ) { 349 if (info->poll_count >= POLL_ACQUIRE_MAX && 350 info->lin_count == LIN_RESTART - 2 351 ) { 352 logdebug(2, 353 "%s: WARNING: Unable to shift this source to " 354 "maintainance mode. Target correllation is aweful.\n", 355 info->target); 356 } 357 if (info->poll_sleep == 0) 358 info->poll_sleep = nom_sleep_opt; 359 break; 360 } 361 info->poll_mode = POLL_MAINTAIN; 362 info->poll_count = 0; 363 logdebug(2, "%s: polling mode ACQUIRE->MAINTAIN.\n", info->target); 364 /* fall through */ 365 case POLL_MAINTAIN: 366 if (info->lin_count >= LIN_RESTART / 2 && 367 fabs(info->lin_cache_corr) < 0.70 368 ) { 369 logdebug(2, 370 "%s: polling mode MAINTAIN->ACQUIRE. Unable to maintain\n" 371 "the maintainance mode because the correllation went" 372 " bad!\n", info->target); 373 info->poll_mode = POLL_ACQUIRE; 374 info->poll_count = 0; 375 break; 376 } 377 if (info->poll_sleep == 0) 378 info->poll_sleep = max_sleep_opt; 379 /* do nothing */ 380 break; 381 case POLL_FAILED_1: 382 /* 383 * We have failed recently. If we recover return to the acquisition 384 * state. 385 * 386 * poll_count does not increment while we are failed. poll_failed 387 * does increment (but gets zero'd once we recover). 388 */ 389 if (info->poll_count != 0) { 390 logdebug(2, "%s: polling mode FAILED1->ACQUIRE.\n", info->target); 391 info->poll_mode = POLL_ACQUIRE; 392 /* do not reset poll_count */ 393 break; 394 } 395 if (info->poll_failed >= POLL_RECOVERY_RESTART) 396 info->poll_mode = POLL_FAILED_2; 397 if (info->poll_sleep == 0) 398 info->poll_sleep = nom_sleep_opt; 399 break; 400 case POLL_FAILED_2: 401 /* 402 * We have been failed for a very long time, or we failed while 403 * in startup. If we recover we have to go back into startup. 404 */ 405 if (info->poll_count != 0) { 406 logdebug(2, "%s: polling mode FAILED2->STARTUP.\n", info->target); 407 info->poll_mode = POLL_STARTUP; 408 break; 409 } 410 if (info->poll_sleep == 0) 411 info->poll_sleep = nom_sleep_opt; 412 break; 413 } 414 } 415 416 /* 417 * Linear regression. 418 * 419 * ltv local time as of when the offset error was calculated between 420 * local time and remote time. 421 * 422 * lbtv base time as of when local time was obtained. Used to 423 * calculate the cumulative corrections made to the system's 424 * real time clock so we can de-correct the offset for the 425 * linear regression. 426 * 427 * X is the time axis, in seconds. 428 * Y is the uncorrected offset, in seconds. 429 */ 430 void 431 lin_regress(server_info_t info, struct timeval *ltv, struct timeval *lbtv, 432 double offset, int calc_offset_correction) 433 { 434 double time_axis; 435 double uncorrected_offset; 436 437 /* 438 * De-correcting the offset: 439 * 440 * The passed offset is (our_real_time - remote_real_time). To remove 441 * corrections from our_real_time we take the difference in the basetime 442 * (new_base_time - old_base_time) and subtract that from the offset. 443 * That is, if the basetime goesup, the uncorrected offset goes down. 444 */ 445 if (info->lin_count == 0) { 446 info->lin_tv = *ltv; 447 info->lin_btv = *lbtv; 448 time_axis = 0; 449 uncorrected_offset = offset; 450 } else { 451 time_axis = tv_delta_double(&info->lin_tv, ltv); 452 uncorrected_offset = offset - tv_delta_double(&info->lin_btv, lbtv); 453 } 454 455 /* 456 * We have to use the uncorrected offset for frequency calculations. 457 */ 458 ++info->lin_count; 459 info->lin_sumx += time_axis; 460 info->lin_sumx2 += time_axis * time_axis; 461 info->lin_sumy += uncorrected_offset; 462 info->lin_sumy2 += uncorrected_offset * uncorrected_offset; 463 info->lin_sumxy += time_axis * uncorrected_offset; 464 465 /* 466 * We have to use the corrected offset for offset calculations. 467 */ 468 if (calc_offset_correction) { 469 ++info->lin_countoffset; 470 info->lin_sumoffset += offset; 471 info->lin_sumoffset2 += offset * offset; 472 } 473 474 /* 475 * Calculate various derived values. This gets us slope, y-intercept, 476 * and correllation from the linear regression. 477 */ 478 if (info->lin_count > 1) { 479 info->lin_cache_slope = 480 (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) / 481 (info->lin_count * info->lin_sumx2 - info->lin_sumx * info->lin_sumx); 482 483 info->lin_cache_yint = 484 (info->lin_sumy - info->lin_cache_slope * info->lin_sumx) / 485 (info->lin_count); 486 487 info->lin_cache_corr = 488 (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) / 489 sqrt((info->lin_count * info->lin_sumx2 - 490 info->lin_sumx * info->lin_sumx) * 491 (info->lin_count * info->lin_sumy2 - 492 info->lin_sumy * info->lin_sumy) 493 ); 494 } 495 496 /* 497 * Calculate more derived values. This gets us the standard-deviation 498 * of offsets. The standard deviation approximately means that 68% 499 * of the samples fall within the calculated stddev of the mean. 500 */ 501 if (info->lin_countoffset > 1) { 502 info->lin_cache_stddev = 503 sqrt((info->lin_sumoffset2 - 504 ((info->lin_sumoffset * info->lin_sumoffset / 505 info->lin_countoffset))) / 506 (info->lin_countoffset - 1.0)); 507 } 508 509 /* 510 * Save the most recent offset, we might use it in the future. 511 * Save the frequency correction (we might scale the slope later so 512 * we have a separate field for the actual frequency correction in 513 * seconds per second). 514 */ 515 info->lin_cache_offset = offset; 516 info->lin_cache_freq = info->lin_cache_slope; 517 518 if (debug_level >= 4) { 519 logdebug(4, "iter=%2d time=%7.3f off=%.6f uoff=%.6f", 520 (int)info->lin_count, 521 time_axis, offset, uncorrected_offset); 522 if (info->lin_count > 1) { 523 logdebug(4, " slope %7.6f" 524 " yint %3.2f corr %7.6f freq_ppm %4.2f", 525 info->lin_cache_slope, 526 info->lin_cache_yint, 527 info->lin_cache_corr, 528 info->lin_cache_freq * 1000000.0); 529 } 530 if (info->lin_countoffset > 1) { 531 logdebug(4, " stddev %7.6f", info->lin_cache_stddev); 532 } else if (calc_offset_correction == 0) { 533 /* cannot calculate offset correction due to prior correction */ 534 logdebug(4, " offset_ignored"); 535 } 536 logdebug(4, "\n"); 537 } 538 } 539 540 /* 541 * Reset the linear regression data. The info structure will not again be 542 * a candidate for frequency or offset correction until sufficient data 543 * has been accumulated to make a decision. 544 */ 545 void 546 lin_reset(server_info_t info) 547 { 548 server_info_t scan; 549 550 info->lin_count = 0; 551 info->lin_sumx = 0; 552 info->lin_sumy = 0; 553 info->lin_sumxy = 0; 554 info->lin_sumx2 = 0; 555 info->lin_sumy2 = 0; 556 557 info->lin_countoffset = 0; 558 info->lin_sumoffset = 0; 559 info->lin_sumoffset2 = 0; 560 561 info->lin_cache_slope = 0; 562 info->lin_cache_yint = 0; 563 info->lin_cache_corr = 0; 564 info->lin_cache_offset = 0; 565 info->lin_cache_freq = 0; 566 567 /* 568 * Destroy any additional alternative regressions. 569 */ 570 while ((scan = info->altinfo) != NULL) { 571 info->altinfo = scan->altinfo; 572 free(scan); 573 } 574 } 575 576 /* 577 * Sometimes we want to clean out the offset calculations without 578 * destroying the linear regression used to figure out the frequency 579 * correction. This usually occurs whenever we issue an offset 580 * adjustment to the system, which invalidates any offset data accumulated 581 * up to that point. 582 */ 583 void 584 lin_resetalloffsets(struct server_info **info_ary, int count) 585 { 586 server_info_t info; 587 int i; 588 589 for (i = 0; i < count; ++i) { 590 for (info = info_ary[i]; info; info = info->altinfo) 591 lin_resetoffsets(info); 592 } 593 } 594 595 void 596 lin_resetoffsets(server_info_t info) 597 { 598 info->lin_countoffset = 0; 599 info->lin_sumoffset = 0; 600 info->lin_sumoffset2 = 0; 601 } 602 603