1/* 2 * alg_config - help determine optimal values for algorithm levels 3 * 4 * Copyright (C) 2006,2014,2021 Landon Curt Noll 5 * 6 * Calc is open software; you can redistribute it and/or modify it under 7 * the terms of the version 2.1 of the GNU Lesser General Public License 8 * as published by the Free Software Foundation. 9 * 10 * Calc is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General 13 * Public License for more details. 14 * 15 * A copy of version 2.1 of the GNU Lesser General Public License is 16 * distributed with calc under the filename COPYING-LGPL. You should have 17 * received a copy with calc; if not, write to Free Software Foundation, Inc. 18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 * 20 * Under source code control: 2006/06/07 14:10:11 21 * File existed as early as: 2006 22 * 23 * chongo <was here> /\oo/\ http://www.isthe.com/chongo/ 24 * Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/ 25 */ 26 27static test_time; /* try for this many seconds in loop test */ 28 29 30/* 31 * close_to_one - set to 1 if the ratio is close enough to 1 32 * 33 * given: 34 * ratio the ratio of time between two algorithms 35 * 36 * returns: 37 * 1 When ratio is near 1.0 38 * 0 otherwise 39 * 40 * We consider the range [0.995, 1.005] to be close enough to 1 to be call unity 41 * because of the precision of the CPU timing. 42 */ 43define close_to_one(ratio) 44{ 45 /* firewall */ 46 if (!isreal(ratio)) { 47 quit "close: 1st arg: must be a real number"; 48 } 49 50 /* check if the ratio is far from unity */ 51 if ((ratio < 0.995) || (ratio > 1.005)) { 52 return 0; 53 } 54 55 /* we are close to unity */ 56 return 1; 57} 58 59 60/* 61 * mul_loop - measure the CPU time to perform a set of multiply loops 62 * 63 * given: 64 * repeat number of multiply loops to perform 65 * x array of 5 values, each the same length in BASEB-bit words 66 * 67 * NOTE: When their lengths are 1 BASEB-bit word, then a 68 * dummy loop of simple constants are used. Thus the 69 * length == 1 is an approximation of loop overhead. 70 * 71 * returns: 72 * approximate runtime to perform repeat the multiply loops 73 * 74 * NOTE: This is an internal support function that is normally 75 * not called directly from the command line. Call the 76 * function best_mul2() instead. 77 */ 78define mul_loop(repeat, x) 79{ 80 local start; /* start of execution */ 81 local end; /* end of execution */ 82 local answer; /* multiplicand */ 83 local len; /* length of each element */ 84 local baseb_bytes; /* bytes in a BASEB-bit word */ 85 local i; 86 87 /* firewall */ 88 if (!isint(repeat) || repeat < 0) { 89 quit "mul_loop: 1st arg: repeat must be an integer > 0"; 90 } 91 if (size(*x) != 5) { 92 quit "mul_loop: 2nd arg matrix does not have 5 elements"; 93 } 94 if (matdim(*x) != 1) { 95 quit "mul_loop: 2nd arg matrix is not 1 dimensional"; 96 } 97 if (matmin(*x, 1) != 0) { 98 quit "mul_loop: 2nd arg matrix index range does not start with 0"; 99 } 100 if (matmax(*x, 1) != 4) { 101 quit "mul_loop: 2nd arg matrix index range does not end with 4"; 102 } 103 104 baseb_bytes = config("baseb") / 8; 105 len = sizeof((*x)[0]) / baseb_bytes; 106 for (i=1; i < 4; ++i) { 107 if ((sizeof((*x)[i]) / baseb_bytes) != len) { 108 quit "mul_loop: 2nd arg matrix elements are not of " 109 "equal BASEB-bit word length"; 110 } 111 } 112 113 /* multiply pairwise, all sets of a given length */ 114 start = usertime(); 115 for (i=0; i < repeat; ++i) { 116 117 if (len == 1) { 118 /* we use len == 1 to test this tester loop overhead */ 119 answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; 120 /**/ 121 answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; 122 /**/ 123 answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; 124 /**/ 125 answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; 126 /**/ 127 answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; 128 } else { 129 answer = (*x)[0] * (*x)[1]; 130 answer = (*x)[0] * (*x)[2]; 131 answer = (*x)[0] * (*x)[3]; 132 answer = (*x)[0] * (*x)[4]; 133 /**/ 134 answer = (*x)[1] * (*x)[0]; 135 answer = (*x)[1] * (*x)[2]; 136 answer = (*x)[1] * (*x)[3]; 137 answer = (*x)[1] * (*x)[4]; 138 /**/ 139 answer = (*x)[2] * (*x)[0]; 140 answer = (*x)[2] * (*x)[1]; 141 answer = (*x)[2] * (*x)[3]; 142 answer = (*x)[2] * (*x)[4]; 143 /**/ 144 answer = (*x)[3] * (*x)[0]; 145 answer = (*x)[3] * (*x)[1]; 146 answer = (*x)[3] * (*x)[2]; 147 answer = (*x)[3] * (*x)[4]; 148 /**/ 149 answer = (*x)[4] * (*x)[0]; 150 answer = (*x)[4] * (*x)[1]; 151 answer = (*x)[4] * (*x)[2]; 152 answer = (*x)[4] * (*x)[3]; 153 } 154 } 155 156 /* 157 * return duration 158 */ 159 end = usertime(); 160 return end-start; 161} 162 163 164/* 165 * mul_ratio - ratio of rates of 1st and 2nd multiply algorithms 166 * 167 * given: 168 * len length in BASEB-bit words to multiply 169 * 170 * return: 171 * ratio of (1st / 2nd) algorithm rate. 172 * 173 * When want to determine a rate to a precision of 1 part in 1000. 174 * Most systems today return CPU time to at least 10 msec precision. 175 * So to get rates to that precision, we need to time loops to at 176 * least 1000 times as long as the precision (10 msec * 1000) 177 * which usually requires timing of loops that last 10 seconds or more. 178 * 179 * NOTE: This is an internal support function that is normally 180 * not called directly from the command line. Call the 181 * function best_mul2() instead. 182 */ 183define mul_ratio(len) 184{ 185 local mat x[5]; /* array of values for mul_loop to multiply */ 186 local mat one[5]; /* array if single BASEB-bit values */ 187 local baseb; /* calc word size in bits */ 188 local orig_cfg; /* caller configuration */ 189 local loops; /* number of multiply loops to time */ 190 local tlen; /* time to perform some number of loops */ 191 local tover; /* est of time for loop overhead */ 192 local alg1_rate; /* loop rate of 1st algorithm */ 193 local alg2_rate; /* loop rate of 2nd algorithm */ 194 local ret; /* return ratio, or 1.0 */ 195 local i; 196 197 /* 198 * firewall 199 */ 200 if (!isint(len) || len < 2) { 201 quit "mul_ratio: 1st arg: len is not an integer > 1"; 202 } 203 204 /* 205 * remember the caller's config state 206 */ 207 orig_cfg = config("all"); 208 config("mul2", 0),; 209 config("sq2", 0),; 210 config("pow2", 0),; 211 config("redc2", 0),; 212 config("tilde", 0),; 213 214 /* 215 * initialize x, the values we will multiply 216 * 217 * We want these tests to be repeatable as possible, so we will seed 218 * the PRNG in a deterministic way. 219 */ 220 baseb = config("baseb"); 221 srand(sha1(sha1(baseb, config("version")))); 222 for (i=0; i < 5; ++i) { 223 /* force the values to be a full len words long */ 224 x[i] = ((1<<(((len-1) * baseb) + baseb-1)) | 225 randbit(((len-1) * baseb) + baseb-2)); 226 /* single BASEB-bit values */ 227 one[i] = 1; 228 } 229 230 /* 231 * determine the number of loops needed to test 1st alg 232 */ 233 config("mul2", 2^31-1),; 234 loops = 1/2; 235 do { 236 loops *= 2; 237 tlen = mul_loop(loops, &x); 238 if (config("user_debug") > 3) { 239 printf("\t alg1 loops %d took %.3f sec\n", loops, tlen); 240 } 241 } while (tlen < 1.0); 242 243 /* 244 * determine the 1st algorithm rate 245 */ 246 loops = max(1, ceil(loops * test_time / tlen)); 247 if (loops < 16) { 248 if (config("user_debug") > 1) { 249 printf(" we must expand alg1 loop test time to about %d secs\n", 250 ceil(test_time * (16 / loops))); 251 } 252 loops = 16; 253 } 254 if (config("user_debug") > 3) { 255 printf("\t will try alg1 %d loops\n", loops); 256 } 257 tlen = mul_loop(loops, &x); 258 if (config("user_debug") > 3) { 259 printf("\t alg1 time = %.3f secs\n", tlen); 260 } 261 tover = mul_loop(loops, &one); 262 if (config("user_debug") > 3) { 263 printf("\t alg1 overhead look %.3f secs\n", tover); 264 } 265 if (tlen <= tover) { 266 quit "mul_ratio: overhead >= loop time"; 267 } 268 alg1_rate = loops / (tlen - tover); 269 if (config("user_debug") > 2) { 270 printf("\tmultiply alg1 rate = %.3f loopsets/sec\n", alg1_rate); 271 } 272 if (alg1_rate <= 0.0) { 273 quit "mul_ratio: alg1 rate was <= 0.0"; 274 } 275 276 /* 277 * determine the number of loops needed to test 1st alg 278 */ 279 config("mul2", 2),; 280 loops = 1/2; 281 do { 282 loops *= 2; 283 tlen = mul_loop(loops, &x); 284 if (config("user_debug") > 3) { 285 printf("\t alg2 loops %d took %.3f sec\n", loops, tlen); 286 } 287 } while (tlen < 1.0); 288 289 /* 290 * determine the 2nd algorithm rate 291 */ 292 loops = max(1, ceil(loops * test_time / tlen)); 293 if (loops < 16) { 294 if (config("user_debug") > 1) { 295 printf(" we must expand alg2 loop test time to about %d secs\n", 296 ceil(test_time * (16 / loops))); 297 } 298 loops = 16; 299 } 300 tlen = mul_loop(loops, &x); 301 if (config("user_debug") > 3) { 302 printf("\t alg2 time = %.3f secs\n", tlen); 303 } 304 tover = mul_loop(loops, &one); 305 if (config("user_debug") > 3) { 306 printf("\t alg2 overhead look %.3f secs\n", tover); 307 } 308 if (tlen <= tover) { 309 quit "mul_ratio: overhead >= loop time"; 310 } 311 alg2_rate = loops / (tlen - tover); 312 if (config("user_debug") > 2) { 313 printf("\tmultiply alg2 rate = %.3f loopsets/sec\n", alg2_rate); 314 } 315 if (alg2_rate <= 0.0) { 316 quit "mul_ratio: alg2 rate was <= 0.0"; 317 } 318 319 /* 320 * restore old config 321 */ 322 config("all", orig_cfg),; 323 324 /* 325 * return alg1 / alg2 rate ratio 326 */ 327 ret = alg1_rate / alg2_rate; 328 if (config("user_debug") > 2) { 329 printf("\tprecise ratio is: %.f mul_ratio will return: %.3f\n", 330 alg1_rate / alg2_rate, ret); 331 } 332 return ret; 333} 334 335 336/* 337 * best_mul2 - determine the best config("mul2") parameter 338 * 339 * NOTE: Due to precision problems with CPU measurements, it is not 340 * unusual for the output of this function to vary slightly 341 * from run to run. 342 * 343 * NOTE: This function is designed to take a long time to run. 344 * We recommend setting: 345 * 346 * config("user_debug", 2) 347 * 348 * so that yon can watch the progress of this function. 349 */ 350define best_mul2() 351{ 352 local ratio; /* previously calculated alg1/alg2 ratio */ 353 local low; /* low loop value tested */ 354 local high; /* high loop value tested */ 355 local mid; /* between low and high */ 356 local best_val; /* value found with ratio closest to unity */ 357 local best_ratio; /* closest ratio found to unity */ 358 local expand; /* how fast to expand the length */ 359 360 /* 361 * setup 362 */ 363 printf("WARNING: This tool may not be computing the correct best value\n"); 364 test_time = 5.0; 365 printf("The best_mul2() function will take a LONG time to run!\n"); 366 printf("It is important that best_mul2() run on an otherwise idle host!\n"); 367 if (config("user_debug") <= 0) { 368 printf("To monitor progress, set user_debug to 2: " 369 "config(\"user_debug\", 2)\n"); 370 } 371 printf("Starting with loop test time of %d secs\n", test_time); 372 373 /* 374 * firewall - must have a >1 ratio for the initial length 375 */ 376 high = 8; 377 best_val = high; 378 if (config("user_debug") > 0) { 379 printf("testing multiply alg1/alg2 ratio for len = %d\n", high); 380 } 381 ratio = mul_ratio(high); 382 best_ratio = ratio; 383 if (config("user_debug") > 1) { 384 printf(" multiply alg1/alg2 ratio = %.6f\n", ratio); 385 } 386 if (ratio < 1.0) { 387 quit "best_mul2: tests imply mul2 should be < 16, which seems bogus"; 388 } 389 390 /* 391 * expand lengths until the ratio flips 392 */ 393 do { 394 /* 395 * determine the parameters of the next ratio test 396 * 397 * We will multiplicatively expand our test level until 398 * the ratio drops below 1.0. 399 */ 400 expand = 2; 401 low = high; 402 high *= expand; 403 if (config("user_debug") > 1) { 404 printf(" expand the next test range by a factor of %d\n", 405 expand); 406 } 407 408 /* 409 * determine the alg1/alg2 test ratio for this new length 410 */ 411 if (high >= 2^31) { 412 quit "best_mul2: test implies mul2 >= 2^31, which seems bogus"; 413 } 414 if (config("user_debug") > 0) { 415 printf("testing multiply alg1/alg2 ratio for len = %d\n", high); 416 } 417 ratio = mul_ratio(high); 418 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 419 best_val = high; 420 best_ratio = ratio; 421 if (config("user_debug") > 1) { 422 printf(" len %d has a new closest ratio to unity: %.6f\n", 423 best_val, best_ratio); 424 } 425 } 426 if (config("user_debug") > 1) { 427 printf(" multiply alg1/alg2 ratio = %.6f\n", ratio); 428 } 429 } while (ratio > 1.0); 430 431 /* 432 * If we previously expanded more than by a factor of 2, then 433 * we may have jumped over the crossover point. So now 434 * drop down powers of two until the ratio is again >= 1.0 435 */ 436 if (expand > 2) { 437 do { 438 439 /* 440 * contract by 2 441 */ 442 high /= 2; 443 low = high / 2; 444 if (config("user_debug") > 0) { 445 printf("re-testing multiply alg1/alg2 ratio for len = %d\n", 446 high); 447 } 448 ratio = mul_ratio(high); 449 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 450 best_val = high; 451 best_ratio = ratio; 452 if (config("user_debug") > 1) { 453 printf(" len %d has a new closest ratio " 454 "to unity: %.6f\n", 455 best_val, best_ratio); 456 } 457 } 458 if (config("user_debug") > 1) { 459 printf(" multiply alg1/alg2 ratio = %.6f\n", ratio); 460 } 461 462 } while (ratio <= 1.0); 463 464 /* now that the ratio flipped again, use the previous range */ 465 low = high; 466 high = high * 2; 467 } 468 if (config("user_debug") > 0) { 469 printf("Starting binary search between %d and %d\n", low, high); 470 } 471 472 /* 473 * binary search between low and high, for where ratio is just under 1.0 474 */ 475 while (low+1 < high) { 476 477 /* try the mid-point */ 478 mid = int((low+high)/2); 479 if (config("user_debug") > 0) { 480 printf("testing multiply alg1/alg2 ratio for len = %d\n", mid); 481 } 482 ratio = mul_ratio(mid); 483 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 484 best_val = mid; 485 best_ratio = ratio; 486 if (config("user_debug") > 1) { 487 printf(" len %d has a new closest ratio to unity: %.6f\n", 488 best_val, best_ratio); 489 } 490 } 491 if (config("user_debug") > 1) { 492 printf(" len %d multiply alg1/alg2 ratio = %.6f\n", mid, ratio); 493 } 494 495 /* stop search if near unity */ 496 if (close_to_one(ratio)) { 497 low = mid; 498 high = mid; 499 if (config("user_debug") > 0) { 500 printf("\twe are close enough to unity ratio at: %d\n", mid); 501 } 502 break; 503 } 504 505 /* bump lower range up if we went over */ 506 if (ratio > 1.0) { 507 if (config("user_debug") > 2) { 508 printf("\tmove low from %d up to %d\n", 509 low, mid); 510 } 511 low = mid; 512 513 /* drop higher range down if we went under */ 514 } else { 515 if (config("user_debug") > 2) { 516 printf("\tmove high from %d down to %d\n", 517 high, mid); 518 } 519 high = mid; 520 } 521 522 /* report on test loop progress */ 523 if (config("user_debug") > 1) { 524 printf("\tsetting low: %d high: %d diff: %d\n", 525 low, high, high-low); 526 } 527 } 528 529 /* 530 * return on the suggested config("mul2") value 531 */ 532 if (config("user_debug") > 0) { 533 printf("Best value for multiply is near %d\n", best_val); 534 printf("Best multiply alg1/alg2 ratio is: %.6f\n", best_ratio); 535 printf("We suggest placing this line in your .calcrc:\n"); 536 printf("config(\"mul2\", %d),;\n", best_val); 537 printf("WARNING: It is believed that the output " 538 "of this resource file is bogus!\n"); 539 printf("WARNING: You may NOT wish to follow the above suggestion.\n"); 540 } 541 return mid; 542} 543 544 545/* 546 * sq_loop - measure the CPU time to perform a set of square loops 547 * 548 * given: 549 * repeat number of square loops to perform 550 * x array of 5 values, each the same length in BASEB-bit words 551 * 552 * NOTE: When their lengths are 1 BASEB-bit word, then a 553 * dummy loop of simple constants are used. Thus the 554 * length == 1 is an approximation of loop overhead. 555 * returns: 556 * approximate runtime to perform a square loop 557 * 558 * NOTE: This is an internal support function that is normally 559 * not called directly from the command line. Call the 560 * function best_sq2() instead. 561 */ 562define sq_loop(repeat, x) 563{ 564 local start; /* start of execution */ 565 local end; /* end of execution */ 566 local answer; /* squared value */ 567 local len; /* length of each element */ 568 local baseb_bytes; /* bytes in a BASEB-bit word */ 569 local i; 570 571 /* firewall */ 572 if (!isint(repeat) || repeat < 0) { 573 quit "sq_loop: 1st arg: repeat must be an integer > 0"; 574 } 575 if (size(*x) != 5) { 576 quit "sq_loop: 2nd arg matrix does not have 5 elements"; 577 } 578 if (matdim(*x) != 1) { 579 quit "sq_loop: 2nd arg matrix is not 1 dimensional"; 580 } 581 if (matmin(*x, 1) != 0) { 582 quit "sq_loop: 2nd arg matrix index range does not start with 0"; 583 } 584 if (matmax(*x, 1) != 4) { 585 quit "sq_loop: 2nd arg matrix index range does not end with 4"; 586 } 587 baseb_bytes = config("baseb") / 8; 588 len = sizeof((*x)[0]) / baseb_bytes; 589 for (i=1; i < 4; ++i) { 590 if ((sizeof((*x)[i]) / baseb_bytes) != len) { 591 quit "sq_loop: 2nd arg matrix elements are not of equal " 592 "BASEB-bit word length"; 593 } 594 } 595 596 /* square pairwise, all sets of a given length */ 597 start = usertime(); 598 for (i=0; i < repeat; ++i) { 599 600 if (len == 1) { 601 /* we use len == 1 to test this tester loop overhead */ 602 answer = 0^2; answer = 0^2; answer = 0^2; answer = 0^2; 603 answer = 0^2; 604 /**/ 605 answer = 0^2; answer = 0^2; answer = 0^2; answer = 0^2; 606 answer = 0^2; 607 /**/ 608 answer = 0^2; answer = 0^2; answer = 0^2; answer = 0^2; 609 answer = 0^2; 610 /**/ 611 answer = 0^2; answer = 0^2; answer = 0^2; answer = 0^2; 612 answer = 0^2; 613 } else { 614 /* one square loop */ 615 answer = (*x)[0]^2; 616 answer = (*x)[1]^2; 617 answer = (*x)[2]^2; 618 answer = (*x)[3]^2; 619 answer = (*x)[4]^2; 620 /**/ 621 answer = (*x)[0]^2; 622 answer = (*x)[1]^2; 623 answer = (*x)[2]^2; 624 answer = (*x)[3]^2; 625 answer = (*x)[4]^2; 626 /**/ 627 answer = (*x)[0]^2; 628 answer = (*x)[1]^2; 629 answer = (*x)[2]^2; 630 answer = (*x)[3]^2; 631 answer = (*x)[4]^2; 632 /**/ 633 answer = (*x)[0]^2; 634 answer = (*x)[1]^2; 635 answer = (*x)[2]^2; 636 answer = (*x)[3]^2; 637 answer = (*x)[4]^2; 638 } 639 } 640 641 /* 642 * return duration 643 */ 644 end = usertime(); 645 return end-start; 646} 647 648 649/* 650 * sq_ratio - ratio of rates of 1st and 2nd square algorithms 651 * 652 * given: 653 * len length in BASEB-bit words to square 654 * 655 * return: 656 * ratio of (1st / 2nd) algorithm rates 657 * 658 * When want to determine a rate to a precision of 1 part in 1000. 659 * Most systems today return CPU time to at least 10 msec precision. 660 * So to get rates to that precision, we need to time loops to at 661 * least 1000 times as long as the precision (10 msec * 1000) 662 * which usually requires timing of loops that last 10 seconds or more. 663 * 664 * NOTE: This is an internal support function that is normally 665 * not called directly from the command line. Call the 666 * function best_sq2() instead. 667 */ 668define sq_ratio(len) 669{ 670 local mat x[5]; /* array of values for sq_loop to square */ 671 local mat one[5]; /* array if single BASEB-bit values */ 672 local baseb; /* calc word size in bits */ 673 local orig_cfg; /* caller configuration */ 674 local loops; /* number of square loops to time */ 675 local tlen; /* time to perform some number of loops */ 676 local tover; /* est of time for loop overhead */ 677 local alg1_rate; /* loop rate of 1st algorithm */ 678 local alg2_rate; /* loop rate of 2nd algorithm */ 679 local ret; /* return ratio, or 1.0 */ 680 local i; 681 682 /* 683 * firewall 684 */ 685 if (!isint(len) || len < 2) { 686 quit "sq_ratio: 1st arg: len is not an integer > 1"; 687 } 688 689 /* 690 * remember the caller's config state 691 */ 692 orig_cfg = config("all"); 693 config("mul2", 0),; 694 config("sq2", 0),; 695 config("pow2", 0),; 696 config("redc2", 0),; 697 config("tilde", 0),; 698 699 /* 700 * initialize x, the values we will square 701 * 702 * We want these tests to be repeatable as possible, so we will seed 703 * the PRNG in a deterministic way. 704 */ 705 baseb = config("baseb"); 706 srand(sha1(sha1(baseb, config("version")))); 707 for (i=0; i < 5; ++i) { 708 /* force the values to be a full len words long */ 709 x[i] = ((1<<(((len-1) * baseb) + baseb-1)) | 710 randbit(((len-1) * baseb) + baseb-2)); 711 /* single BASEB-bit values */ 712 one[i] = 1; 713 } 714 715 /* 716 * determine the number of loops needed to test 1st alg 717 */ 718 config("sq2", 2^31-1),; 719 loops = 1/2; 720 do { 721 loops *= 2; 722 tlen = sq_loop(loops, &x); 723 if (config("user_debug") > 3) { 724 printf("\t alg1 loops %d took %.3f sec\n", loops, tlen); 725 } 726 } while (tlen < 1.0); 727 728 /* 729 * determine the 1st algorithm rate 730 */ 731 loops = max(1, ceil(loops * test_time / tlen)); 732 if (loops < 16) { 733 if (config("user_debug") > 1) { 734 printf(" we must expand alg1 loop test time to about %d secs\n", 735 ceil(test_time * (16 / loops))); 736 } 737 loops = 16; 738 } 739 tlen = sq_loop(loops, &x); 740 if (config("user_debug") > 3) { 741 printf("\t alg1 time = %.3f secs\n", tlen); 742 } 743 tover = sq_loop(loops, &one); 744 if (config("user_debug") > 3) { 745 printf("\t alg1 overhead look %.3f secs\n", tover); 746 } 747 if (tlen <= tover) { 748 quit "sq_ratio: overhead >= loop time"; 749 } 750 alg1_rate = loops / (tlen - tover); 751 if (config("user_debug") > 2) { 752 printf("\tsquare alg1 rate = %.3f loopsets/sec\n", alg1_rate); 753 } 754 if (alg1_rate <= 0.0) { 755 quit "sq_ratio: alg1 rate was <= 0.0"; 756 } 757 758 /* 759 * determine the number of loops needed to test 1st alg 760 */ 761 config("sq2", 2),; 762 loops = 1/2; 763 do { 764 loops *= 2; 765 tlen = sq_loop(loops, &x); 766 if (config("user_debug") > 3) { 767 printf("\t alg2 loops %d took %.3f sec\n", loops, tlen); 768 } 769 } while (tlen < 1.0); 770 771 /* 772 * determine the 2nd algorithm rate 773 */ 774 loops = max(1, ceil(loops * test_time / tlen)); 775 if (loops < 16) { 776 if (config("user_debug") > 1) { 777 printf(" we must expand alg2 loop test time to about %d secs\n", 778 ceil(test_time * (16 / loops))); 779 } 780 loops = 16; 781 } 782 tlen = sq_loop(loops, &x); 783 if (config("user_debug") > 3) { 784 printf("\t alg2 time = %.3f secs\n", tlen); 785 } 786 tover = sq_loop(loops, &one); 787 if (config("user_debug") > 3) { 788 printf("\t alg2 overhead look %.3f secs\n", tover); 789 } 790 if (tlen <= tover) { 791 quit "sq_ratio: overhead >= loop time"; 792 } 793 alg2_rate = loops / (tlen - tover); 794 if (config("user_debug") > 2) { 795 printf("\tsquare alg2 rate = %.3f loopsets/sec\n", alg2_rate); 796 } 797 if (alg2_rate <= 0.0) { 798 quit "sq_ratio: alg2 rate was <= 0.0"; 799 } 800 801 /* 802 * restore old config 803 */ 804 config("all", orig_cfg),; 805 806 /* 807 * return alg1 / alg2 rate ratio 808 */ 809 ret = alg1_rate / alg2_rate; 810 if (config("user_debug") > 2) { 811 printf("\tprecise ratio is: %.f sq_ratio will return: %.3f\n", 812 alg1_rate / alg2_rate, ret); 813 } 814 return ret; 815} 816 817 818/* 819 * best_sq2 - determine the best config("sq2") parameter 820 * 821 * NOTE: Due to precision problems with CPU measurements, it is not 822 * unusual for the output of this function to vary slightly 823 * from run to run. 824 * 825 * NOTE: This function is designed to take a long time to run. 826 * We recommend setting: 827 * 828 * config("user_debug", 2) 829 * 830 * so that yon can watch the progress of this function. 831 */ 832define best_sq2() 833{ 834 local ratio; /* previously calculated alg1/alg2 ratio */ 835 local low; /* low loop value tested */ 836 local high; /* high loop value tested */ 837 local mid; /* between low and high */ 838 local best_val; /* value found with ratio closest to unity */ 839 local best_ratio; /* closest ratio found to unity */ 840 local expand; /* how fast to expand the length */ 841 842 /* 843 * setup 844 */ 845 printf("WARNING: This tool may not be computing the correct best value\n"); 846 test_time = 5.0; 847 printf("The best_sq2() function will take a LONG time to run!\n"); 848 printf("It is important that best_sq2() run on an otherwise idle host!\n"); 849 if (config("user_debug") <= 0) { 850 printf("To monitor progress, set user_debug to 2: " 851 "config(\"user_debug\", 2)\n"); 852 } 853 printf("Starting with loop test time of %d secs\n", test_time); 854 855 /* 856 * firewall - must have a >1 ratio for the initial length 857 */ 858 high = 8; 859 best_val = high; 860 if (config("user_debug") > 0) { 861 printf("testing square alg1/alg2 ratio for len = %d\n", high); 862 } 863 ratio = sq_ratio(high); 864 best_ratio = ratio; 865 if (config("user_debug") > 1) { 866 printf(" square alg1/alg2 ratio = %.3f\n", ratio); 867 } 868 if (ratio < 1.0) { 869 quit "best_sq2: test implies sq2 < 16, which seems bogus"; 870 } 871 872 /* 873 * expand lengths until the ratio flips 874 */ 875 do { 876 /* 877 * determine the parameters of the next ratio test 878 * 879 * We will multiplicatively expand our test level until 880 * the ratio drops below 1.0. 881 */ 882 expand = 2; 883 low = high; 884 high *= expand; 885 if (config("user_debug") > 1) { 886 printf(" expand the next test range by a factor of %d\n", 887 expand); 888 } 889 890 /* 891 * determine the alg1/alg2 test ratio for this new length 892 */ 893 if (high >= 2^31) { 894 quit "best_sq2: tests imply sq2 >= 2^31, which seems bogus"; 895 } 896 if (config("user_debug") > 0) { 897 printf("testing square alg1/alg2 ratio for len = %d\n", high); 898 } 899 ratio = sq_ratio(high); 900 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 901 best_val = high; 902 best_ratio = ratio; 903 if (config("user_debug") > 1) { 904 printf(" len %d has a new closest ratio to unity: %.6f\n", 905 best_val, best_ratio); 906 } 907 } 908 if (config("user_debug") > 1) { 909 printf(" square alg1/alg2 ratio = %.3f\n", ratio); 910 } 911 } while (ratio > 1.0); 912 913 /* 914 * If we previously expanded more than by a factor of 2, then 915 * we may have jumped over the crossover point. So now 916 * drop down powers of two until the ratio is again >= 1.0 917 */ 918 if (expand > 2) { 919 do { 920 921 /* 922 * contract by 2 923 */ 924 high /= 2; 925 low = high / 2; 926 if (config("user_debug") > 0) { 927 printf("re-testing multiply alg1/alg2 ratio for len = %d\n", 928 high); 929 } 930 ratio = mul_ratio(high); 931 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 932 best_val = high; 933 best_ratio = ratio; 934 if (config("user_debug") > 1) { 935 printf(" len %d has a new closest ratio " 936 "to unity: %.6f\n", 937 best_val, best_ratio); 938 } 939 } 940 if (config("user_debug") > 1) { 941 printf(" multiply alg1/alg2 ratio = %.6f\n", ratio); 942 } 943 944 } while (ratio <= 1.0); 945 946 /* now that the ratio flipped again, use the previous range */ 947 low = high; 948 high = high * 2; 949 } 950 if (config("user_debug") > 0) { 951 printf("Starting binary search between %d and %d\n", low, high); 952 } 953 954 /* 955 * binary search between low and high, for where ratio is just under 1.0 956 */ 957 while (low+1 < high) { 958 959 /* try the mid-point */ 960 mid = int((low+high)/2); 961 if (config("user_debug") > 0) { 962 printf("testing square alg1/alg2 ratio for len = %d\n", mid); 963 } 964 ratio = sq_ratio(mid); 965 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 966 best_val = mid; 967 best_ratio = ratio; 968 if (config("user_debug") > 1) { 969 printf(" len %d has a new closest ratio to unity: %.6f\n", 970 best_val, best_ratio); 971 } 972 } 973 if (config("user_debug") > 1) { 974 printf(" len %d square alg1/alg2 ratio = %.6f\n", mid, ratio); 975 } 976 977 /* stop search if near unity */ 978 if (close_to_one(ratio)) { 979 low = mid; 980 high = mid; 981 if (config("user_debug") > 0) { 982 printf("\twe are close enough to unity ratio at: %d\n", mid); 983 } 984 break; 985 } 986 987 /* bump lower range up if we went over */ 988 if (ratio > 1.0) { 989 if (config("user_debug") > 2) { 990 printf("\tmove low from %d up to %d\n", 991 low, mid); 992 } 993 low = mid; 994 995 /* drop higher range down if we went under */ 996 } else { 997 if (config("user_debug") > 2) { 998 printf("\tmove high from %d down to %d\n", 999 high, mid); 1000 } 1001 high = mid; 1002 } 1003 1004 /* report on test loop progress */ 1005 if (config("user_debug") > 1) { 1006 printf("\tsetting low: %d high: %d diff: %d\n", 1007 low, high, high-low); 1008 } 1009 } 1010 1011 /* 1012 * return on the suggested config("sq2") value 1013 */ 1014 mid = int((low+high)/2); 1015 if (config("user_debug") > 0) { 1016 printf("Best value for square is near %d\n", best_val); 1017 printf("Best square alg1/alg2 ratio is: %.6f\n", best_ratio); 1018 printf("We suggest placing this line in your .calcrc:\n"); 1019 printf("config(\"sq2\", %d),;\n", best_val); 1020 printf("WARNING: It is believed that the output " 1021 "of this resource file is bogus!\n"); 1022 printf("WARNING: You may NOT wish to follow the above suggestion.\n"); 1023 } 1024 return mid; 1025} 1026 1027 1028/* 1029 * pow_loop - measure the CPU time to perform a set of pmod loops 1030 * 1031 * given: 1032 * repeat number of pmod loops to perform 1033 * x array of 5 values, each the same length in BASEB-bit words 1034 * 1035 * NOTE: When their lengths are 1 BASEB-bit word, then a 1036 * dummy loop of simple constants are used. Thus the 1037 * length == 1 is an approximation of loop overhead. 1038 * 1039 * ex exponent for pmod value 1040 * 1041 * returns: 1042 * approximate runtime to perform a pmod loop 1043 * 1044 * NOTE: This is an internal support function that is normally 1045 * not called directly from the command line. Call the 1046 * function best_pow2() instead. 1047 */ 1048define pow_loop(repeat, x, ex) 1049{ 1050 local start; /* start of execution */ 1051 local end; /* end of execution */ 1052 local answer; /* pmod value */ 1053 local len; /* length of each element */ 1054 local baseb_bytes; /* bytes in a BASEB-bit word */ 1055 local i; 1056 1057 /* firewall */ 1058 if (!isint(repeat) || repeat < 0) { 1059 quit "pow_loop: 1st arg: repeat must be an integer > 0"; 1060 } 1061 if (size(*x) != 5) { 1062 quit "pow_loop: 2nd arg matrix does not have 5 elements"; 1063 } 1064 if (matdim(*x) != 1) { 1065 quit "pow_loop: 2nd arg matrix is not 1 dimensional"; 1066 } 1067 if (matmin(*x, 1) != 0) { 1068 quit "pow_loop: 2nd arg matrix index range does not start with 0"; 1069 } 1070 if (matmax(*x, 1) != 4) { 1071 quit "pow_loop: 2nd arg matrix index range does not end with 4"; 1072 } 1073 baseb_bytes = config("baseb") / 8; 1074 len = sizeof((*x)[0]) / baseb_bytes; 1075 for (i=1; i < 4; ++i) { 1076 if ((sizeof((*x)[i]) / baseb_bytes) != len) { 1077 quit "pow_loop: 2nd arg matrix elements are not of " 1078 "equal BASEB-bit word length"; 1079 } 1080 } 1081 if (!isint(ex) || ex < 3) { 1082 quit" pow_loop: 3rd arg ex is not an integer > 2"; 1083 } 1084 1085 /* pmod pairwise, all sets of a given length */ 1086 start = usertime(); 1087 for (i=0; i < repeat; ++i) { 1088 1089 if (len == 1) { 1090 /* we use len == 1 to test this tester loop overhead */ 1091 answer = pmod(0,0,0); answer = pmod(0,0,0); 1092 answer = pmod(0,0,0); answer = pmod(0,0,0); 1093 /**/ 1094 answer = pmod(0,0,0); answer = pmod(0,0,0); 1095 answer = pmod(0,0,0); answer = pmod(0,0,0); 1096 /**/ 1097 answer = pmod(0,0,0); answer = pmod(0,0,0); 1098 answer = pmod(0,0,0); answer = pmod(0,0,0); 1099 /**/ 1100 answer = pmod(0,0,0); answer = pmod(0,0,0); 1101 answer = pmod(0,0,0); answer = pmod(0,0,0); 1102 /**/ 1103 answer = pmod(0,0,0); answer = pmod(0,0,0); 1104 answer = pmod(0,0,0); answer = pmod(0,0,0); 1105 /**/ 1106 answer = pmod(0,0,0); answer = pmod(0,0,0); 1107 answer = pmod(0,0,0); answer = pmod(0,0,0); 1108 } else { 1109 answer = pmod((*x)[0], ex, (*x)[1]); 1110 answer = pmod((*x)[0], ex, (*x)[2]); 1111 answer = pmod((*x)[0], ex, (*x)[3]); 1112 answer = pmod((*x)[0], ex, (*x)[4]); 1113 /**/ 1114 answer = pmod((*x)[1], ex, (*x)[0]); 1115 answer = pmod((*x)[1], ex, (*x)[2]); 1116 answer = pmod((*x)[1], ex, (*x)[3]); 1117 answer = pmod((*x)[1], ex, (*x)[4]); 1118 /**/ 1119 answer = pmod((*x)[2], ex, (*x)[0]); 1120 answer = pmod((*x)[2], ex, (*x)[1]); 1121 answer = pmod((*x)[2], ex, (*x)[3]); 1122 answer = pmod((*x)[2], ex, (*x)[4]); 1123 /**/ 1124 answer = pmod((*x)[3], ex, (*x)[0]); 1125 answer = pmod((*x)[3], ex, (*x)[1]); 1126 answer = pmod((*x)[3], ex, (*x)[2]); 1127 answer = pmod((*x)[3], ex, (*x)[4]); 1128 /**/ 1129 answer = pmod((*x)[4], ex, (*x)[0]); 1130 answer = pmod((*x)[4], ex, (*x)[1]); 1131 answer = pmod((*x)[4], ex, (*x)[2]); 1132 answer = pmod((*x)[4], ex, (*x)[3]); 1133 } 1134 } 1135 1136 /* 1137 * return duration 1138 */ 1139 end = usertime(); 1140 return end-start; 1141} 1142 1143 1144/* 1145 * pow_ratio - ratio of rates of 1st and 2nd pmod algorithms 1146 * 1147 * given: 1148 * len length in BASEB-bit words to pmod 1149 * 1150 * return: 1151 * ratio of (1st / 2nd) algorithm rates 1152 * 1153 * When want to determine a rate to a precision of 1 part in 1000. 1154 * Most systems today return CPU time to at least 10 msec precision. 1155 * So to get rates to that precision, we need to time loops to at 1156 * least 1000 times as long as the precision (10 msec * 1000) 1157 * which usually requires timing of loops that last 10 seconds or more. 1158 * 1159 * NOTE: This is an internal support function that is normally 1160 * not called directly from the command line. Call the 1161 * function best_pow2() instead. 1162 */ 1163define pow_ratio(len) 1164{ 1165 local mat x[5]; /* array of values for pow_loop to pmod */ 1166 local mat one[5]; /* array if single BASEB-bit values */ 1167 local baseb; /* calc word size in bits */ 1168 local orig_cfg; /* caller configuration */ 1169 local loops; /* number of pmod loops to time */ 1170 local tlen; /* time to perform some number of loops */ 1171 local tover; /* est of time for loop overhead */ 1172 local alg1_rate; /* loop rate of 1st algorithm */ 1173 local alg2_rate; /* loop rate of 2nd algorithm */ 1174 local ex; /* exponent to use in pow_loop() */ 1175 local ret; /* return ratio, or 1.0 */ 1176 local i; 1177 1178 /* 1179 * firewall 1180 */ 1181 if (!isint(len) || len < 2) { 1182 quit "pow_ratio: 1st arg: len is not an integer > 1"; 1183 } 1184 1185 /* 1186 * remember the caller's config state 1187 */ 1188 orig_cfg = config("all"); 1189 config("mul2", 0),; 1190 config("sq2", 0),; 1191 config("pow2", 0),; 1192 config("redc2", 0),; 1193 config("tilde", 0),; 1194 1195 /* 1196 * setup 1197 */ 1198 ex = 7; 1199 1200 /* 1201 * initialize x, the values we will pmod 1202 * 1203 * We want these tests to be repeatable as possible, so we will seed 1204 * the PRNG in a deterministic way. 1205 */ 1206 baseb = config("baseb"); 1207 srand(sha1(sha1(ex, baseb, config("version")))); 1208 for (i=0; i < 5; ++i) { 1209 /* force the values to be a full len words long */ 1210 x[i] = ((1<<(((len-1) * baseb) + baseb-1)) | 1211 randbit(((len-1) * baseb) + baseb-2)); 1212 /* single BASEB-bit values */ 1213 one[i] = 1; 1214 } 1215 1216 /* 1217 * determine the number of loops needed to test 1st alg 1218 */ 1219 config("pow2", 2^31-1),; 1220 config("redc2", 2^31-1),; 1221 loops = 1/2; 1222 do { 1223 loops *= 2; 1224 tlen = pow_loop(loops, &x, ex); 1225 if (config("user_debug") > 3) { 1226 printf("\t alg1 loops %d took %.3f sec\n", loops, tlen); 1227 } 1228 } while (tlen < 1.0); 1229 1230 /* 1231 * determine the 1st algorithm rate 1232 */ 1233 loops = max(1, ceil(loops * test_time / tlen)); 1234 if (loops < 16) { 1235 if (config("user_debug") > 1) { 1236 printf(" we must expand alg1 loop test time to about %d secs\n", 1237 ceil(test_time * (16 / loops))); 1238 } 1239 loops = 16; 1240 } 1241 tlen = pow_loop(loops, &x, ex); 1242 if (config("user_debug") > 3) { 1243 printf("\t alg1 time = %.3f secs\n", tlen); 1244 } 1245 tover = pow_loop(loops, &one, ex); 1246 if (config("user_debug") > 3) { 1247 printf("\t alg1 overhead look %.3f secs\n", tover); 1248 } 1249 if (tlen <= tover) { 1250 quit "pow_ratio: overhead >= loop time"; 1251 } 1252 alg1_rate = loops / (tlen - tover); 1253 if (config("user_debug") > 2) { 1254 printf("\tpmod alg1 rate = %.3f loopsets/sec\n", alg1_rate); 1255 } 1256 if (alg1_rate <= 0.0) { 1257 quit "pow_ratio: alg1 rate was <= 0.0"; 1258 } 1259 1260 /* 1261 * determine the number of loops needed to test 1st alg 1262 */ 1263 config("pow2", 2),; 1264 config("redc2", 2^31-1),; 1265 loops = 1/2; 1266 do { 1267 loops *= 2; 1268 tlen = pow_loop(loops, &x, ex); 1269 if (config("user_debug") > 3) { 1270 printf("\t alg2 loops %d took %.3f sec\n", loops, tlen); 1271 } 1272 } while (tlen < 1.0); 1273 1274 /* 1275 * determine the 2nd algorithm rate 1276 */ 1277 loops = max(1, ceil(loops * test_time / tlen)); 1278 if (loops < 16) { 1279 if (config("user_debug") > 1) { 1280 printf(" we must expand alg2 loop test time to about %d secs\n", 1281 ceil(test_time * (16 / loops))); 1282 } 1283 loops = 16; 1284 } 1285 tlen = pow_loop(loops, &x, ex); 1286 if (config("user_debug") > 3) { 1287 printf("\t alg2 time = %.3f secs\n", tlen); 1288 } 1289 tover = pow_loop(loops, &one, ex); 1290 if (config("user_debug") > 3) { 1291 printf("\t alg2 overhead look %.3f secs\n", tover); 1292 } 1293 if (tlen <= tover) { 1294 quit "pow_ratio: overhead >= loop time"; 1295 } 1296 alg2_rate = loops / (tlen - tover); 1297 if (config("user_debug") > 2) { 1298 printf("\tpmod alg2 rate = %.3f loopsets/sec\n", alg2_rate); 1299 } 1300 if (alg2_rate <= 0.0) { 1301 quit "pow_ratio: alg2 rate was <= 0.0"; 1302 } 1303 1304 /* 1305 * restore old config 1306 */ 1307 config("all", orig_cfg),; 1308 1309 /* 1310 * return alg1 / alg2 rate ratio 1311 */ 1312 ret = alg1_rate / alg2_rate; 1313 if (config("user_debug") > 2) { 1314 printf("\tprecise ratio is: %.f pow_ratio will return: %.3f\n", 1315 alg1_rate / alg2_rate, ret); 1316 } 1317 return ret; 1318} 1319 1320 1321/* 1322 * best_pow2 - determine the best config("pow2") parameter w/o REDC2 1323 * 1324 * NOTE: Due to precision problems with CPU measurements, it is not 1325 * unusual for the output of this function to vary slightly 1326 * from run to run. 1327 * 1328 * NOTE: This function is designed to take a long time to run. 1329 * We recommend setting: 1330 * 1331 * config("user_debug", 2) 1332 * 1333 * so that yon can watch the progress of this function. 1334 */ 1335define best_pow2() 1336{ 1337 local ratio; /* previously calculated alg1/alg2 ratio */ 1338 local low; /* low loop value tested */ 1339 local high; /* high loop value tested */ 1340 local mid; /* between low and high */ 1341 local best_val; /* value found with ratio closest to unity */ 1342 local best_ratio; /* closest ratio found to unity */ 1343 local expand; /* how fast to expand the length */ 1344 local looped; /* 1 ==> we have expanded lengths before */ 1345 1346 /* 1347 * setup 1348 */ 1349 printf("WARNING: This tool may not be computing the correct best value\n"); 1350 test_time = 60.0; 1351 printf("The best_pow2() function will take a LONG time to run!\n"); 1352 printf("It is important that best_pow2() run on an otherwise idle host!\n"); 1353 if (config("user_debug") <= 0) { 1354 printf("To monitor progress, set user_debug to 2: " 1355 "config(\"user_debug\", 2)\n"); 1356 } 1357 printf("Starting with loop test time of %d secs\n", test_time); 1358 1359 /* 1360 * firewall - must have a >1.02 ratio for the initial length 1361 * 1362 * We select 1.02 because of the precision of the CPU timing. We 1363 * want to first move into an area where the 1st algorithm clearly 1364 * dominates. 1365 */ 1366 low = 4; 1367 high = 4; 1368 best_val = high; 1369 best_ratio = 1e10; /* not a real value */ 1370 do { 1371 high *= 4; 1372 if (config("user_debug") > 0) { 1373 printf("testing pmod alg1/alg2 ratio for len = %d\n", high); 1374 } 1375 ratio = pow_ratio(high); 1376 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 1377 best_val = high; 1378 best_ratio = ratio; 1379 if (config("user_debug") > 1) { 1380 printf(" len %d has a new closest ratio to unity: %.6f\n", 1381 best_val, best_ratio); 1382 } 1383 } 1384 if (config("user_debug") > 1) { 1385 printf(" pmod alg1/alg2 ratio = %.3f\n", ratio); 1386 if (ratio > 1.0 && ratio <= 1.02) { 1387 printf(" while alg1 is slightly better than alg2, " 1388 "it is not clearly better\n"); 1389 } 1390 } 1391 } while (ratio <= 1.02); 1392 if (config("user_debug") > 0) { 1393 printf("starting the pow2 search above %d\n", high); 1394 } 1395 1396 /* 1397 * expand lengths until the ratio flips 1398 */ 1399 looped = 0; 1400 do { 1401 /* 1402 * determine the parameters of the next ratio test 1403 * 1404 * We will multiplicatively expand our test level until 1405 * the ratio drops below 1.0. 1406 * 1407 * NOTE: At low lengths, the ratios seen to be very small 1408 * so we force an expansion of 4 to speed us on our 1409 * way to larger lengths. At these somewhat larger 1410 * lengths, the ratios usually don't get faster than 1411 * 1.25, so we need to expand force a more rapid 1412 * expansion than normal. At lengths longer than 1413 * 2k, the time to test becomes very long, so we 1414 * want to slow down at these higher lengths. 1415 */ 1416 expand = 2; 1417 if (looped) { 1418 low = high; 1419 } 1420 high *= expand; 1421 if (config("user_debug") > 1) { 1422 printf(" expand the next test range by a factor of %d\n", 1423 expand); 1424 } 1425 1426 /* 1427 * determine the alg1/alg2 test ratio for this new length 1428 */ 1429 if (high >= 2^31) { 1430 quit "best_pow2: test implies pow2 >= 2^31, which seems bogus"; 1431 } 1432 if (config("user_debug") > 0) { 1433 printf("testing pmod alg1/alg2 ratio for len = %d\n", high); 1434 } 1435 ratio = pow_ratio(high); 1436 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 1437 best_val = high; 1438 best_ratio = ratio; 1439 if (config("user_debug") > 1) { 1440 printf(" len %d has a new closest ratio to unity: %.6f\n", 1441 best_val, best_ratio); 1442 } 1443 } 1444 if (config("user_debug") > 1) { 1445 printf(" pmod alg1/alg2 ratio = %.6f\n", ratio); 1446 } 1447 looped = 1; 1448 } while (ratio > 1.0); 1449 if (config("user_debug") > 0) { 1450 printf("Starting binary search between %d and %d\n", low, high); 1451 } 1452 1453 /* 1454 * binary search between low and high, for where ratio is just under 1.0 1455 */ 1456 while (low+1 < high) { 1457 1458 /* try the mid-point */ 1459 mid = int((low+high)/2); 1460 if (config("user_debug") > 0) { 1461 printf("testing pow2 alg1/alg2 ratio for len = %d\n", mid); 1462 } 1463 ratio = pow_ratio(mid); 1464 if (abs(ratio - 1.0) < abs(best_ratio - 1.0)) { 1465 best_val = mid; 1466 best_ratio = ratio; 1467 if (config("user_debug") > 1) { 1468 printf(" len %d has a new closest ratio to unity: %.6f\n", 1469 best_val, best_ratio); 1470 } 1471 } 1472 if (config("user_debug") > 1) { 1473 printf(" len %d pmod alg1/alg2 ratio = %.6f\n", mid, ratio); 1474 } 1475 1476 /* stop search if near unity */ 1477 if (close_to_one(ratio)) { 1478 low = mid; 1479 high = mid; 1480 if (config("user_debug") > 0) { 1481 printf("\twe are close enough to unity ratio at: %d\n", mid); 1482 } 1483 break; 1484 } 1485 1486 /* bump lower range up if we went over */ 1487 if (ratio > 1.0) { 1488 if (config("user_debug") > 2) { 1489 printf("\tmove low from %d up to %d\n", 1490 low, mid); 1491 } 1492 low = mid; 1493 1494 /* drop higher range down if we went under */ 1495 } else { 1496 if (config("user_debug") > 2) { 1497 printf("\tmove high from %d down to %d\n", 1498 high, mid); 1499 } 1500 high = mid; 1501 } 1502 1503 /* report on test loop progress */ 1504 if (config("user_debug") > 1) { 1505 printf("\tsetting low: %d high: %d diff: %d\n", 1506 low, high, high-low); 1507 } 1508 } 1509 1510 /* 1511 * return on the suggested config("pow2") value 1512 */ 1513 mid = int((low+high)/2); 1514 if (config("user_debug") > 0) { 1515 printf("Best value for pmod is near %d\n", best_val); 1516 printf("Best pmod alg1/alg2 ratio is: %.6f\n", best_ratio); 1517 printf("We suggest placing this line in your .calcrc:\n"); 1518 printf("config(\"pow2\", %d),;\n", best_val); 1519 printf("WARNING: It is believed that the output " 1520 "of this resource file is bogus!\n"); 1521 printf("WARNING: You may NOT wish to follow the above suggestion.\n"); 1522 } 1523 return mid; 1524} 1525