1#!/sw/bin/gawk -f 2 3# Usage: distribute_eprover.awk <spec_file> [<lower> [<upper>]] 4# 5# Copyright 2001 Stephan Schulz, schulz@informatik.tu-muenchen.de 6# 7# Read a specification file describing a set of parameters and a list 8# of problems, and run a distributed test of the prover over the 9# problems. This is fairly specific for our site (with systems named 10# systematically), but should be rewritable to a more general case. 11 12 13# ----------% Here starteth the generic stuff % ------------- # 14 15# Similar to C assert() 16 17function assert(val, comment) 18{ 19 if(!val) 20 { 21 print "Assertion fail: " comment > "/dev/stderr"; 22 exit 1; 23 } 24} 25 26 27 28# Return the result of a single, simple shell command yieding exactly 29# one line 30 31function get_shell_res(cmd, tmp) 32{ 33 cmd | getline tmp; 34 close(cmd); 35 36 assert(tmp, "No result found (get_shell_res(" cmd ")"); 37 return tmp; 38} 39 40 41# Same thing, but no response is not a bug automatically 42 43function get_shell_res_nocheck(cmd, tmp) 44{ 45 tmp = ""; 46 cmd | getline tmp; 47 close(cmd); 48 49 return tmp; 50} 51 52# Return the result of a single, simple shell command yieding exactly 53# one line, and executed on a remote host. The command should not 54# contain single quotes! 55 56function get_remote_shell_res(host, cmd, tmp) 57{ 58 tmp = get_shell_res("ssh -x " host " '" cmd "'"); 59 60 return tmp; 61} 62 63# Same thing, but empty response is ok. 64 65function get_remote_shell_res_nocheck(host, cmd, tmp) 66{ 67 tmp = get_shell_res_nocheck("ssh -x " host " '" cmd "'"); 68 69 return tmp; 70} 71 72 73# Minimum... 74 75function min(a,b) 76{ 77 if(a>b) 78 { 79 return b; 80 } 81 return a; 82} 83 84 85# Get the host name 86 87function get_hostname( tmp) 88{ 89 tmp = get_shell_res("hostname"); 90 91 return tmp; 92} 93 94# Get the Pid 95 96function get_pid( tmp) 97{ 98 getline tmp < "/dev/pid"; 99 close("/dev/pid"); 100 if(!tmp) 101 { 102 print ARGV[0] ": Cannot get PID ?!?" > "/dev/stderr"; 103 exit 1; 104 } 105 return tmp; 106} 107 108 109# Perform globbing 110 111function expand_file(name, tmp) 112{ 113 tmp = get_shell_res("csh -c \"echo " name "\""); 114 if(!tmp) 115 { 116 print ARGV[0] ": Cannot expand filename" > "/dev/stderr"; 117 exit 1; 118 } 119 print name " expands into " tmp; 120 return tmp; 121} 122 123 124function file_exists(file, test, tmp) 125{ 126 test = getline tmp < file; 127 close(file); 128 if(test == -1) 129 { 130 return 0; 131 } 132 return 1; 133} 134 135function exp_file_exists(file, test, tmp) 136{ 137 return file_exists(expand_file(file)); 138} 139 140 141 142function has_interactive_user(host, user) 143{ 144 user = get_remote_shell_res_nocheck(host, "who|grep console"); 145 return user; 146} 147 148 149# Compute memory in Megabytes from a string containing Bytes (no 150# suffixe), Kilobytes (Suffix K), Megabytes (Suffix M) or Gigabytes 151# (Suffix G) 152 153function normalize_mem(valstring) 154{ 155 if(valstring ~ /[0-9]+G/) 156 { 157 return int(valstring*1024); 158 } 159 if(valstring ~ /[0-9]+M/) 160 { 161 return int(valstring); 162 } 163 if(valstring ~ /[0-9]+K/) 164 { 165 return int(valstring/1024); 166 } 167 return int(valstring/(1024*1024)) 168} 169 170 171# Get the physical memory and free memory of a machine 172 173function get_mem_info(host, result_array, tmp, tmparr) 174{ 175 tmp = get_remote_shell_res_nocheck(host, "top -d1 | grep ^Memory"); 176 if(!tmp) 177 { 178 return 0; 179 } 180 split(tmp, tmparr); 181 result_array[0] = normalize_mem(tmparr[2]); 182 result_array[1] = normalize_mem(tmparr[4]); 183 return 1; 184} 185 186 187 188 189# ----------% Here starteth the local stuff % ------------- # 190 191 192# If signal_file exists, sleep in 10 minute intervals until current 193# hour is not within [start_block, end_block] 194 195function delay_processing(start_block, end_block, signal_file, tmp, day) 196{ 197 delay = exp_file_exists(signal_file); 198 tmp = strftime("%H", systime())+0; 199 day = strftime("%w", systime())+0; 200 while(delay && (day!=0) && (day!=6) && (tmp >= start_block) && (tmp < end_block)) 201 { 202 print "Blockfile exists, sleeping" 203 system("sleep 600"); 204 delay = exp_file_exists(signal_file); 205 tmp = strftime("%H", systime())+0; 206 day = strftime("%w", systime())+0; 207 } 208} 209 210function init_machine_ratings() 211{ 212 e_mark["Pentium-III-Coppermine-696.422"] = 225.642; 213 e_mark["Pentium-III-Coppermine-728.454"] = 196.681; 214 e_mark["Pentium-III-Coppermine-860.904"] = 260.519; 215 e_mark["Pentium-III-Coppermine-864.484"] = 222.518; 216 e_mark["Pentium-III-Katmai-498.762"] = 143.921; 217 e_mark["REFERENCE"] = 100; 218 e_mark["SUNW-Sun-Blade-100-sparcv9-502"] = 131.934; 219 e_mark["SUNW-Sun-Blade-1000-sparcv9-750"] = 250.049; 220 e_mark["SUNW-Ultra-2-sparc-200"] = 69.2536; 221 e_mark["SUNW-Ultra-5_10-sparcv9-300"] = 87.2878; 222 e_mark["SUNW-Ultra-5_10-sparcv9-440"] = 140.591; 223 e_mark["SUNW-Sun-Fire-880-sparcv9-900"] = 285.646; 224 e_mark["SUNW-Sun-Fire-880-sparcv9-1200"] = 380.861; 225 e_mark["ppc7450-1000"] = 267.552; 226 e_mark["IntelR-PentiumR-4-CPU-1400MHz-1395.787"] = 300.922; 227 e_mark["SUNW-Ultra-60-sparcv9-296"] = 100; 228 e_mark["NC-AMD-Athlontm-MP-Processor-1600+-1400.058"] = 366.393; 229 e_mark["Pentium-II-Deschutes-448.882"] = 157.919; 230 e_mark["IntelR-PentiumR-4-CPU-2.40GHz-2405.486"] = 496.488; 231 e_mark["IntelR-PentiumR-4-CPU-2.40GHz-2405.487"] = 496.488; 232 e_mark["IntelR-PentiumR-4-CPU-2.40GHz-2405.526"] = 518.546; 233 e_mark["IntelR-PentiumR-4-CPU-2.40GHz-2405.502"] = 521.253; 234} 235 236 237function get_host_type(host, res) 238{ 239 res = get_shell_res("ssh -x " host " ~/bin/get_system.awk"); 240 if(!e_mark[res]) 241 { 242 return ""; 243 } 244 print "Host " host " is type " res " with " e_mark[res] " EMark"; 245 return res; 246} 247 248function create_host_list( i) 249{ 250 for(i=lower_host; i<=upper_host; i++) 251 { 252 all_hosts[i] = "sunhalle" i; 253 host_is_available["sunhalle" i] = 0; 254 host_in_use["sunhalle" i] = 0; 255 } 256 exclude_hosts["sunhalle83"] = 1; # ssh trouble 257} 258 259 260function get_cluster_name( pipe, tmp) 261{ 262 tmp = get_shell_res("hostname"); 263 264 if(match(tmp, /sunjessen.*/)) 265 { 266 return "sunjessen"; 267 } 268 else if(match(tmp, /sunhalle.*/)) 269 { 270 return "sunhalle"; 271 } 272 else if(match(tmp, /rayhalle.*/)) 273 { 274 return "sunhalle"; 275 } 276 else if(match(tmp, /lxjessen.*/)) 277 { 278 return "lxjessen"; 279 } 280 return 0; 281} 282 283 284function print_used_hosts(print_diffs ,i, count_files, 285 count_array, files, array) 286{ 287 count_files = 0; 288 count_array = 0; 289 290 for(i=lower_host; i<= upper_host; i++) 291 { 292 array = host_in_use[all_hosts[i]]; 293 files = file_exists(all_hosts[i] "_lock"); 294 295 if(array) 296 { 297 /* print "In use: " all_hosts[i]; */ 298 count_array++; 299 } 300 if(files) 301 { 302 count_files++; 303 } 304 if(print_diffs) 305 { 306 if((array && (!files))) 307 { 308 print all_hosts[i] " in host_in_use[], but no lock file"; 309 } 310 if(((!array) && files)) 311 { 312 print all_hosts[i] " has lock file, but no host_in_use[] entry"; 313 } 314 } 315 } 316 print "Found " count_array " array entries"; 317 print "Found " count_files " lock files"; 318} 319 320 321function extract_load(string, which, tmp_array, number) 322{ 323 number = split(string, tmp_array, ","); 324 if(number == 0) 325 { 326 return -1; 327 } 328 return tmp_array[number-3+which]; 329} 330 331 332function extract_host(string) 333{ 334 match(string, /sunhalle[0-9]+/); 335 return substr(string, RSTART, RLENGTH); 336} 337 338 339function collect_host_info( res, count, i, type) 340{ 341 count = 0; 342 sum = 0; 343 344 global_check_blockfile_age = 0; 345 346 for(i in all_hosts) 347 { 348 if(exclude_hosts[all_hosts[i]]!=1) 349 { 350 print "Checking host " all_hosts[i] "."; 351 res = check_availablity(all_hosts[i]); 352 if(res != 1000000) 353 { 354 type = get_host_type(all_hosts[i]); 355 if(type) 356 { 357 count++; 358 host_proc_power[all_hosts[i]] = e_mark[type]; 359 print "..." all_hosts[i] " is ready at " host_proc_power[all_hosts[i]] " EMark"; 360 } 361 } 362 } 363 } 364 print "Found " count " hosts."; 365 return count; 366} 367 368 369function update_global_host_count() 370{ 371 global_host_count++; 372 if(global_host_count > upper_host) 373 { 374 global_host_count = lower_host; 375 } 376} 377 378 379function ping_host(host, pipe, tmp) 380{ 381 tmp = get_shell_res_nocheck("ping " host); 382 383 if(!index(tmp, "alive")) 384 { 385 return 0; 386 } 387 return 1; 388} 389 390 391function check_availablity(host, pipe, tmp, cmd, memdata) 392{ 393 if(exclude_hosts[host]==1) 394 { 395 /* Machine unsuitable for reasons known to the user */ 396 return 1000000; 397 } 398 if(!ping_host(host)) 399 { 400 # No good response -> dont use machine 401 return 1000000; 402 } 403 404 mem_data[0] = 0; 405 mem_data[1] = 0; 406 if(!get_mem_info(host, mem_data)) 407 { 408 # Something is broken 409 return 1000000; 410 } 411 if(mem_data[0] < 256) 412 { 413 print "# Checking " host ": Not enough memory " mem_data[0]; 414 exclude_hosts[host] = 1; 415 return 1000000; # Not enough memory 416 } 417 if(has_interactive_user(host)) 418 { 419 if(mem_data[1] < 160) 420 { 421 print "# Checking " host ": Greedy interactive user, only " mem_data[1] " free"; 422 return 1000000; 423 } 424 } 425 #{ 426 # /* No good response -> dont use machine */ 427 # return 1000000; 428 #} 429 #return extract_load(tmp, 2); 430 return 0; 431} 432 433 434function check_and_initialize( tmp, job) 435{ 436 if(first_job) 437 { 438 if(!executable) 439 { 440 print "distribute_eprover.awk: No executable found" > "/dev/stderr"; 441 exit 1; 442 } 443 if(!logfile) 444 { 445 print "distribute_eprover.awk: No logfile found" > "/dev/stderr"; 446 exit 1; 447 } 448 if(!problemdir) 449 { 450 print "distribute_eprover.awk: No problemdir found" > "/dev/stderr"; 451 exit 1; 452 } 453 if(!file_exists(logfile)) 454 { 455 print "Creating log file " logfile; 456 print "# " args " " auto_args > logfile; 457 execpath = get_shell_res("which " executable); 458 execdate = get_shell_res("ls -l " execpath); 459 exechost = get_hostname(); 460 execversion = get_shell_res(executable " -V"); 461 print "# Started with " substr(execdate, 42) " (" execversion ") on " exechost >> logfile; 462 } 463 else 464 { 465 print "Logfile " logfile " exists, using old results"; 466 execpath = get_shell_res("which " executable); 467 execdate = get_shell_res("ls -l " execpath); 468 exechost = get_hostname(); 469 execversion = get_shell_res(executable " -V"); 470 print "# Started with " substr(execdate, 42) " (" execversion ") on " exechost >> logfile; 471 processed_count = 0; 472 while ((getline tmp < logfile) > 0) 473 { 474 if(match(job, /^[0-9A-Za-z].*\.lop/) || 475 match(job, /^[0-9A-Za-z].*\.p/)|| 476 match(job, /^[0-9A-Za-z].*\.e/) || 477 match(job, /^[0-9A-Za-z].*\.tpt/) || 478 match(job, /^[0-9A-Za-z].*\.tptp/)) 479 { 480 job = substr(tmp, RSTART, RLENGTH); 481 processed_jobs[job] = 1; 482 processed_count++; 483 } 484 } 485 close(logfile); 486 print "Found " processed_count " old results"; 487 } 488 first_job = 0; 489 } 490} 491 492 493 494function process_result(host , file, tmp, name, time, org_time, status,\ 495 reason, generated, processed,shared_terms,raw_terms,\ 496 rewrite_steps, r_matches, e_matches, literals,nu_subs,\ 497 nurec_sub, u_subs) 498{ 499 file = host "_complete"; 500 getline name < file; 501 close(file); 502 503 time = time_limit; 504 status = "F"; 505 reason = "unknown"; 506 generated = 0; 507 processed = 0; 508 shared_terms = 0; 509 raw_terms = 0; 510 rewrite_steps = 0; 511 r_matches = 0; 512 e_matches = 0; 513 literals = 0; 514 515 if(index(name, ".lop") || index(name, ".p") || index(name, ".tptp")) 516 { 517 file = host "_lock"; 518 while((getline tmp < file)>0) 519 { 520 if(index(tmp, "# No proof found!")) 521 { 522 status = "N"; 523 reason = "success"; 524 } 525 else if(index(tmp, "# Proof found!")) 526 { 527 status = "T"; 528 reason = "success"; 529 } 530 else if(index(tmp, "# Failure: Out of unprocessed clauses!")) 531 { 532 status = "F"; 533 reason = "incomplete"; 534 } 535 else if(index(tmp, "# Failure: Resource limit exceeded (memory)")) 536 { 537 reason = "maxmem "; 538 } 539 else if(index(tmp, "# Failure: Resource limit exceeded (time)")) 540 { 541 reason = "maxtime "; 542 } 543 else if(index(tmp, "# Failure: User resource limit exceeded")) 544 { 545 reason = "maxres"; 546 } 547 else if(index(tmp, "# Processed clauses :")) 548 { 549 processed = substr(tmp, 42); 550 } 551 else if(index(tmp, "# Generated clauses :")) 552 { 553 generated = substr(tmp, 42); 554 } 555 else if(index(tmp, "# Shared term nodes :")) 556 { 557 shared_terms = substr(tmp, 42); 558 } 559 else if(index(tmp, "# ...corresponding unshared nodes :")) 560 { 561 raw_terms = substr(tmp, 42); 562 } 563 else if(index(tmp, "# Shared rewrite steps :")) 564 { 565 rewrite_steps = substr(tmp, 42); 566 } 567 else if(index(tmp, "# Match attempts with oriented units :")) 568 { 569 r_matches = substr(tmp, 42); 570 } 571 else if(index(tmp, "# Match attempts with unoriented units :")) 572 { 573 e_matches = substr(tmp, 42); 574 } 575 else if(index(tmp, "# Total literals in generated clauses :")) 576 { 577 literals = substr(tmp, 42); 578 } 579 else if(index(tmp, "# Clause-clause subsumption calls (NU) :")) 580 { 581 nu_subs = substr(tmp, 42); 582 } 583 else if(index(tmp, "# Rec. Clause-clause subsumption calls :")) 584 { 585 nurec_subs = substr(tmp, 42); 586 } 587 else if(index(tmp, "# Unit Clause-clause subsumption calls :")) 588 { 589 u_subs = substr(tmp, 42); 590 } 591 else if(index(tmp, "# Total time")) 592 { 593 org_time = substr(tmp, 30); 594 time = org_time*host_proc_power[host]/100; 595 # printf("Time %8.3f on host %s corrected to %8.3f\n", org_time, host, time); 596 } 597 } 598 close(file); 599 printf("%-29s " status " %8.3f %-10s %10d %10d %10d %10d %10d\n", \ 600 name, 0+time, reason, generated, processed, \ 601 nu_subs, nurec_subs,u_subs) >> logfile; 602 open_jobs--; 603 printf("Open: " open_jobs " %-29s " status " %8.3f " reason "\n", name, 0+time); 604 } 605 else 606 { 607 print "No valid job in " host"_complete: Something strange"; 608 if(!file_exists(file)) 609 { 610 print host"_complete does not seem to exists?!?!"; 611 } 612 else 613 { 614 system("echo " file " >> buggy_complete"); 615 system("cat " file " >> buggy_complete"); 616 } 617 } 618 619 system("rm " cwd "/" host "_lock"); 620 system("rm " cwd "/" host "_complete"); 621 622 host_in_use[host] = 0; 623} 624 625 626 627 628 629function get_host( i, host, load, tmp_count) 630{ 631 host = ""; 632 633 global_check_blockfile_age++; 634 if(global_check_blockfile_age >= 400) 635 { 636 delay_processing(8, 18, "~/block_eprover"); 637 global_check_blockfile_age = 0; 638 } 639 640 while(!host) 641 { 642 for(i = lower_host; i<= upper_host; i++) 643 { 644 update_global_host_count(); 645 if(host_proc_power[all_hosts[global_host_count]]) 646 { 647 if((host_in_use[all_hosts[global_host_count]]==0) && 648 (!file_exists(all_hosts[global_host_count] "_lock"))) 649 { 650 load = check_availablity(all_hosts[global_host_count]); 651 652 if(load != 1000000) 653 { 654 host = all_hosts[global_host_count]; 655 break; 656 } 657 } 658 } 659 } 660 if(!host) 661 { 662 print "No host, processing results"; 663 if(!process_pending_results()) 664 { 665 print "No host available, sleeping"; 666 system("sleep 30"); 667 } 668 } 669 } 670 return host; 671} 672 673 674function find_max_index(array, i) 675{ 676 for(i=1; array[i]; i++) 677 { 678 } 679 return i-1; 680} 681 682 683function shift_num_array(array, position, limit,i) 684{ 685 limit = find_max_index(array); 686 687 for(i = limit; i>=position; i--) 688 { 689 array[i+1] = array[i]; 690 } 691 array[position] = ""; 692 693 return limit+1; 694} 695 696 697function job_is_not_processed(job ,tmp) 698{ 699 match(job, /^[0-9A-Za-z].*\.lop/) || \ 700 match(job, /^[0-9A-Za-z].*\.p/)|| \ 701 match(job, /^[0-9A-Za-z].*\.e/) || \ 702 match(job, /^[0-9A-Za-z].*\.tpt/) || \ 703 match(job, /^[0-9A-Za-z].*\.tptp/); 704 tmp = substr(job, RSTART, RLENGTH); 705 if(tmp in processed_jobs) 706 { 707 return 0; 708 } 709 return 1; 710} 711 712 713function find_pid_in_protocoll(file, pid, tmp) 714{ 715 while ((getline tmp < file) > 0) 716 { 717 if(match(tmp, /# Pid: [0-9]+/)) 718 { 719 pid = substr(tmp, 8); 720 break; 721 } 722 } 723 close(file); 724 return pid; 725} 726 727 728function kill_job(lockfile, host, pid, res, shell) 729{ 730 if(match(lockfile, /sunhalle[0-9]+/)) 731 { 732 host = substr(lockfile, RSTART, RLENGTH); 733 if(ping_host(host)) 734 { 735 pid = find_pid_in_protocoll(lockfile); 736 system("ssh -x " host " kill " pid "\n"); 737 res = 1; 738 } 739 else 740 { 741 print "Host " host " not reachable"; 742 res = 0; 743 } 744 system("rm " cwd "/" host "_lock"); 745 system("rm " cwd "/" host "_complete"); 746 return res; 747 } 748 return 0; 749} 750 751 752function kill_old_jobs( pipe, count, tmp) 753{ 754 pipe = "ls " cwd "/sunhalle*_lock"; 755 count = 0; 756 757 while ((pipe | getline tmp) > 0) 758 { 759 count+=kill_job(tmp); 760 host_in_use[tmp] = 0; 761 } 762 close(pipe); 763 764 system("rm " cwd "/sunhalle*_complete"); 765 system("rm " cwd "/sunhalle*_lock"); 766 767 open_jobs = 0; 768 print "Killed " count " old job(s)"; 769} 770 771 772function check_alive( current_time, i) 773{ 774 current_time = systime(); 775 776 for(i in host_in_use) 777 { 778 if(host_in_use[i] && ((current_time - host_in_use[i]) > time_limit*10)) 779 { 780 print "Trying to kill dormant job on " i "."; 781 kill_job(i "_lock"); 782 open_jobs--; 783 host_in_use[i] = 0; 784 } 785 } 786} 787 788 789function number_of_lock_files( pipe, count) 790{ 791 count = 0; 792 793 pipe = "ls " cwd "/sunhalle*_lock"; 794 795 while ((pipe | getline tmp) > 0) 796 { 797 if(match(tmp, /sunhalle[0-9]+_lock/)) 798 { 799 count++; 800 } 801 } 802 close(pipe); 803 804 return count; 805} 806 807 808function process_pending_results( pipe, count, tmp) 809{ 810 print_used_hosts(1); 811 count = 0; 812 813 pipe = "ls " cwd "/sunhalle*_complete"; 814 815 while ((pipe | getline tmp) > 0) 816 { 817 if(match(tmp, /sunhalle[0-9]+/)) 818 { 819 host = substr(tmp, RSTART, RLENGTH); 820 process_result(host); 821 count++; 822 } 823 } 824 close(pipe); 825 print_used_hosts(1); 826 return count; 827} 828 829function host_cpu_limit_opt(limit, host_rating, res, host_limit) 830{ 831 host_limit = ((limit*100)/host_rating)+0.5; 832 833 if(soft_cpu_limit) 834 { 835 res = sprintf("--soft-cpu-limit=%d --cpu-limit=%d", 836 host_limit,3*host_limit); 837 } 838 else 839 { 840 res = sprintf("--cpu-limit=%d",host_limit); 841 } 842 return res; 843} 844 845 846BEGIN{ 847 print "Initializing..."; 848 init_machine_ratings(); 849 soft_cpu_limit = 0; 850 time_limit = 10; /* Default, may be overridden */ 851 auto_args = "-s --print-pid --resources-info --print-statistics --memory-limit=192"; 852 first_job = 1; 853 cwd = ENVIRON["PWD"]; 854 print "Working directory is " cwd 855 print "Killing old jobs"; 856 kill_old_jobs(); 857 858 lower_host = 1; 859 upper_host = 107; 860 if(ARGV[2]) 861 { 862 lower_host = ARGV[2]; 863 ARGV[2] = ""; 864 } 865 if(ARGV[3]) 866 { 867 upper_host = ARGV[3]; 868 ARGV[3] = ""; 869 } 870 print "Creating host list"; 871 create_host_list(); 872 print "Getting host information"; 873 collect_host_info(); 874 global_host_count = lower_host; 875 print "...complete"; 876} 877 878 879 880/^Executable:/{ 881 executable = expand_file($2); 882 next; 883} 884 885/^Logfile:/{ 886 logfile = expand_file($2); 887 next; 888} 889 890/^Problemdir:/{ 891 problemdir = expand_file($2); 892 next; 893} 894 895/^Arguments:/{ 896 args = ""; 897 for(i=2; i<=NF; i++) 898 { 899 args = args " " $(i); 900 } 901 if(index(args,"--print-detailed-statistics")) 902 { 903 print "Switching to soft cpu limit for detailed statistics"; 904 soft_cpu_limit = 1; 905 }; 906 next; 907} 908 909/^Time limit:/{ 910 time_limit = $3; 911 next; 912} 913 914 915/^Include:/{ 916 split(expand_file($2), local_files); 917 local_count = ARGIND+1; 918 for(i=1; local_files[i]; i++) 919 { 920 print "Adding file " local_files[i]; 921 shift_num_array(ARGV, local_count); 922 ARGV[local_count] = local_files[i]; 923 local_count++; 924 ARGC++; 925 } 926 next; 927} 928 929 930# Everything else not starting with # and not empty is a job! 931/^#/{ 932 print "Skipping comment"; 933 next; 934} 935 936 937 938/[A-Za-z0-9]+/{ 939 job = $0; 940 check_and_initialize(); 941 if(job_is_not_processed(job)) 942 { 943 host_local = get_host(); 944 outfile1 = cwd "/" host_local "_lock"; 945 outfile2 = cwd "/" host_local "_complete"; 946 947 cpu_opt = host_cpu_limit_opt(time_limit, host_proc_power[host_local]); 948 949 command = executable " " auto_args " " cpu_opt " " args " " problemdir "/" job " > " outfile1; 950 951 printf "Distributing " job " onto " host_local ". "; 952 remote_shell = "ssh -x -T " host_local " 2>&1 1> /dev/null"; 953 print "touch " outfile1 |remote_shell; 954 print "(/bin/nice -15 " command "; sync; sleep 3; echo " job ">" \ 955 outfile2 ")< /dev/null > & /dev/null &\n" | remote_shell; 956 close(remote_shell); 957 open_jobs++; 958 print "Open jobs: " open_jobs; 959 host_in_use[host_local] = systime(); 960 } 961 else 962 { 963 print "Job " job " already processed"; 964 } 965} 966 967 968END{ 969 print "Distribution of jobs complete"; 970 971 count = 0; 972 while((lock_files_pending = number_of_lock_files()) > 0) 973 { 974 print "Waiting for " lock_files_pending " results... (Open jobs:) " open_jobs; 975 system("sleep 30"); 976 count++; 977 if(count%10 == 0) 978 { 979 check_alive(); 980 } 981 else 982 { 983 process_pending_results(); 984 } 985 } 986 print "Sorting Result file"; 987 srtfile = logfile ".srt"; 988 system("sort " logfile " | myuniq.awk > " srtfile "; mv " srtfile " " logfile); 989 print "Test run complete"; 990} 991 992