1#!/sw/bin/gawk -f
2
3# Usage: distribute_eprover.awk <spec_file> [<lower> [<upper>]]
4#
5# Copyright 2001 Stephan Schulz, schulz@informatik.tu-muenchen.de
6#
7# Read a specification file describing a set of parameters and a list
8# of problems, and run a distributed test of the prover over the
9# problems. This is fairly specific for our site (with systems named
10# systematically), but should be rewritable to a more general case.
11
12
13# ----------% Here starteth the generic stuff % ------------- #
14
15# Similar to C assert()
16
17function assert(val, comment)
18{
19   if(!val)
20   {
21      print "Assertion fail: " comment > "/dev/stderr";
22      exit 1;
23   }
24}
25
26
27
28# Return the result of a single, simple shell command yieding exactly
29# one line
30
31function get_shell_res(cmd,   tmp)
32{
33   cmd | getline tmp;
34   close(cmd);
35
36   assert(tmp, "No result found (get_shell_res(" cmd ")");
37   return tmp;
38}
39
40
41# Same thing, but no response is not a bug automatically
42
43function get_shell_res_nocheck(cmd,   tmp)
44{
45   tmp = "";
46   cmd | getline tmp;
47   close(cmd);
48
49   return tmp;
50}
51
52# Return the result of a single, simple shell command yieding exactly
53# one line, and executed on a remote host. The command should not
54# contain single quotes!
55
56function get_remote_shell_res(host, cmd,    tmp)
57{
58   tmp = get_shell_res("ssh -x " host " '" cmd "'");
59
60   return tmp;
61}
62
63# Same thing, but empty response is ok.
64
65function get_remote_shell_res_nocheck(host, cmd,   tmp)
66{
67   tmp = get_shell_res_nocheck("ssh -x " host " '" cmd "'");
68
69   return tmp;
70}
71
72
73# Minimum...
74
75function min(a,b)
76{
77  if(a>b)
78    {
79      return b;
80    }
81  return a;
82}
83
84
85# Get the host name
86
87function get_hostname(     tmp)
88{
89   tmp = get_shell_res("hostname");
90
91   return tmp;
92}
93
94# Get the Pid
95
96function get_pid(     tmp)
97{
98   getline tmp < "/dev/pid";
99   close("/dev/pid");
100   if(!tmp)
101   {
102      print ARGV[0] ": Cannot get PID ?!?" > "/dev/stderr";
103      exit 1;
104   }
105   return tmp;
106}
107
108
109# Perform globbing
110
111function expand_file(name,            tmp)
112{
113   tmp = get_shell_res("csh -c \"echo " name "\"");
114   if(!tmp)
115   {
116      print ARGV[0] ": Cannot expand filename" > "/dev/stderr";
117      exit 1;
118   }
119   print name " expands into " tmp;
120   return tmp;
121}
122
123
124function file_exists(file,    test, tmp)
125{
126  test = getline tmp < file;
127  close(file);
128  if(test == -1)
129    {
130      return 0;
131    }
132  return 1;
133}
134
135function exp_file_exists(file,    test, tmp)
136{
137   return file_exists(expand_file(file));
138}
139
140
141
142function has_interactive_user(host,    user)
143{
144   user = get_remote_shell_res_nocheck(host, "who|grep console");
145   return user;
146}
147
148
149# Compute memory in Megabytes from a string containing Bytes (no
150# suffixe), Kilobytes (Suffix K), Megabytes (Suffix M) or Gigabytes
151# (Suffix G)
152
153function normalize_mem(valstring)
154{
155   if(valstring ~ /[0-9]+G/)
156   {
157      return int(valstring*1024);
158   }
159   if(valstring ~ /[0-9]+M/)
160   {
161      return int(valstring);
162   }
163   if(valstring ~ /[0-9]+K/)
164   {
165      return int(valstring/1024);
166   }
167   return int(valstring/(1024*1024))
168}
169
170
171# Get the physical memory and free memory of a machine
172
173function get_mem_info(host, result_array,                tmp, tmparr)
174{
175   tmp = get_remote_shell_res_nocheck(host, "top -d1 | grep ^Memory");
176   if(!tmp)
177   {
178      return 0;
179   }
180   split(tmp, tmparr);
181   result_array[0] = normalize_mem(tmparr[2]);
182   result_array[1] = normalize_mem(tmparr[4]);
183   return 1;
184}
185
186
187
188
189# ----------% Here starteth the local stuff % ------------- #
190
191
192# If signal_file exists, sleep in 10 minute intervals until current
193# hour is not within [start_block, end_block]
194
195function delay_processing(start_block, end_block, signal_file,    tmp, day)
196{
197   delay = exp_file_exists(signal_file);
198   tmp = strftime("%H", systime())+0;
199   day = strftime("%w", systime())+0;
200   while(delay && (day!=0) && (day!=6) && (tmp >= start_block) && (tmp < end_block))
201   {
202      print "Blockfile exists, sleeping"
203      system("sleep 600");
204      delay = exp_file_exists(signal_file);
205      tmp = strftime("%H", systime())+0;
206      day = strftime("%w", systime())+0;
207   }
208}
209
210function init_machine_ratings()
211{
212   e_mark["Pentium-III-Coppermine-696.422"]              = 225.642;
213   e_mark["Pentium-III-Coppermine-728.454"]              = 196.681;
214   e_mark["Pentium-III-Coppermine-860.904"]              = 260.519;
215   e_mark["Pentium-III-Coppermine-864.484"]              = 222.518;
216   e_mark["Pentium-III-Katmai-498.762"]                  = 143.921;
217   e_mark["REFERENCE"]                                   = 100;
218   e_mark["SUNW-Sun-Blade-100-sparcv9-502"]              = 131.934;
219   e_mark["SUNW-Sun-Blade-1000-sparcv9-750"]             = 250.049;
220   e_mark["SUNW-Ultra-2-sparc-200"]                      = 69.2536;
221   e_mark["SUNW-Ultra-5_10-sparcv9-300"]                 = 87.2878;
222   e_mark["SUNW-Ultra-5_10-sparcv9-440"]                 = 140.591;
223   e_mark["SUNW-Sun-Fire-880-sparcv9-900"]               = 285.646;
224   e_mark["SUNW-Sun-Fire-880-sparcv9-1200"]              = 380.861;
225   e_mark["ppc7450-1000"]                                = 267.552;
226   e_mark["IntelR-PentiumR-4-CPU-1400MHz-1395.787"]      = 300.922;
227   e_mark["SUNW-Ultra-60-sparcv9-296"]                   = 100;
228   e_mark["NC-AMD-Athlontm-MP-Processor-1600+-1400.058"] = 366.393;
229   e_mark["Pentium-II-Deschutes-448.882"]                = 157.919;
230   e_mark["IntelR-PentiumR-4-CPU-2.40GHz-2405.486"]      = 496.488;
231   e_mark["IntelR-PentiumR-4-CPU-2.40GHz-2405.487"]      = 496.488;
232   e_mark["IntelR-PentiumR-4-CPU-2.40GHz-2405.526"]      = 518.546;
233   e_mark["IntelR-PentiumR-4-CPU-2.40GHz-2405.502"]      = 521.253;
234}
235
236
237function get_host_type(host,          res)
238{
239   res = get_shell_res("ssh -x " host " ~/bin/get_system.awk");
240   if(!e_mark[res])
241   {
242      return "";
243   }
244   print "Host " host " is type " res " with " e_mark[res] " EMark";
245   return res;
246}
247
248function create_host_list(   i)
249{
250  for(i=lower_host; i<=upper_host; i++)
251  {
252     all_hosts[i] = "sunhalle" i;
253     host_is_available["sunhalle" i] = 0;
254     host_in_use["sunhalle" i] = 0;
255  }
256  exclude_hosts["sunhalle83"] = 1; # ssh trouble
257}
258
259
260function get_cluster_name(   pipe, tmp)
261{
262   tmp = get_shell_res("hostname");
263
264   if(match(tmp, /sunjessen.*/))
265   {
266      return "sunjessen";
267   }
268   else if(match(tmp, /sunhalle.*/))
269   {
270      return "sunhalle";
271   }
272   else if(match(tmp, /rayhalle.*/))
273   {
274      return "sunhalle";
275   }
276   else if(match(tmp, /lxjessen.*/))
277   {
278      return "lxjessen";
279   }
280   return 0;
281}
282
283
284function print_used_hosts(print_diffs     ,i, count_files,
285			  count_array, files, array)
286{
287   count_files = 0;
288   count_array = 0;
289
290   for(i=lower_host; i<= upper_host; i++)
291   {
292      array = host_in_use[all_hosts[i]];
293      files = file_exists(all_hosts[i] "_lock");
294
295      if(array)
296      {
297	 /* print "In use: " all_hosts[i]; */
298	 count_array++;
299      }
300      if(files)
301      {
302	 count_files++;
303      }
304      if(print_diffs)
305      {
306	 if((array && (!files)))
307	 {
308	    print all_hosts[i] " in host_in_use[], but no lock file";
309	 }
310	 if(((!array) && files))
311	 {
312	    print all_hosts[i] " has lock file, but no host_in_use[] entry";
313	 }
314      }
315   }
316   print "Found " count_array " array entries";
317   print "Found " count_files " lock files";
318}
319
320
321function extract_load(string, which,     tmp_array, number)
322{
323  number = split(string, tmp_array, ",");
324  if(number == 0)
325    {
326      return -1;
327    }
328  return tmp_array[number-3+which];
329}
330
331
332function extract_host(string)
333{
334  match(string, /sunhalle[0-9]+/);
335  return substr(string, RSTART, RLENGTH);
336}
337
338
339function collect_host_info(    res, count, i, type)
340{
341  count = 0;
342  sum = 0;
343
344  global_check_blockfile_age = 0;
345
346  for(i in all_hosts)
347  {
348     if(exclude_hosts[all_hosts[i]]!=1)
349     {
350	print "Checking host " all_hosts[i] ".";
351	res = check_availablity(all_hosts[i]);
352	if(res != 1000000)
353	{
354           type = get_host_type(all_hosts[i]);
355           if(type)
356           {
357              count++;
358              host_proc_power[all_hosts[i]] = e_mark[type];
359              print "..." all_hosts[i] " is ready at " host_proc_power[all_hosts[i]] " EMark";
360           }
361	}
362     }
363  }
364  print "Found " count " hosts.";
365  return count;
366}
367
368
369function update_global_host_count()
370{
371   global_host_count++;
372   if(global_host_count > upper_host)
373   {
374      global_host_count = lower_host;
375   }
376}
377
378
379function ping_host(host,   pipe, tmp)
380{
381  tmp = get_shell_res_nocheck("ping " host);
382
383  if(!index(tmp, "alive"))
384  {
385     return 0;
386  }
387  return 1;
388}
389
390
391function check_availablity(host,       pipe, tmp, cmd, memdata)
392{
393  if(exclude_hosts[host]==1)
394  {
395     /* Machine unsuitable for reasons known to the user */
396      return 1000000;
397  }
398  if(!ping_host(host))
399  {
400     # No good response -> dont use machine
401	 return 1000000;
402  }
403
404  mem_data[0] = 0;
405  mem_data[1] = 0;
406  if(!get_mem_info(host, mem_data))
407  {
408     # Something is broken
409     return 1000000;
410  }
411  if(mem_data[0] < 256)
412  {
413     print "# Checking " host ": Not enough memory " mem_data[0];
414     exclude_hosts[host] = 1;
415     return 1000000; # Not enough memory
416  }
417  if(has_interactive_user(host))
418  {
419     if(mem_data[1] < 160)
420     {
421	print "# Checking " host ": Greedy interactive user, only " mem_data[1] " free";
422	return 1000000;
423     }
424  }
425  #{
426  #   /* No good response -> dont use machine */
427  #  return 1000000;
428  #}
429  #return extract_load(tmp, 2);
430  return 0;
431}
432
433
434function check_and_initialize(    tmp, job)
435{
436   if(first_job)
437   {
438      if(!executable)
439      {
440	 print "distribute_eprover.awk: No executable found" > "/dev/stderr";
441	 exit 1;
442      }
443      if(!logfile)
444      {
445	 print "distribute_eprover.awk: No logfile found" > "/dev/stderr";
446	 exit 1;
447      }
448      if(!problemdir)
449      {
450	 print "distribute_eprover.awk: No problemdir found" > "/dev/stderr";
451	 exit 1;
452      }
453      if(!file_exists(logfile))
454      {
455	 print "Creating log file " logfile;
456	 print "# " args " " auto_args > logfile;
457	 execpath = get_shell_res("which " executable);
458	 execdate = get_shell_res("ls -l " execpath);
459	 exechost = get_hostname();
460	 execversion = get_shell_res(executable " -V");
461	 print "# Started with " substr(execdate, 42) " (" execversion ") on " exechost >> logfile;
462      }
463      else
464      {
465	 print "Logfile " logfile " exists, using old results";
466	 execpath = get_shell_res("which " executable);
467	 execdate = get_shell_res("ls -l " execpath);
468	 exechost = get_hostname();
469	 execversion = get_shell_res(executable " -V");
470	 print "# Started with " substr(execdate, 42) " (" execversion ") on " exechost >> logfile;
471	 processed_count = 0;
472	 while ((getline tmp < logfile) > 0)
473	 {
474	    if(match(job, /^[0-9A-Za-z].*\.lop/) ||
475               match(job, /^[0-9A-Za-z].*\.p/)||
476               match(job, /^[0-9A-Za-z].*\.e/)  ||
477               match(job, /^[0-9A-Za-z].*\.tpt/)  ||
478               match(job, /^[0-9A-Za-z].*\.tptp/))
479	    {
480	       job = substr(tmp, RSTART, RLENGTH);
481	       processed_jobs[job] = 1;
482	       processed_count++;
483	    }
484	 }
485	 close(logfile);
486	 print "Found " processed_count " old results";
487      }
488      first_job = 0;
489   }
490}
491
492
493
494function process_result(host     , file, tmp, name, time, org_time, status,\
495	     reason, generated, processed,shared_terms,raw_terms,\
496	     rewrite_steps, r_matches, e_matches, literals,nu_subs,\
497	     nurec_sub, u_subs)
498{
499   file = host "_complete";
500   getline name < file;
501   close(file);
502
503   time = time_limit;
504   status = "F";
505   reason = "unknown";
506   generated = 0;
507   processed = 0;
508   shared_terms = 0;
509   raw_terms = 0;
510   rewrite_steps = 0;
511   r_matches = 0;
512   e_matches = 0;
513   literals = 0;
514
515   if(index(name, ".lop") || index(name, ".p") || index(name, ".tptp"))
516   {
517      file = host "_lock";
518      while((getline tmp < file)>0)
519      {
520	 if(index(tmp, "# No proof found!"))
521	 {
522	    status = "N";
523	    reason = "success";
524	 }
525	 else if(index(tmp, "# Proof found!"))
526	 {
527	    status = "T";
528	    reason = "success";
529	 }
530	 else if(index(tmp, "# Failure: Out of unprocessed clauses!"))
531	 {
532	    status = "F";
533	    reason = "incomplete";
534	 }
535	 else if(index(tmp, "# Failure: Resource limit exceeded (memory)"))
536	 {
537	    reason = "maxmem ";
538	 }
539	 else if(index(tmp, "# Failure: Resource limit exceeded (time)"))
540	 {
541	    reason = "maxtime ";
542	 }
543	 else if(index(tmp, "# Failure: User resource limit exceeded"))
544	 {
545	    reason = "maxres";
546	 }
547	 else if(index(tmp, "# Processed clauses                    :"))
548	 {
549	    processed = substr(tmp, 42);
550	 }
551	 else if(index(tmp, "# Generated clauses                    :"))
552	 {
553	    generated = substr(tmp, 42);
554	 }
555	 else if(index(tmp, "# Shared term nodes                    :"))
556	 {
557	    shared_terms = substr(tmp, 42);
558	 }
559	 else if(index(tmp, "# ...corresponding unshared nodes      :"))
560	 {
561	    raw_terms = substr(tmp, 42);
562	 }
563	 else if(index(tmp, "# Shared rewrite steps                 :"))
564	 {
565	    rewrite_steps = substr(tmp, 42);
566	 }
567	 else if(index(tmp, "# Match attempts with oriented units   :"))
568	 {
569	    r_matches = substr(tmp, 42);
570	 }
571	 else if(index(tmp, "# Match attempts with unoriented units :"))
572	 {
573	    e_matches = substr(tmp, 42);
574	 }
575	 else if(index(tmp, "# Total literals in generated clauses  :"))
576	 {
577	    literals = substr(tmp, 42);
578	 }
579	 else if(index(tmp, "# Clause-clause subsumption calls (NU) :"))
580	 {
581	    nu_subs = substr(tmp, 42);
582	 }
583	 else if(index(tmp, "# Rec. Clause-clause subsumption calls :"))
584	 {
585	    nurec_subs = substr(tmp, 42);
586	 }
587	 else if(index(tmp, "# Unit Clause-clause subsumption calls :"))
588	 {
589	    u_subs = substr(tmp, 42);
590	 }
591	 else if(index(tmp, "# Total time"))
592	 {
593	    org_time = substr(tmp, 30);
594	    time = org_time*host_proc_power[host]/100;
595	    # printf("Time %8.3f on host %s corrected to %8.3f\n", org_time, host, time);
596	 }
597      }
598      close(file);
599      printf("%-29s " status " %8.3f  %-10s %10d %10d %10d %10d %10d\n", \
600	     name, 0+time, reason, generated, processed, \
601	     nu_subs, nurec_subs,u_subs) >> logfile;
602      open_jobs--;
603      printf("Open: " open_jobs " %-29s " status " %8.3f  " reason "\n", name, 0+time);
604   }
605   else
606   {
607      print "No valid job in " host"_complete: Something strange";
608      if(!file_exists(file))
609      {
610	 print host"_complete does not seem to exists?!?!";
611      }
612      else
613      {
614	 system("echo " file " >> buggy_complete");
615	 system("cat "  file " >> buggy_complete");
616      }
617   }
618
619   system("rm " cwd "/" host "_lock");
620   system("rm " cwd "/" host "_complete");
621
622   host_in_use[host] = 0;
623}
624
625
626
627
628
629function get_host(   i, host, load, tmp_count)
630{
631   host = "";
632
633   global_check_blockfile_age++;
634   if(global_check_blockfile_age >= 400)
635   {
636      delay_processing(8, 18, "~/block_eprover");
637      global_check_blockfile_age = 0;
638   }
639
640   while(!host)
641   {
642      for(i = lower_host; i<= upper_host; i++)
643      {
644	 update_global_host_count();
645	 if(host_proc_power[all_hosts[global_host_count]])
646	 {
647	    if((host_in_use[all_hosts[global_host_count]]==0) &&
648	       (!file_exists(all_hosts[global_host_count] "_lock")))
649	    {
650	       load = check_availablity(all_hosts[global_host_count]);
651
652	       if(load != 1000000)
653	       {
654		  host = all_hosts[global_host_count];
655		  break;
656	       }
657	    }
658	 }
659      }
660      if(!host)
661      {
662	 print "No host, processing results";
663	 if(!process_pending_results())
664	 {
665	    print "No host available, sleeping";
666	    system("sleep 30");
667	 }
668      }
669   }
670   return host;
671}
672
673
674function find_max_index(array,     i)
675{
676   for(i=1; array[i]; i++)
677   {
678   }
679   return i-1;
680}
681
682
683function shift_num_array(array, position,              limit,i)
684{
685   limit = find_max_index(array);
686
687   for(i = limit; i>=position; i--)
688   {
689      array[i+1] = array[i];
690   }
691   array[position] = "";
692
693   return limit+1;
694}
695
696
697function job_is_not_processed(job    ,tmp)
698{
699   match(job, /^[0-9A-Za-z].*\.lop/) || \
700   match(job, /^[0-9A-Za-z].*\.p/)||    \
701   match(job, /^[0-9A-Za-z].*\.e/)  ||  \
702   match(job, /^[0-9A-Za-z].*\.tpt/)  ||  \
703   match(job, /^[0-9A-Za-z].*\.tptp/);
704   tmp = substr(job, RSTART, RLENGTH);
705   if(tmp in processed_jobs)
706   {
707      return 0;
708   }
709   return 1;
710}
711
712
713function find_pid_in_protocoll(file,      pid, tmp)
714{
715  while ((getline tmp < file) > 0)
716    {
717      if(match(tmp, /# Pid: [0-9]+/))
718	{
719	  pid  = substr(tmp, 8);
720	  break;
721	}
722    }
723  close(file);
724  return pid;
725}
726
727
728function kill_job(lockfile,     host, pid, res, shell)
729{
730   if(match(lockfile, /sunhalle[0-9]+/))
731   {
732      host = substr(lockfile, RSTART, RLENGTH);
733      if(ping_host(host))
734      {
735	 pid  = find_pid_in_protocoll(lockfile);
736	 system("ssh -x " host " kill " pid "\n");
737	 res = 1;
738      }
739      else
740      {
741	 print "Host " host " not reachable";
742	 res = 0;
743      }
744      system("rm " cwd "/" host "_lock");
745      system("rm " cwd "/" host "_complete");
746      return res;
747   }
748   return 0;
749}
750
751
752function kill_old_jobs(         pipe, count, tmp)
753{
754  pipe = "ls " cwd "/sunhalle*_lock";
755  count = 0;
756
757  while ((pipe | getline tmp) > 0)
758  {
759     count+=kill_job(tmp);
760     host_in_use[tmp] = 0;
761  }
762  close(pipe);
763
764  system("rm " cwd "/sunhalle*_complete");
765  system("rm " cwd "/sunhalle*_lock");
766
767  open_jobs = 0;
768  print "Killed " count " old job(s)";
769}
770
771
772function check_alive(     current_time, i)
773{
774   current_time = systime();
775
776   for(i in host_in_use)
777   {
778      if(host_in_use[i] && ((current_time - host_in_use[i]) > time_limit*10))
779      {
780	 print "Trying to kill dormant job on " i ".";
781	 kill_job(i "_lock");
782	 open_jobs--;
783	 host_in_use[i] = 0;
784      }
785   }
786}
787
788
789function number_of_lock_files(        pipe, count)
790{
791  count = 0;
792
793  pipe = "ls " cwd "/sunhalle*_lock";
794
795  while ((pipe | getline tmp) > 0)
796    {
797      if(match(tmp, /sunhalle[0-9]+_lock/))
798	{
799	  count++;
800	}
801    }
802  close(pipe);
803
804  return count;
805}
806
807
808function process_pending_results(     pipe, count, tmp)
809{
810   print_used_hosts(1);
811   count = 0;
812
813   pipe = "ls " cwd "/sunhalle*_complete";
814
815   while ((pipe | getline tmp) > 0)
816   {
817      if(match(tmp, /sunhalle[0-9]+/))
818      {
819	 host = substr(tmp, RSTART, RLENGTH);
820	 process_result(host);
821	 count++;
822      }
823   }
824   close(pipe);
825   print_used_hosts(1);
826   return count;
827}
828
829function host_cpu_limit_opt(limit, host_rating,    res, host_limit)
830{
831   host_limit = ((limit*100)/host_rating)+0.5;
832
833   if(soft_cpu_limit)
834   {
835      res = sprintf("--soft-cpu-limit=%d --cpu-limit=%d",
836		    host_limit,3*host_limit);
837   }
838   else
839   {
840      res = sprintf("--cpu-limit=%d",host_limit);
841   }
842   return res;
843}
844
845
846BEGIN{
847  print "Initializing...";
848  init_machine_ratings();
849  soft_cpu_limit = 0;
850  time_limit = 10; /* Default, may be overridden */
851  auto_args = "-s --print-pid --resources-info --print-statistics --memory-limit=192";
852  first_job = 1;
853  cwd = ENVIRON["PWD"];
854  print "Working directory is " cwd
855  print "Killing old jobs";
856  kill_old_jobs();
857
858  lower_host = 1;
859  upper_host = 107;
860  if(ARGV[2])
861  {
862     lower_host = ARGV[2];
863     ARGV[2] = "";
864  }
865  if(ARGV[3])
866  {
867     upper_host = ARGV[3];
868     ARGV[3] = "";
869  }
870  print "Creating host list";
871  create_host_list();
872  print "Getting host information";
873  collect_host_info();
874  global_host_count = lower_host;
875  print "...complete";
876}
877
878
879
880/^Executable:/{
881   executable = expand_file($2);
882   next;
883}
884
885/^Logfile:/{
886   logfile = expand_file($2);
887   next;
888}
889
890/^Problemdir:/{
891   problemdir = expand_file($2);
892   next;
893}
894
895/^Arguments:/{
896   args = "";
897   for(i=2; i<=NF; i++)
898   {
899      args = args " " $(i);
900   }
901   if(index(args,"--print-detailed-statistics"))
902   {
903      print "Switching to soft cpu limit for detailed statistics";
904      soft_cpu_limit = 1;
905   };
906   next;
907}
908
909/^Time limit:/{
910   time_limit = $3;
911   next;
912}
913
914
915/^Include:/{
916   split(expand_file($2), local_files);
917   local_count = ARGIND+1;
918   for(i=1; local_files[i]; i++)
919   {
920      print "Adding file " local_files[i];
921      shift_num_array(ARGV, local_count);
922      ARGV[local_count] = local_files[i];
923      local_count++;
924      ARGC++;
925   }
926   next;
927}
928
929
930# Everything else not starting with # and not empty is a job!
931/^#/{
932  print "Skipping comment";
933  next;
934}
935
936
937
938/[A-Za-z0-9]+/{
939   job = $0;
940   check_and_initialize();
941   if(job_is_not_processed(job))
942   {
943      host_local = get_host();
944      outfile1 = cwd "/" host_local "_lock";
945      outfile2 = cwd "/" host_local "_complete";
946
947      cpu_opt = host_cpu_limit_opt(time_limit, host_proc_power[host_local]);
948
949      command = executable " " auto_args " " cpu_opt " " args " " problemdir "/" job " > " outfile1;
950
951      printf "Distributing " job " onto " host_local ". ";
952      remote_shell =  "ssh -x -T " host_local " 2>&1 1> /dev/null";
953      print  "touch " outfile1 |remote_shell;
954      print "(/bin/nice -15 " command "; sync; sleep 3; echo " job ">" \
955	 outfile2 ")< /dev/null > & /dev/null &\n" | remote_shell;
956      close(remote_shell);
957      open_jobs++;
958      print "Open jobs: " open_jobs;
959      host_in_use[host_local] = systime();
960   }
961   else
962   {
963      print "Job " job " already processed";
964   }
965}
966
967
968END{
969  print "Distribution of jobs complete";
970
971  count = 0;
972  while((lock_files_pending = number_of_lock_files()) > 0)
973    {
974      print "Waiting for " lock_files_pending " results... (Open jobs:) " open_jobs;
975      system("sleep 30");
976      count++;
977      if(count%10 == 0)
978      {
979	 check_alive();
980      }
981      else
982      {
983	 process_pending_results();
984      }
985    }
986  print "Sorting Result file";
987  srtfile = logfile ".srt";
988  system("sort " logfile " | myuniq.awk > " srtfile "; mv " srtfile " " logfile);
989  print "Test run complete";
990}
991
992