1#!/usr/bin/awk -f
2#* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
3#*                                                                           *
4#*                  This file is part of the program and library             *
5#*         SCIP --- Solving Constraint Integer Programs                      *
6#*                                                                           *
7#*    Copyright (C) 2002-2021 Konrad-Zuse-Zentrum                            *
8#*                            fuer Informationstechnik Berlin                *
9#*                                                                           *
10#*  SCIP is distributed under the terms of the ZIB Academic License.         *
11#*                                                                           *
12#*  You should have received a copy of the ZIB Academic License              *
13#*  along with SCIP; see the file COPYING. If not email to scip@zib.de.      *
14#*                                                                           *
15#* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
16#
17#@file    permaverage.awk
18#@brief   compute averages over instances for different permuations
19#@author  Jan Kuske
20#@author  Marc Pfetsch
21#
22# This script creates the average values between different
23# permutations over the same setting, solver, and instances. There are
24# some switches to give additional information. They are listed and
25# documented in the begin-block. This script will not interrupt if the
26# type of a problem changes between the permutations. Instead it will
27# print "**" in the column "Type". The status is shown as "status (n)",
28# the number n tells how many times the status appeared. Only
29# the Status with the highest priority will be counted.
30#
31# The priorities are:
32#  1. fail / abort /readerror
33#  2. some limit is reached except timeout
34#  3. timeout
35#  4. ok
36#
37# If some permutations are missing, the script will proceed and print
38# a list with all instances with missing permutations. The same
39# happens with failed/abort/readerror status permutations
40
41function min(x,y)
42{
43   return (x) < (y) ? (x) : (y);
44}
45
46function max(x,y)
47{
48   return (x) > (y) ? (x) : (y);
49}
50
51function ceil(x)
52{
53   return (x == int(x) ? x : (x < 0 ? int(x) : int(x+1)));
54}
55
56function fracceil(x,f)
57{
58   return ceil(x/f)*f;
59}
60
61function median(c,v,  j)
62{
63   asort(v,j);
64   if (c % 2)
65      return j[(c+1)/2];
66   else
67      return (j[c/2+1]+j[c/2])/2.0;
68}
69
70BEGIN {
71   printrounded = 0;             # output rounded average values for integral values
72   printmediantime = 0;          # output median time
73   printstdtime = 0;             # output standard devation time
74   namelength = 35;              # maximal length of instance names (can be increased)
75
76   skipfails = 0;                # if true: the script will proceed, even if some permutations failed
77				 # if false: if one permutation failed, the whole instance will be ignored
78   missingpem = 0;               # to verify if there is at least one permutation missing
79   failedpem = 0;                # to verify if there is at least one permutation failed
80
81   infinity = 1e+20;
82   mintime = 0.5;
83   timegeomshift = 1.0;
84   nodegeomshift = 100.0;
85
86   nruns = 0;
87   nprobs[nruns] = 0;
88   problistlen = 0;
89}
90/^@02 timelimit: / {
91   timelimit[nruns] = $3;
92}
93/^@01/ {
94   # determine solver and settings
95   githash[nruns] = $4;
96
97   solver[nruns] = $2;
98   sub(/:.*/, "", solver[nruns]);
99
100   settings[nruns] = $2;
101   sub(/.*:/, "", settings[nruns]);
102
103   nruns++;
104   nprobs[nruns] = 0;
105}
106// {
107   if ( NF >= 13 )
108   {
109      statuses["ok"] = 1;
110      statuses["timeout"] = 1;
111      statuses["unknown"] = 1;
112      statuses["abort"] = 1;
113      statuses["fail"] = 1;
114      statuses["readerror"] = 1;
115      statuses["better"] = 1;
116      statuses["solved"] = 1;
117      statuses["sollimit"] = 1;
118      statuses["gaplimit"] = 1;
119      statuses["memlimit"] = 1;
120      statuses["nodelimit"] = 1;
121
122      validline = 0;
123
124      if ( NF >= 13 && $13 in statuses ) # GLPK, CPLEX, SCIP without columns displaying times to first and best solution
125      {
126	 # collect data (line with problem type, original and presolved problem size and simplex iterations)
127	 name[nruns,nprobs[nruns]] = $1;
128	 type[nruns,nprobs[nruns]] = $2;
129	 origconss[nruns,nprobs[nruns]] = $3;
130	 origvars[nruns,nprobs[nruns]] = $4;
131	 conss[nruns,nprobs[nruns]] = $5;
132	 vars[nruns,nprobs[nruns]] = $6;
133	 dualbound[nruns,nprobs[nruns]] = max(min($7, +infinity), -infinity);
134	 primalbound[nruns,nprobs[nruns]] = max(min($8, +infinity), -infinity);
135	 gap[nruns,nprobs[nruns]] = $9;
136	 iters[nruns,nprobs[nruns]] = $10;
137	 nodes[nruns,nprobs[nruns]] = max($11,1);
138	 time[nruns,nprobs[nruns]] = fracceil(max($12,mintime),0.1);
139	 status[nruns,nprobs[nruns]] = $13;
140	 validline = 1;
141      }
142
143      if ( validline )
144      {
145	 # postprocessing of information
146	 if ( status[nruns,nprobs[nruns]] == "better" )
147	    status[nruns,nprobs[nruns]] = "timeout";
148	 if ( status[nruns,nprobs[nruns]] == "sollimit" || status[nruns,nprobs[nruns]] == "gaplimit" || status[nruns,nprobs[nruns]] == "solved" )
149	    status[nruns,nprobs[nruns]] = "ok";
150
151	 probidx[$1,nruns] = nprobs[nruns];
152	 if ( $1 in probcnt )
153	   probcnt[$1]++;
154	 else
155	    probcnt[$1] = 1;
156	 nprobs[nruns]++;
157	 if ( probcnt[$1] == 1 )
158	 {
159	    problist[problistlen] = $1;
160	    problistlen++;
161	    probfirstrun[$1] = nruns;
162	 }
163      }
164   }
165}
166END {
167   if ( nruns == 0 )
168   {
169      printf("No instances found in log files.\n");
170      exit 1;
171   }
172
173   # check whether time limits, solvers, and git hashes are the same
174   t = timelimit[0];
175   s = solver[0];
176   h = githash[0];
177   for (i = 1; i < nruns; ++i)
178   {
179      if ( timelimit[i] != t )
180      {
181	 printf("Time limits of the runs are different.\n");
182	 exit 1;
183      }
184      if ( solver[i] != s )
185      {
186	 printf("Solvers of the runs are different.\n");
187	 exit 1;
188      }
189      if ( githash[i] != h )
190      {
191	 printf("Git hashes of the runs are different.\n");
192	 exit 1;
193      }
194   }
195
196   # prepare header
197   hyphenstr = "";
198   for (i = 0; i < namelength; ++i)
199      hyphenstr = sprintf("%s-", hyphenstr);
200
201   # first part: name of given length
202   tablehead1 = hyphenstr;
203   tablehead2 = sprintf("Name%*s", namelength-4, " ");
204   tablehead3 = hyphenstr;
205
206   # append rest of header
207   if ( printrounded )
208   {
209      tablehead1 = tablehead1"+------+--- Original --+-- Presolved --+----------------+----------------+------+---------+--------+-------+";
210      tablehead2 = tablehead2"| Type | Conss |  Vars | Conss |  Vars |   Dual Bound   |  Primal Bound  | Gap%% |  Iters  |  Nodes |  Time |";
211      tablehead3 = tablehead3"+------+-------+-------+-------+-------+----------------+----------------+------+---------+--------+-------+";
212   }
213   else
214   {
215      tablehead1 = tablehead1"+------+--- Original --+---- Presolved ----+----------------+----------------+------+-----------+----------+-------+";
216      tablehead2 = tablehead2"| Type | Conss |  Vars |   Conss |    Vars |   Dual Bound   |  Primal Bound  | Gap%% |   Iters   |   Nodes  |  Time |";
217      tablehead3 = tablehead3"+------+-------+-------+---------+---------+----------------+----------------+------+-----------+----------+-------+";
218   }
219
220   tablehead1 = tablehead1"------------+";
221   tablehead2 = tablehead2" Delta Time |";
222   tablehead3 = tablehead3"------------+";
223
224   if (printstdtime)
225   {
226      tablehead1 = tablehead1"----------+";
227      tablehead2 = tablehead2" StddTime |";
228      tablehead3 = tablehead3"----------+";
229   }
230
231   if (printmediantime)
232   {
233      tablehead1 = tablehead1"----------+";
234      tablehead2 = tablehead2"  Median  |";
235      tablehead3 = tablehead3"----------+";
236   }
237
238   tablehead1 = tablehead1"--------\n";
239   tablehead2 = tablehead2"       \n";
240   tablehead3 = tablehead3"--------\n";
241
242   printf(tablehead1);
243   printf(tablehead2);
244   printf(tablehead3);
245
246   # init averages over all instances
247   nodegeom = 0.0;
248   timegeom = 0.0;
249   shiftednodegeom = nodegeomshift;
250   shiftedtimegeom = timegeomshift;
251   stotnodes = 0.0;
252   stottime = 0.0;
253   passes = 0;
254   timeouts = 0;
255   fails = 0;
256
257   # display the mean values
258   k=0; #this is a counter is used to calculate the geom. mean.
259   for (i = 0; i < problistlen; ++i)
260   {
261      prob = problist[i];
262
263      if ( length(prob) > namelength )
264	 shortprob = substr(prob, length(prob)-namelength-1, namelength);
265      else
266	 shortprob = prob;
267
268      line = sprintf("%-18s", shortprob);
269
270      firstrun = probfirstrun[prob];
271      pemmiscnt[prob]=firstrun;
272      if (firstrun != 0)
273	 missingpem = 1;
274
275      # get data for first instance to compare with
276      pidx = probidx[prob,firstrun];
277
278      # check whether the following values are the same for each run
279      typefirst = type[firstrun,pidx];
280      origconssfirst = origconss[firstrun,pidx];
281      origvarsfirst = origvars[firstrun,pidx];
282      statusfirst = status[firstrun,pidx];
283      finalstatus = statusfirst;
284      finalstatusnr = 1;
285      if ( statusfirst == "fail" || statusfirst == "abort" || statusfirst == "readererror" )
286      {
287	 failedpem = 1;
288	 finalstatus="ok";
289	 finalstatusnr = 0;
290	 pemfailedcnt[prob]=1;
291      }
292
293      # initialize average values (note that the presolve # of conss and vars might differ between runs)
294      avgconss = conss[firstrun,pidx];
295      avgvars = vars[firstrun,pidx];
296      avgdb = dualbound[firstrun,pidx];
297      avgpb = primalbound[firstrun,pidx];
298      avggap = gap[firstrun,pidx];
299      avgiters = iters[firstrun,pidx];
300      avgnodes = nodes[firstrun,pidx];
301      avgtime = time[firstrun,pidx];
302      mintime = time[firstrun,pidx];
303      maxtime = time[firstrun,pidx];
304
305      # loop through runs
306      for (s = firstrun+1; s < nruns; ++s)
307      {
308	 pidx = probidx[prob,s];
309
310	 # check whether instance has been processed
311	 if ( pidx == "" )
312	 {
313	    pemmiscnt[prob]++;
314	    missingpem = 1;
315	    continue;
316	 }
317
318	 if ( status[s,pidx] == "fail" || status[s,pidx] == "abort" || status[s,pidx] == "readererror" )
319	 {
320	    failedpem = 1;
321	    pemfailedcnt[prob]++;
322	    if (skipfails)
323	    {
324	       continue;
325	    }
326	    else
327	    {
328	       break;
329	    }
330	 }
331
332	 # compare statistics to first instance
333	 if ( type[s,pidx] != typefirst )
334	 {
335	    typerfirst = "**";   #if the type changes between runs, we write "**" into the type column.
336	 }
337
338	 if ( origconss[s,pidx] != origconssfirst )
339	 {
340	    printf("Error: Problem <%s> number of constraints not equal between runs (%d != %d (%d)).\n", prob, origconssfirst, origconss[s,pidx], s);
341	    exit 1;
342	 }
343
344	 if ( origvars[s,pidx] != origvarsfirst )
345	 {
346	    printf("Error: Problem <%s> number of variables not equal between runs.\n", prob);
347	    exit 1;
348	 }
349
350	 if ( status[s,pidx] == finalstatus )
351	    finalstatusnr += 1;
352	 else
353	 {
354	    if ( finalstatus != "sollimit" && finalstatus != "gaplimit" && finalstatus != "memlimit" && finalstatus != "nodelimit" )
355	    {
356	       if ( status[s,pidx] == "sollimit" || status[s,pidx] == "gaplimit" || status[s,pidx] == "memlimit" || status[s,pidx] == "nodelimit" )
357	       {
358		  finalstatus = status[s,pidx];
359		  finalstatusnr = 1;
360	       }
361	       else
362	       {
363		  if ( finalstatus != "timeout" )
364		  {
365		     if ( status[s,pidx] == "timeout" )
366		     {
367			finalstatus = status[s,pidx];
368			finalstatusnr = 1;
369		     }
370		  }
371	       }
372	    }
373	 }
374
375	 # ---------------------------------------------
376	 # compute average values
377
378	 # take care of infinite values
379	 if ( avgdb > -infinity && avgdb < infinity )
380	 {
381	    if ( dualbound[s,pidx] <= -infinity )
382	       avgdb = -infinity;
383	    else if ( dualbound[s,pidx] >= infinity )
384	       avgdb = -infinity;
385	    else
386	       avgdb += dualbound[s,pidx];
387	 }
388
389	 if ( avgpb > -infinity && avgpb < infinity )
390	 {
391	    if ( primalbound[s,pidx] <= -infinity )
392	       avgpb = infinity;
393	    else if ( primalbound[s,pidx] >= infinity )
394	       avgpb = infinity;
395	    else
396	       avgpb += primalbound[s,pidx];
397	 }
398
399	 # take care of gap
400	 if ( gap[s,pidx] == "" || gap[s,pidx] == "--" || gap[s,pidx] == "Large" )
401	    avggap = infinity;
402	 else if ( gap[s,pidx] < infinity )
403	    avggap += gap[s,pidx];
404	 else
405	    avggap = infinity;
406
407	 # the other values should all be finite
408	 avgconss += conss[s,pidx];
409	 avgvars += vars[s,pidx];
410	 avgiters += iters[s,pidx];
411	 avgnodes += nodes[s,pidx];
412	 avgtime += time[s,pidx];
413
414	 if (maxtime < time[s,pidx])
415	    maxtime = time[s,pidx];
416	 if (mintime > time[s,pidx])
417	    mintime = time[s,pidx];
418      }
419
420      if (pemfailedcnt[prob] != 0 && !skipfails)
421      {
422	 continue;
423      }
424
425      if (pemfailedcnt[prob] != probcnt[prob])
426      {
427	 probcnt[prob]=probcnt[prob]-pemfailedcnt[prob];
428      }
429      else
430      {
431	 continue;
432      }
433
434      # final computation of average values
435      if ( avgdb > -infinity && avgdb < infinity )
436	 avgdb /=probcnt[prob];
437
438      if ( avgpb > -infinity && avgpb < infinity )
439	 avgpb /= probcnt[prob];
440
441      if ( avggap > -infinity && avggap < infinity )
442	 avggap /= probcnt[prob];
443
444      avgconss /= probcnt[prob];
445      avgvars /= probcnt[prob];
446      avgiters /= probcnt[prob];
447      avgnodes /= probcnt[prob];
448      avgtime /= probcnt[prob];
449      difftime = maxtime - mintime;
450
451      stddtime = 0;
452      tempcnt=0;
453      for (s = 0; s < nruns; ++s)
454      {
455	 pidx = probidx[prob,s];
456	 if ( pidx != "" )
457	 {
458	    stddtime = stddtime + (avgtime - time[s,pidx])^2;
459	    medtimeList[tempcnt] = time[s,pidx];
460	    tempcnt++;
461	 }
462      }
463      if ( probcnt[prob] > 1 )
464	 stddtime = (1/(probcnt[prob]-1))*stddtime;
465
466      stddtime = sqrt(stddtime);
467      medtime =  median(tempcnt,medtimeList);
468
469      # output
470      if ( avggap < 0.0 )
471	 gapstr = "  --  ";
472      else if ( avggap < 1e+04 )
473	 gapstr = sprintf("%6.1f", avggap);
474      else
475	 gapstr = " Large";
476
477      if ( printrounded )
478      {
479	 printf("%-*s  %-5s %7d %7d %7d %7d %16.9g %16.9g %6s %9d %8d %7.1f %12.1f ",
480	 namelength, shortprob, typefirst, origconssfirst, origvarsfirst, avgconss, avgvars, avgdb, avgpb, gapstr, avgiters, avgnodes, avgtime,difftime);
481      }
482      else
483      {
484	 printf("%-*s  %-5s %7d %7d %9.1f %9.1f %16.9g %16.9g %6s %11.1f %10.1f %7.1f %12.1f ",
485	 namelength, shortprob, typefirst, origconssfirst, origvarsfirst, avgconss, avgvars, avgdb, avgpb, gapstr, avgiters, avgnodes, avgtime,difftime);
486      }
487
488      if (printstdtime)
489      {
490	 printf("%10.1f ",stddtime);
491      }
492      if (printmediantime)
493      {
494	 printf("%10.1f ",medtime);
495      }
496
497      printf("%s (%d)\n", finalstatus, finalstatusnr);
498
499      # determine count as fails/timeout/passed
500      if ( finalstatus == "timeout" || finalstatus == "sollimit" || finalstatus == "gaplimit" || finalstatus == "memlimit" || finalstatus == "nodelimit" )
501	 timeouts += 1;
502
503      if ( finalstatus == "ok" || finalstatus == "better" || finalstatus == "unknown" || finalstatus == "solved" )
504	 passes += 1;
505
506      if ( finalstatus == "fail" || finalstatus == "abort" || finalstatus == "readererror" )
507	 fails += 1;
508
509      # compute averages over all instances
510      nodegeom = nodegeom^(k/(k+1)) * max(avgnodes, 1.0)^(1.0/(k+1));
511      timegeom = timegeom^(k/(k+1)) * max(avgtime, 1.0)^(1.0/(k+1));
512      shiftednodegeom = shiftednodegeom^(k/(k+1)) * max(avgnodes + nodegeomshift, 1.0)^(1.0/(k+1));
513      shiftedtimegeom = shiftedtimegeom^(k/(k+1)) * max(avgtime + timegeomshift, 1.0)^(1.0/(k+1));
514      k++;
515      stotnodes += avgnodes;
516      stottime += avgtime;
517   }
518   printf(tablehead3)
519
520   # output averages over all instances
521   tablebottom1 = "------------------------------[Nodes]---------------[Time]------";
522   tablebottom2 = "  Cnt  Pass  Time  Fail  total(k)     geom.     total     geom.";
523   tablebottom3 = "----------------------------------------------------------------";
524
525   tablebottom1 = tablebottom1"\n";
526   tablebottom2 = tablebottom2"\n";
527   tablebottom3 = tablebottom3"\n";
528
529   printf(tablebottom1);
530   printf(tablebottom2);
531   printf(tablebottom3);
532
533   shiftednodegeom -= nodegeomshift;
534   shiftedtimegeom -= timegeomshift;
535
536   printf("%5d %5d %5d %5d %9d %9.1f %9.1f %9.1f ",passes+timeouts+fails , passes, timeouts, fails, stotnodes / 1000.0, nodegeom, stottime, timegeom);
537
538   printf("\n");
539   printf(" shifted geom. [%5d/%5.1f]      %9.1f           %9.1f ",nodegeomshift, timegeomshift, shiftednodegeom, shiftedtimegeom);
540   printf("\n");
541   printf(tablebottom3);
542
543   if (missingpem)
544   {
545      printf("\n");
546      printf(tablebottom3);
547      printf("Missing Permutations \n");
548      printf(tablebottom3);
549      printf("%-18s %s\n","Instance","Permutations");
550      printf(tablebottom3);
551
552      for (i = 0; i < problistlen; ++i)
553      {
554	 prob = problist[i];
555	 if ( length(prob) > namelength )
556	    shortprob = substr(prob, length(prob)-namelength-1, namelength);
557	 else
558	    shortprob = prob;
559	 if (pemmiscnt[prob] != 0)
560	    printf("%-18s  %2d \n",shortprob,pemmiscnt[prob]);
561      }
562   }
563   if (!skipfails && failedpem)
564   {
565      printf("\n");
566      printf(tablebottom3);
567      printf("Ignored Instances with at least one failed permutation \n");
568      printf(tablebottom3);
569      printf("%-18s \n","Instance");
570      printf(tablebottom3);
571      for (i = 0; i < problistlen; ++i)
572      {
573	 prob = problist[i];
574	 if ( length(prob) > namelength )
575	    shortprob = substr(prob, length(prob)-namelength-1, namelength);
576	 else
577	    shortprob = prob;
578	 if (pemfailedcnt[prob] != 0)
579	    printf("%-18s\n",shortprob);
580      }
581   }
582   else if (skipfails && failedpem)
583   {
584      printf("\n");
585      printf(tablebottom3);
586      printf("Skipped failed permutations \n");
587      printf(tablebottom3);
588      printf("%-18s %s\n","Instance","failed");
589      printf(tablebottom3);
590      for (i = 0; i < problistlen; ++i)
591      {
592	 prob = problist[i];
593	 if ( length(prob) > namelength )
594	    shortprob = substr(prob, length(prob)-namelength-1, namelength);
595	 else
596	    shortprob = prob;
597
598	 if (pemfailedcnt[prob] != 0)
599	 {
600	    if (pemfailedcnt[prob] != probcnt[prob])
601	       printf("%-18s %2d\n",shortprob,pemfailedcnt[prob]);
602	    else
603	       printf("%-18s %2d (all)\n",shortprob,pemfailedcnt[prob]);
604	 }
605      }
606   }
607   printf("\n");
608
609   # try to clean up settings (if used for permuted runs)
610   setting = settings[1];
611   sub(/-p([0-9])*$/, "", setting);
612
613   printf("@03 Permutations: %d\n",nruns);
614   printf("@02 timelimit: %g\n", timelimit[0]);
615   printf("@01 %s:%s [GitHash: %s\n", solver[0], setting, githash[0]);
616}
617