1 /* mplrs.c: initial release of MPI version
2 This program is free software; you can redistribute it and/or
3 modify it under the terms of the GNU General Public License
4 as published by the Free Software Foundation; either version 2
5 of the License, or (at your option) any later version.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15
16 Author: Charles Jordan skip@ist.hokudai.ac.jp
17 Based on plrs.cpp by Gary Roumanis
18 Initial lrs Author: David Avis avis@cs.mcgill.ca
19 */
20
21 /* #include "lrslib.h" */ /* included in mplrs.h */
22
23 #include "mplrs.h"
24
25 #include <mpi.h>
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <string.h>
29 #include <sys/time.h>
30 #include <time.h>
31
32 /* global variables */
33 mplrsv mplrs; /* state of this process */
34 masterv master; /* state of the master */
35 consumerv consumer; /* state of the consumer */
36
37 char argv0[] = "mplrs-internal"; /* warning removal for C++ */
38
39 /******************
40 * initialization *
41 ******************/
42
main(int argc,char ** argv)43 int main(int argc, char **argv)
44 {
45
46 mplrs_init(argc, argv);
47
48 mprintf2(("%d: initialized on %s\n",mplrs.rank,mplrs.host));
49
50 if (mplrs.size<3)
51 return mplrs_fallback();
52
53 if (mplrs.rank == MASTER)
54 return mplrs_master();
55 else if (mplrs.rank == CONSUMER)
56 return mplrs_consumer();
57 else
58 return mplrs_worker();
59 }
60
mplrs_init(int argc,char ** argv)61 void mplrs_init(int argc, char **argv)
62 {
63 int i,j,count;
64 int header[4];
65 char c;
66 time_t curt = time(NULL);
67 char *tim, *tim1;
68 char *offs, *tmp;
69 long *wred = NULL; /* redineq for redund */
70
71 /* make timestamp for filenames */
72 tim = ctime(&curt);
73 tim1 = tim+4;
74 tim1[3] = tim1[6] = '_';
75 tim1[15]='\0';
76
77 /* start MPI */
78 MPI_Init(&argc, &argv);
79 MPI_Comm_rank(MPI_COMM_WORLD, &mplrs.rank);
80 MPI_Comm_size(MPI_COMM_WORLD, &mplrs.size);
81 MPI_Get_processor_name(mplrs.host, &count);
82
83 /* allocate mp for volume calculation, from plrs */
84 lrs_alloc_mp(mplrs.tN); lrs_alloc_mp(mplrs.tD);
85 lrs_alloc_mp(mplrs.Vnum); lrs_alloc_mp(mplrs.Vden);
86 lrs_alloc_mp(mplrs.Tnum); lrs_alloc_mp(mplrs.Tden);
87 itomp(ZERO, mplrs.Vnum); itomp(ONE, mplrs.Vden);
88
89 gettimeofday(&mplrs.start, NULL);
90
91 mplrs_initstrucs(); /* initialize default values of globals */
92
93 /* process commandline arguments, set input and output files */
94 mplrs_commandline(argc, argv);
95
96 if (mplrs.rank == MASTER)
97 {
98 /* open input file for reading on master, histogram
99 * file for writing on master, if used.
100 */
101 mplrs_initfiles();
102 master_sendfile();
103 /* may have produced warnings from stage 0 lrs, flush them. */
104 if (mplrs.output_list == NULL) /* need to prod consumer */
105 post_output("warning"," "); /* see waiting_initial */
106 process_output(); /* before starting a flush */
107 clean_outgoing_buffers();
108 }
109 else
110 {
111 /* open output file for writing on consumer, open histogram file
112 * for writing on consumer, if used.
113 */
114 if (mplrs.rank == CONSUMER)
115 mplrs_initfiles();
116 /* receive input file from master */
117 MPI_Recv(header, 4, MPI_INT, 0, 20, MPI_COMM_WORLD,
118 MPI_STATUS_IGNORE);
119 count = header[0];
120 mplrs.abortinit = header[3];
121 mplrs.input = malloc(sizeof(char)*(count+1));
122 if (!mplrs.abortinit) /* don't need if aborting, may be big */
123 MPI_Recv(mplrs.input, header[0], MPI_CHAR, 0, 20,
124 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
125 mplrs.input[count] = '\0';
126 if (header[1]>0 && mplrs.rank == CONSUMER) /* sigh ... */
127 {
128 consumer.redineq = calloc((header[2]+1),
129 sizeof(long));
130 mplrs.redund = 1;
131 }
132 else if (header[1]>0) /* handle redund split */
133 {
134 wred = malloc(header[1]*sizeof(long));
135 MPI_Recv(wred, header[1], MPI_LONG, 0, 20,
136 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
137 count+=snprintf(NULL, 0, "\nredund_list %d", header[1]);
138 for (i=0; i<header[1]; i++)
139 count+=snprintf(NULL, 0, " %ld", wred[i]);
140 mprintf(("%d: got redund_list %d option\n",mplrs.rank,
141 header[1]));
142 mplrs.redund = 1;
143
144 tmp = realloc(mplrs.input,sizeof(char)*(count+1));
145 if (tmp == NULL)
146 {
147 fprintf(stderr, "Error: unable to allocate memory for input\n");
148 MPI_Abort(MPI_COMM_WORLD, 1);
149 }
150 mplrs.input = tmp;
151
152 offs = mplrs.input + header[0]; /* concatenate */
153 if (mplrs.rank != CONSUMER)
154 {
155 offs += sprintf(offs, "\nredund_list %d",header[1]);
156 for (i=0; i<header[1]; i++)
157 offs += sprintf(offs, " %ld", wred[i]);
158 }
159 }
160 mplrs.input[count] = '\0';
161
162 /* get number of chars needed for worker files */
163 j = mplrs.size;
164 for (i=1; j>9; i++)
165 j = j/10;
166 i += 6+strlen(tim1); /* mplrs_TIMESTAMP */
167 i += strlen(mplrs.tfn_prefix) + strlen(mplrs.input_filename) +
168 i + 6; /* _%d.ine\0 */
169 mplrs.tfn = malloc(sizeof(char) * i);
170 sprintf(mplrs.tfn, "%smplrs_%s%s_%d.ine",
171 mplrs.tfn_prefix, tim1,
172 mplrs.input_filename,
173 mplrs.rank);
174 /* flatten directory structure in mplrs.input_filename
175 * for mplrs.tfn, to prevent writing to non-existent
176 * subdirectories in e.g. /tmp
177 */
178 i = strlen(mplrs.tfn_prefix) + 6 + strlen(tim1);
179 j = strlen(mplrs.tfn);
180 for (; i<j; i++)
181 {
182 c = mplrs.tfn[i];
183 if (c == '/' || c == '\\')
184 mplrs.tfn[i] = '_';
185 }
186
187 }
188
189 /* setup signals --
190 * TERM checkpoint to checkpoint file, or output file if none & exit
191 * HUP ditto
192 */
193 signal(SIGTERM, mplrs_caughtsig);
194 signal(SIGHUP, mplrs_caughtsig);
195 free(wred);
196 }
197
198 /* if we catch a signal, set a flag to exit */
mplrs_caughtsig(int sig)199 void mplrs_caughtsig(int sig)
200 {
201 if (mplrs.caughtsig<2)
202 mplrs.caughtsig = 1;
203 signal(sig, mplrs_caughtsig);
204 return;
205 }
206
207 /* if we've caught a signal, tell the master about it */
mplrs_handlesigs(void)208 void mplrs_handlesigs(void)
209 {
210 float junk = 0; /* 0: caught a signal */
211 if (mplrs.caughtsig != 1)
212 return;
213 /* we want to stop, so no need to hurry and queue the send */
214 MPI_Send(&junk, 1, MPI_FLOAT, MASTER, 9, MPI_COMM_WORLD);
215 mplrs.caughtsig = 2; /* handled */
216 return;
217 }
218
219 /* signal the master that we want to stop,
220 * can try to checkpoint. Use mplrs to give
221 * output if desired.
222 * Does not return to caller ... ugly
223 */
mplrs_cleanstop(int checkpoint)224 void mplrs_cleanstop(int checkpoint)
225 {
226 MPI_Request req = MPI_REQUEST_NULL;
227 char *cobasis = NULL;
228 float junk;
229 int header[8]={0};
230 int flag = 0, len;
231 unsigned long ncob = 0;
232 mprintf(("%d: in cleanstop\n", mplrs.rank));
233 if (checkpoint)
234 junk = 1;
235 else
236 junk = -1;
237 /* inform master, wait for reply */
238 MPI_Send(&junk, 1, MPI_FLOAT, MASTER, 9, MPI_COMM_WORLD);
239 MPI_Recv(&junk, 1, MPI_FLOAT,MASTER,9,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
240 /* needs to do usual cleanup / exit of a worker. callstack is
241 * unpredictable though, so exit() and don't return.
242 */
243 /* leaks: starting_cobasis (mplrs_worker)
244 */
245 /* do_work cleanup*/
246 remove(mplrs.tfn); /* delete temporary file */
247 /* mplrs_worker cleanup */
248 mplrs.outputblock = 0;
249 process_output();
250 process_curwarn();
251 return_unfinished_cobases();
252 clean_outgoing_buffers();
253 /* mplrs_worker start of loop ... */
254 MPI_Isend(&ncob, 1, MPI_UNSIGNED, MASTER, 6, MPI_COMM_WORLD, &req);
255 while (1)
256 {
257 MPI_Test(&req, &flag, MPI_STATUS_IGNORE);
258 if (flag)
259 {
260 mprintf3(("%d: clean_stop incoming message from 0\n",
261 mplrs.rank));
262 MPI_Recv(header, 8, MPI_INT, MASTER, MPI_ANY_TAG,
263 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
264 if (header[0] == -1) /* no more work to do */
265 {
266 mplrs_worker_finished();
267 exit(0);
268 }
269 else /* forget this cobasis */
270 {
271 len = header[0];
272 cobasis = malloc(sizeof(char)*(len+1));
273 MPI_Recv(cobasis, len, MPI_CHAR, MASTER,
274 MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
275 cobasis[len] = '\0';
276 mprintf3(("%d: got %s, throwing away\n",
277 mplrs.rank, cobasis));
278 free(cobasis);
279 return_unfinished_cobases();
280 clean_outgoing_buffers();
281 MPI_Isend(&ncob, 1, MPI_UNSIGNED, MASTER, 6,
282 MPI_COMM_WORLD, &req);
283 }
284 }
285 clean_outgoing_buffers();
286 }
287
288 /* mplrs_worker */
289
290 }
291
292 /* terminate immediately and ungracefully after printing the string */
mplrs_emergencystop(const char * msg)293 void mplrs_emergencystop(const char *msg)
294 {
295 int ret;
296 fprintf(stderr, "%s",msg);
297 ret = MPI_Abort(MPI_COMM_WORLD, 1);
298 exit(ret);
299 }
300
301 /* send the contents of the input file to all workers */
master_sendfile(void)302 void master_sendfile(void)
303 {
304 char *buf;
305 int count=0;
306 int extra=0;
307 int header[4]={0};
308 /* header: input file size, #redund to do this worker, m_A,
309 abortinit */
310 int c, i, j, chunksize=0, rem=0, rstart;
311 long rcount = 0, *wred = NULL;
312 long m;
313
314 /* get m from lrs */
315 mplrs.tfn = mplrs.input_filename;
316
317 mplrs_worker_init(); /* we're the master but okay */
318 header[3] = mplrs.abortinit; /* tell workers to abort if bad input */
319 if (mplrs.overflow != 3 && mplrs.abortinit == 0)
320 m = mplrs.P->m_A;
321 else /* overflow in mplrs_worker_init and non-hybrid, can't start run */
322 m = 0;
323 header[2] = m;
324 if (mplrs.overflow!=3 && mplrs.R->redund && mplrs.abortinit == 0)
325 {
326 master.redund = 1;
327 for (i=1; i<=m; i++)
328 if (mplrs.R->redineq[i] == 1)
329 rcount++;
330 }
331
332 /* check free TODO*/
333 if (mplrs.overflow != 3 && !mplrs.abortinit)
334 {
335 mplrs.lrs_main(0,NULL,&mplrs.P,&mplrs.Q,0,2,NULL,mplrs.R);
336 }
337 c = fgetc(master.input);
338
339 while (c!=EOF)
340 {
341 count++;
342 c = fgetc(master.input);
343 }
344
345 if (mplrs.countonly)
346 extra = 10; /* \ncountonly */
347
348 buf = malloc(sizeof(char)*(count+extra));
349
350 fseek(master.input, 0, SEEK_SET);
351
352 for (i=0; i<count; i++)
353 {
354 c = fgetc(master.input);
355 buf[i]=c;
356 }
357 if (mplrs.countonly)
358 sprintf(buf+i, "\ncountonly");
359 header[0] = count+extra;
360 if (master.redund)
361 {
362 /* will split into chunksize sized blocks,
363 * also add the rem-many extra ones to low-id workers
364 */
365 chunksize = rcount / (mplrs.size-2);
366 rem = rcount % (mplrs.size - 2);
367 }
368 if (chunksize == 0) /* m smaller than #workers */
369 master.max_redundworker = rem+1;
370 else
371 master.max_redundworker = mplrs.size-1;
372 mprintf(("M: making chunks of size %d rem %d, max_redundworker=%d\n",
373 chunksize,rem,master.max_redundworker));
374
375 wred = calloc(chunksize+1, sizeof(long));
376
377 rstart = 1;
378 for (i=0; i<mplrs.size; i++)
379 {
380 header[1] = 0;
381 if (i==MASTER)
382 continue;
383 else if (master.redund && i==CONSUMER)
384 {
385 header[1] = 1;
386 }
387 else if (master.redund && i<=master.max_redundworker)
388 {
389 /* set redund options */
390 header[1] = chunksize;
391 if (rem>0)
392 {
393 header[1]++;
394 rem--;
395 }
396 for (j=0; j<header[1]; j++)
397 {
398 for (; rstart<=m; rstart++)
399 if (mplrs.R->redineq[rstart] == 1 )
400 {
401 wred[j] = rstart++;
402 break;
403 }
404 }
405 }
406 else if (master.redund) /* this worker gets no jobs */
407 header[1] = header[2] = 0; /* not actually needed */
408 mprintf2(("M: Sending input file to %d\n", i));
409 MPI_Send(header, 4, MPI_INT, i, 20, MPI_COMM_WORLD);
410 if (!mplrs.abortinit) /* don't need if aborting, may be big */
411 MPI_Send(buf, count+extra, MPI_CHAR, i, 20,
412 MPI_COMM_WORLD);
413 if (header[1]>0 && i!=CONSUMER) /* send redund portion */
414 MPI_Send(wred, header[1], MPI_LONG,i,20,MPI_COMM_WORLD);
415 }
416 /* fseek(master.input, 0, SEEK_SET); */
417 fclose(master.input);
418 free(mplrs.R->redineq);
419 free(mplrs.R->facet);
420 free(mplrs.R);
421 free(wred);
422 free(buf);
423 }
424
425 /* initialize default values of global structures, before commandline */
mplrs_initstrucs(void)426 void mplrs_initstrucs(void)
427 {
428 mplrs.cobasis_list = NULL;
429 mplrs.lrs_main = mplrs_init_lrs_main;
430 mplrs.P = NULL;
431 mplrs.Q = NULL;
432 mplrs.R = NULL;
433
434 mplrs.caughtsig = 0;
435 mplrs.abortinit = 0;
436 mplrs.overflow = 0;
437 mplrs.rays = 0;
438 mplrs.vertices = 0;
439 mplrs.bases = 0;
440 mplrs.facets = 0;
441 mplrs.linearities = 0;
442 mplrs.intvertices = 0;
443 mplrs.deepest = 0;
444
445 mplrs.my_tag = 100;
446 mplrs.tfn_prefix = DEF_TEMP;
447 mplrs.tfn = NULL;
448 mplrs.tfile = NULL;
449 mplrs.input_filename = DEF_INPUT;
450 mplrs.input = NULL;
451 mplrs.output_list = NULL;
452 mplrs.ol_tail = NULL;
453 mplrs.outnum = 0;
454 mplrs.finalwarn = malloc(sizeof(char)*32);
455 mplrs.finalwarn_len = 32;
456 mplrs.curwarn = malloc(sizeof(char)*32);
457 mplrs.curwarn_len = 32;
458 mplrs.finalwarn[0] = '\0';
459 mplrs.curwarn[0] = '\0';
460 mplrs.maxbuf = DEF_MAXBUF; /* maximum # lines to buffer */
461 mplrs.outputblock = 0; /* don't block initial output */
462 mplrs.redund = 0;
463 mplrs.countonly = 0;
464
465 master.cobasis_list = NULL;
466 master.size_L = 0;
467 master.tot_L = 0;
468 master.num_empty = 0;
469 master.num_producers = 0;
470 master.checkpointing = 0;
471 master.cleanstop = 0;
472 master.messages = 1;
473 master.lmin = DEF_LMIN;
474 master.lmax = DEF_LMAX;
475 master.orig_lmax = 1;
476 master.scalec = DEF_SCALEC;
477 master.initdepth = DEF_ID;
478 master.maxdepth = DEF_MAXD;
479 master.maxcobases = DEF_MAXC;
480 master.maxncob = DEF_MAXNCOB;
481 master.lponly = 0;
482 master.redund = 0;
483 master.time_limit = 0;
484 master.hist_filename = DEF_HIST;
485 master.hist = NULL;
486 master.doing_histogram = 0;
487 master.freq_filename = DEF_FREQ;
488 master.freq = NULL;
489 master.restart_filename = DEF_RESTART;
490 master.restart = NULL;
491 master.checkp_filename = DEF_CHECKP;
492 master.checkp = NULL;
493 master.stop_filename = DEF_STOP;
494 master.stop = NULL;
495 master.input = NULL;
496
497 consumer.prodreq = NULL;
498 consumer.prodibf = NULL;
499 consumer.incoming = NULL;
500 consumer.output_filename = DEF_OUTPUT;
501 consumer.output = stdout;
502 consumer.oflow_flag = 0;
503 consumer.num_producers = 0;
504 consumer.waiting_initial = 2; /* 2: waiting initial and master warnings */
505 consumer.final_print = 1;
506 consumer.final_redundcheck = 0;
507 }
508
509 /* process commandline arguments */
mplrs_commandline(int argc,char ** argv)510 void mplrs_commandline(int argc, char **argv)
511 {
512 int i, arg, firstfile=1;
513 for (i=1; i<argc; i++)
514 {
515 if (!strcmp(argv[i], "-lmin"))
516 {
517 arg = atoi(argv[i+1]);
518 i++;
519 if (arg < 1 )
520 bad_args();
521 master.lmin = arg;
522 if (master.lmin > master.lmax)
523 master.lmax = arg;
524 continue;
525 }
526 else if (!strcmp(argv[i], "-lmax"))
527 {
528 arg = atoi(argv[i+1]);
529 i++;
530 if (arg<1 )
531 bad_args();
532 master.lmax = arg;
533 master.orig_lmax = 0;
534 if (master.lmin > master.lmax)
535 master.lmin = arg;
536 continue;
537 }
538 else if (!strcmp(argv[i], "-scale"))
539 {
540 arg = atoi(argv[i+1]);
541 i++;
542 if (arg<1)
543 bad_args();
544 master.scalec = arg;
545 continue;
546 }
547 else if (!strcmp(argv[i], "-hist"))
548 {
549 master.hist_filename = argv[i+1];
550 i++;
551 continue;
552 }
553 else if (!strcmp(argv[i], "-freq"))
554 {
555 master.freq_filename = argv[i+1];
556 i++;
557 continue;
558 }
559 else if (!strcmp(argv[i], "-stopafter"))
560 {
561 arg = atoi(argv[i+1]);
562 i++;
563 if (arg<1)
564 bad_args();
565 master.maxncob = arg;
566 continue;
567 }
568 else if (!strcmp(argv[i], "-countonly"))
569 {
570 mplrs.countonly = 1;
571 continue;
572 }
573 else if (!strcmp(argv[i], "-maxbuf"))
574 {
575 arg = atoi(argv[i+1]);
576 i++;
577 if (arg<1)
578 bad_args();
579 mplrs.maxbuf = arg;
580 continue;
581 }
582 else if (!strcmp(argv[i], "-id"))
583 {
584 arg = atoi(argv[i+1]);
585 i++;
586 if (arg<0)
587 bad_args();
588 master.initdepth = arg;
589 continue;
590 }
591 else if (!strcmp(argv[i], "-maxd"))
592 {
593 arg = atoi(argv[i+1]);
594 i++;
595 if (arg<1)
596 bad_args();
597 master.maxdepth = arg;
598 continue;
599 }
600 else if (!strcmp(argv[i], "-maxc"))
601 {
602 arg = atoi(argv[i+1]);
603 i++;
604 if (arg<0)
605 bad_args();
606 master.maxcobases = arg;
607 continue;
608 }
609 else if (!strcmp(argv[i], "-checkp"))
610 {
611 master.checkp_filename = argv[i+1];
612 i++;
613 continue;
614 }
615 else if (!strcmp(argv[i], "-lponly"))
616 {
617 master.lponly = 1;
618 continue;
619 }
620 else if (!strcmp(argv[i], "-redund"))
621 {
622 master.redund = 1;
623 continue;
624 }
625 else if (!strcmp(argv[i], "-stop"))
626 {
627 master.stop_filename = argv[i+1];
628 i++;
629 continue;
630 }
631 else if (!strcmp(argv[i], "-time"))
632 {
633 arg = atoi(argv[i+1]);
634 i++;
635 if (arg<1)
636 bad_args();
637 master.time_limit = arg;
638 continue;
639 }
640 else if (!strcmp(argv[i], "-restart"))
641 {
642 master.restart_filename = argv[i+1];
643 i++;
644 continue;
645 }
646 else if (!strcmp(argv[i], "-temp"))
647 {
648 mplrs.tfn_prefix = argv[i+1];
649 i++;
650 continue;
651 }
652 else if (firstfile == 1)
653 {
654 mplrs.input_filename = argv[i];
655 firstfile = 2;
656 }
657 else if (firstfile == 2)
658 {
659 consumer.output_filename = argv[i];
660 firstfile = 3;
661 }
662 else
663 bad_args();
664 }
665 if (master.orig_lmax) /* default lmax behavior is lmax=lmin */
666 master.lmax = (master.lmin>0? master.lmin: 0);
667 if (mplrs.input_filename==NULL) /* need an input file */
668 bad_args();
669 if ((master.stop_filename!=NULL || master.time_limit!=0) &&
670 master.checkp_filename==NULL)
671 bad_args(); /* need checkpoint file if stop condition given */
672 }
673
674 /* open input file on master, histogram (if exists) on master,
675 * output (if exists) on consumer
676 */
mplrs_initfiles(void)677 void mplrs_initfiles(void)
678 {
679 if (mplrs.rank == MASTER)
680 {
681 master.input = fopen(mplrs.input_filename, "r");
682 if (master.input == NULL)
683 {
684 printf("Unable to open %s for reading [%s].\n",
685 mplrs.input_filename, mplrs.host);
686 /* MPI_Finalize(); */
687 exit(0);
688 }
689 if (master.hist_filename != NULL)
690 {
691 master.hist = fopen(master.hist_filename, "w");
692 if (master.hist == NULL)
693 {
694 printf("Unable to open %s for writing [%s].\n",
695 master.hist_filename, mplrs.host);
696 /* MPI_Finalize(); */
697 exit(0);
698 }
699 master.doing_histogram = 1;
700 mprintf2(("M: Prepared histogram (%s)\n", master.hist_filename));
701 }
702 if (master.freq_filename != NULL)
703 {
704 master.freq = fopen(master.freq_filename, "w");
705 if (master.freq == NULL)
706 {
707 printf("Unable to open %s for writing [%s].\n",
708 master.freq_filename, mplrs.host);
709 exit(0);
710 }
711 mprintf2(("M: Prepared frequency file (%s)\n",
712 master.freq_filename));
713 }
714 if (master.checkp_filename !=NULL)
715 {
716 master.checkp = fopen(master.checkp_filename, "w");
717 if (master.checkp == NULL)
718 {
719 printf("Unable to open %s for writing [%s].\n",
720 master.checkp_filename, mplrs.host);
721 /* MPI_Finalize(); */
722 exit(0);
723 }
724 mprintf2(("M: Prepared checkpoint file (%s)\n",
725 master.checkp_filename));
726 }
727 if (master.restart_filename != NULL)
728 {
729 master.restart = fopen(master.restart_filename, "r");
730 if (master.restart == NULL)
731 {
732 printf("Unable to open %s for reading [%s].\n",
733 master.restart_filename, mplrs.host);
734 /* MPI_Finalize(); */
735 exit(0);
736 }
737 mprintf2(("M: Opened restart file (%s)\n",
738 master.restart_filename));
739 }
740 }
741 if (mplrs.rank == CONSUMER)
742 {
743 if (consumer.output_filename == NULL)
744 return;
745 consumer.output = fopen(consumer.output_filename, "w");
746 if (consumer.output == NULL)
747 {
748 printf("Unable to open %s for writing [%s].\n",
749 consumer.output_filename, mplrs.host);
750 /* MPI_Finalize(); */
751 exit(0);
752 }
753 }
754 }
755
756 /* free the lrs_mp allocated in mplrs_init */
mplrs_freemps(void)757 void mplrs_freemps(void)
758 {
759 lrs_clear_mp(mplrs.tN); lrs_clear_mp(mplrs.tD);
760 lrs_clear_mp(mplrs.Vnum); lrs_clear_mp(mplrs.Vden);
761 lrs_clear_mp(mplrs.Tnum); lrs_clear_mp(mplrs.Tden);
762 }
763
764 /* Bad commandline arguments. Complain and die. */
bad_args(void)765 void bad_args(void)
766 {
767 if (mplrs.rank == CONSUMER)
768 printf("Invalid arguments.\n%s\n", USAGE);
769 mplrs_freemps();
770 MPI_Finalize();
771 exit(0);
772 }
773
774 /* fallback -- meaningless if <3 processes
775 * better would be to fallback to normal lrs if <4 processes
776 */
mplrs_fallback(void)777 int mplrs_fallback(void)
778 {
779 if (mplrs.rank==0)
780 printf("mplrs requires at least 3 processes.\n");
781 mplrs_freemps();
782 MPI_Finalize();
783 exit(0);
784 return 0; /* unreachable */
785 }
786
787 /**********
788 * master *
789 **********/
790
mplrs_master(void)791 int mplrs_master(void)
792 {
793 int i;
794 int loopiter = 0; /* do some things only sometimes */
795 MPI_Request ign = MPI_REQUEST_NULL; /* a request we'll ignore */
796 struct timeval cur, last; /* for histograms */
797 int flag = 0;;
798 int phase = 1; /* In phase1? */
799 int done = -1; /* need a negative int to send to finished workers */
800 int ncob=0; /* for printing sizes of sub-problems and for -stopafter*/
801 unsigned long tot_ncob = 0;
802 int want_stop = 0;
803 int check = 0; /* inform consumer whether checkpointing */
804
805 gettimeofday(&last, NULL);
806
807 master.num_producers = 0; /* nobody working right now */
808 master.act_producers = malloc(sizeof(unsigned int)*mplrs.size);
809 master.live_workers = mplrs.size - 2;
810 master.workin = malloc(sizeof(int)*mplrs.size);
811 master.mworkers = malloc(sizeof(MPI_Request)*mplrs.size);
812 master.incoming = NULL;
813 master.sigbuf = malloc(sizeof(float)*mplrs.size);
814 master.sigcheck = malloc(sizeof(MPI_Request)*mplrs.size);
815
816 if (master.restart!=NULL)
817 {
818 master_restart();
819 phase = 0;
820 }
821
822 for (i=0; i<mplrs.size; i++)
823 {
824 master.act_producers[i] = 0;
825 if (i==MASTER)
826 continue;
827 MPI_Irecv(master.sigbuf+i, 1, MPI_FLOAT, i, 9, MPI_COMM_WORLD,
828 master.sigcheck+i);
829 if (i==CONSUMER)
830 continue;
831 MPI_Irecv(master.workin+i, 1, MPI_UNSIGNED, i, 6, MPI_COMM_WORLD,
832 master.mworkers+i);
833 }
834
835 if (mplrs.abortinit)
836 {
837 want_stop = 1;
838 master_stop_consumer(0);
839 }
840 while ((master.cobasis_list!=NULL && !master.checkpointing && !want_stop) || master.num_producers>0 || master.live_workers>0)
841 {
842 loopiter++;
843 /* sometimes check if we should update histogram etc */
844 if (!(loopiter&0x1ff))
845 {
846 if (master.maxncob>0 && master.maxncob<=tot_ncob)
847 want_stop = 1;
848 if (master.doing_histogram)
849 print_histogram(&cur, &last);
850 }
851 /* sometimes check if we should checkpoint */
852 /* don't check in phase 1 to ensure the consumer sees a 'begin' and exits */
853 if (!(loopiter&0x7ff) && phase!=1)
854 // && !master.checkpointing && !master.cleanstop)
855 {
856 if (master.stop_filename!=NULL || master.time_limit!=0)
857 check_stop();
858 master_checksigs();
859 if (master.cleanstop)
860 want_stop = 1;
861 }
862
863 recv_producer_lists();
864
865 if (master.redund && phase==1) /* send out redund jobs */
866 {
867 for (i=0; i<=master.max_redundworker; i++)
868 {
869 if (i==CONSUMER || i==MASTER)
870 continue;
871 MPI_Wait(master.mworkers+i, MPI_STATUS_IGNORE);
872 send_work(i,phase);
873 MPI_Irecv(master.workin+i, 1, MPI_UNSIGNED,i, 6,
874 MPI_COMM_WORLD, master.mworkers+i);
875 }
876 phase = 0;
877 master.tot_L = master.max_redundworker - 1;
878 }
879
880 /* check if anyone wants work */
881 for (i=0; i<mplrs.size; i++)
882 { /* consumer and master don't want work */
883 if (i==CONSUMER || i==MASTER)
884 continue;
885 /* workers that have exited can't work */
886 if (master.mworkers[i]==MPI_REQUEST_NULL)
887 continue;
888 if (master.num_producers>0 && master.cobasis_list==NULL)
889 {
890 break; /* no work to give now, but some may
891 * appear later
892 */
893 }
894
895 if (phase==1 && i!=INITIAL && master.cleanstop==0)
896 continue; /* INITIAL gets the first bit */
897 /* need cleanstop condition for
898 * overflows in mplrs_worker_init()
899 */
900 MPI_Test(master.mworkers+i, &flag, MPI_STATUS_IGNORE);
901 if (!flag)
902 continue; /* i is not ready for more work */
903
904 ncob = master.workin[i];
905 tot_ncob+=ncob;
906 mprintf2(("M: %d looking for work\n", i));
907 if ((master.cobasis_list!=NULL || phase==1) &&
908 !master.checkpointing && !want_stop)
909 { /* and not checkpointing! */
910 send_work(i,phase);
911 MPI_Irecv(master.workin+i, 1, MPI_UNSIGNED,i, 6,
912 MPI_COMM_WORLD, master.mworkers+i);
913 phase=0;
914 if (master.freq!=NULL && ncob>0)
915 fprintf(master.freq, "%d\n", ncob);
916 continue;
917 }
918 /* else tell worker we've finished */
919 mprintf(("M: Saying goodbye to %d, %d left\n", i,
920 master.live_workers-1));
921 MPI_Isend(&done, 1, MPI_INT, i, 8, MPI_COMM_WORLD,&ign);
922 MPI_Request_free(&ign);
923 master.live_workers--;
924 if (master.freq!=NULL && ncob>0)
925 fprintf(master.freq, "%d\n", ncob);
926 }
927
928 clean_outgoing_buffers();
929 }
930
931 /* don't checkpoint if we actually finished the run */
932 if (master.checkpointing && master.cobasis_list==NULL)
933 master.checkpointing = 0;
934
935 if (master.checkpointing) /* checkpointing */
936 check = CHECKFLAG;
937 /* inform consumer if checkpointing or not */
938 mprintf2(("M: Telling consumer whether or not checkpointing (%d)\n", check));
939 MPI_Send(&check, 1, MPI_INT, CONSUMER, 1, MPI_COMM_WORLD);
940
941 if (master.checkpointing)
942 master_checkpoint();
943
944 send_master_stats();
945 mplrs_freemps();
946 if (master.freq != NULL)
947 fclose(master.freq);
948 if (master.hist != NULL)
949 fclose(master.hist);
950 if (master.checkp != NULL)
951 fclose(master.checkp);
952 free(mplrs.finalwarn);
953 free(mplrs.curwarn);
954 MPI_Finalize();
955 free(master.workin);
956 free(master.mworkers);
957 free(master.act_producers);
958 free(master.sigbuf);
959 free(master.sigcheck);
960 return 0;
961 }
962
963 /* prepare to receive remaining cobases from target.
964 * Since we don't yet know the size of buffers needed, we
965 * only Irecv the header and will Irecv the actual cobases later
966 */
master_add_incoming(int target)967 void master_add_incoming(int target)
968 {
969 msgbuf *msg = malloc(sizeof(msgbuf));
970 msg->req = malloc(sizeof(MPI_Request)*3);
971 msg->buf = malloc(sizeof(void *)*3);
972 msg->buf[0] = malloc(sizeof(int) * 3); /* (strlen,lengths,tag) */
973 msg->buf[1] = NULL; /* sizes not known yet */
974 msg->buf[2] = NULL;
975 msg->count = 3;
976 msg->target = target;
977 msg->queue = 1;
978 msg->tags = NULL;
979 msg->sizes = NULL;
980 msg->types = NULL;
981 msg->next = master.incoming;
982 master.incoming = msg;
983 MPI_Irecv(msg->buf[0], 3, MPI_INT, target, 10, MPI_COMM_WORLD,msg->req); return;
984 }
985
986 /* check our list of incoming messages from producers about cobases
987 * to add to L. Add any from messages that have completed.
988 * If the header has completed (msg->queue==1 and header completed),
989 * add the remaining MPI_Irecv's.
990 * Update num_producers to keep track of how many messages the master
991 * is owed (workers are not allowed to exit until num_producers==0 and
992 * L is empty).
993 * Also update size_L
994 */
recv_producer_lists(void)995 void recv_producer_lists(void)
996 {
997 msgbuf *msg, *prev=NULL, *next;
998 int *header;
999 int flag;
1000
1001 for (msg = master.incoming; msg; msg=next)
1002 {
1003 next = msg->next;
1004 MPI_Test(msg->req, &flag, MPI_STATUS_IGNORE);
1005 if (!flag) /* header has not completed yet */
1006 {
1007 prev = msg;
1008 continue;
1009 }
1010 header = (int *)msg->buf[0];
1011 if (msg->queue) /* header completed, and need to Irecv now */
1012 {
1013 if (header[0]==-1) /* producer returns NOTHING */
1014 {
1015 master.num_producers--;
1016 master.act_producers[msg->target]--;
1017 free_msgbuf(msg);
1018 if (prev)
1019 prev->next = next;
1020 else
1021 master.incoming = next;
1022 continue;
1023 }
1024 msg->buf[1]= malloc(sizeof(char)*header[1]);
1025 msg->buf[2]= malloc(sizeof(int)*header[0]);
1026 MPI_Irecv(msg->buf[1], header[1], MPI_CHAR, msg->target,
1027 header[2], MPI_COMM_WORLD, msg->req+1);
1028 MPI_Irecv(msg->buf[2], header[0], MPI_INT, msg->target,
1029 header[2], MPI_COMM_WORLD, msg->req+2);
1030 msg->queue=0;
1031 prev = msg;
1032 continue;
1033 }
1034 /* header completed, did the rest? */
1035 MPI_Testall(2, msg->req+1, &flag, MPI_STATUSES_IGNORE);
1036 if (!flag) /* not yet */
1037 {
1038 prev = msg;
1039 continue;
1040 }
1041
1042 mprintf2(("M: %d returned non-empty producer list (%d, %d)\n",
1043 msg->target, header[0], header[1]));
1044 process_returned_cobases(msg);
1045
1046 mprintf2(("M: Now have size_L=%lu\n",master.size_L));
1047
1048 if (prev)
1049 prev->next = next;
1050 else
1051 master.incoming = next;
1052 master.num_producers--;
1053 master.act_producers[msg->target]--;
1054 free_msgbuf(msg);
1055 }
1056 return;
1057 }
1058
1059 /* msg is a completed, non-empty buffer containing cobases to add to
1060 * L. Process it, add them to L, and update size_L
1061 * Basically the inverse of return_unfinished_cobases()
1062 */
process_returned_cobases(msgbuf * msg)1063 void process_returned_cobases(msgbuf *msg)
1064 {
1065 int *header = (int *)msg->buf[0];
1066 char *str = (char *)msg->buf[1];
1067 int *lengths = (int *)msg->buf[2];
1068 int i;
1069 char *cob;
1070
1071 for (i=0; i<header[0]; i++)
1072 {
1073 cob = malloc(sizeof(char)*(lengths[i]+1));
1074 strncpy(cob, str, lengths[i]);
1075 cob[lengths[i]] = '\0';
1076 str+=lengths[i];
1077 mprintf2(("M: Adding to L: %s\n",cob));
1078 master.cobasis_list = addlist(master.cobasis_list, cob);
1079 }
1080 master.size_L += header[0];
1081 master.tot_L += header[0];
1082
1083 return;
1084 }
1085
1086 /* send one unit from L to target. if phase!=0, this is the first
1087 * unit we're sending (i.e. phase 1). usually, phase==0.
1088 * Parameters are scaled and sent in the header.
1089 */
send_work(int target,int phase)1090 void send_work(int target, int phase)
1091 {
1092 slist *cob;
1093 msgbuf *msg = malloc(sizeof(msgbuf));
1094 int *header;
1095 msg->req = malloc(sizeof(MPI_Request)*2);
1096 msg->buf = malloc(sizeof(void *)*2);
1097 /*{length of work string, int maxdepth, int maxcobases, bool lponly,
1098 bool messages, 3x future use} */
1099 msg->buf[0] = malloc(sizeof(int) * 8);
1100 header = (int *)msg->buf[0];
1101
1102 header[4] = master.messages;
1103 master.messages = 0;
1104
1105 if (phase==0) /* normal */
1106 {
1107 cob = master.cobasis_list;
1108 master.cobasis_list = cob->next;
1109 header[0] = strlen((char *)cob->data);
1110 msg->buf[1] = cob->data;
1111 setparams(header); /* scale if needed */
1112 master.size_L--;
1113 if (master.size_L == 0)
1114 master.num_empty++;
1115 mprintf(("M: Sending work to %d (%d,%d,%d) %s\n",
1116 target, header[0], header[1], header[2],
1117 (char*)msg->buf[1]));
1118 msg->count = 2;
1119 free(cob);
1120 }
1121 else /* phase 1 */
1122 {
1123 header[0] = 0; /* header[0]==0 means initial phase 1 */
1124 header[1] = master.initdepth;
1125 header[2] = master.maxcobases;
1126 if (master.redund) /* redund turns off budget */
1127 header[1] = header[2] = 0;
1128 mprintf(("M: Sending phase 1 to %d (%d,%d)\n", target,
1129 header[1], header[2]));
1130 msg->buf[1] = NULL;
1131 msg->count = 1;
1132 }
1133 msg->target = target;
1134 msg->queue = 0;
1135 msg->tags = NULL;
1136 msg->sizes = NULL;
1137 msg->types = NULL;
1138
1139 /* ready to send */
1140 MPI_Isend(header, 8, MPI_INT, target, 1, MPI_COMM_WORLD, msg->req);
1141 if (phase==0)
1142 MPI_Isend(msg->buf[1], header[0], MPI_CHAR, target, 1,
1143 MPI_COMM_WORLD, msg->req+1);
1144 master_add_incoming(target); /* prepare to receive remaining cobases */
1145
1146 msg->next = mplrs.outgoing;
1147 mplrs.outgoing = msg;
1148
1149 master.act_producers[target]++;
1150 master.num_producers++;
1151
1152 return;
1153 }
1154
1155 /* header is a work header (length, maxd, maxc) not yet set.
1156 * Set the parameters (maxd, maxc) as desired.
1157 */
setparams(int * header)1158 void setparams(int *header)
1159 {
1160 /* if L is too small, use maxdepth */
1161 if (master.lmin>0 && (master.size_L < mplrs.size*master.lmin))
1162 header[1] = master.maxdepth;
1163 else /* don't have too small L, so no maxdepth */
1164 header[1] = 0;
1165 header[2] = master.maxcobases;
1166 if (master.lmax>0 && (master.size_L > mplrs.size * master.lmax))
1167 header[2] = header[2] * master.scalec;
1168 header[3] = master.lponly;
1169 }
1170
1171 /* check if we want to stop now.
1172 * if master.stop_filename exists or time limit exceeded,
1173 * set master.checkpointing = 1.
1174 * this is not an immediate stop -- we wait for current workers to
1175 * complete the tasks they've been assigned, stopping after that.
1176 */
check_stop(void)1177 void check_stop(void)
1178 {
1179 struct timeval cur;
1180 int flag = 0;
1181
1182 if (master.checkpointing || master.cleanstop)
1183 return; /* already stopping */
1184 if (master.stop_filename)
1185 {
1186 mprintf2(("M: checking stop file %s\n", master.stop_filename));
1187 master.stop = fopen(master.stop_filename, "r");
1188 if (master.stop!=NULL)
1189 {
1190 flag=1;
1191 fclose(master.stop);
1192 }
1193 }
1194
1195 if (master.time_limit!=0)
1196 {
1197 mprintf2(("M: checking if time exceeded\n"));
1198 gettimeofday(&cur, NULL);
1199 if (cur.tv_sec - mplrs.start.tv_sec > master.time_limit)
1200 flag=1;
1201 }
1202 if (flag!=0)
1203 {
1204 mprintf(("M: Stop condition detected, checkpointing!\n"));
1205 master.checkpointing = 1;
1206 }
1207 }
1208
master_stop_consumer(int already_stopping)1209 void master_stop_consumer(int already_stopping)
1210 {
1211 MPI_Request ign;
1212 int check[3] = {STOPFLAG,0,0};
1213 mprintf2(("M: telling consumer to stop, already_stopping:%d checkpointing:%d\n", already_stopping, master.checkpointing));
1214 if (!already_stopping)
1215 MPI_Isend(check, 3, MPI_INT, CONSUMER, 7,
1216 MPI_COMM_WORLD, &ign);
1217 master.cleanstop = 1;
1218 }
1219
1220 /* check if we've caught a signal, or we've received a message from someone
1221 * that has. If so, we want to checkpoint like above
1222 * The similar mplrs_cleanstop also uses this...
1223 */
master_checksigs(void)1224 void master_checksigs(void)
1225 {
1226 int i, flag, size=mplrs.size;
1227 float junk=0;
1228 MPI_Request ign;
1229 int already_stopping = master.checkpointing || master.cleanstop;
1230 if (mplrs.caughtsig == 1 && !already_stopping)
1231 {
1232 mprintf(("M: I caught signal, checkpointing!\n"));
1233 /* this master.checkpointing must be inside a !already_stopping
1234 * condition, see comment below
1235 */
1236 master.checkpointing = 1;
1237 already_stopping = 1;
1238 }
1239 for (i=1; i<size; i++)
1240 {
1241 MPI_Test(master.sigcheck+i, &flag, MPI_STATUS_IGNORE);
1242 if (flag)
1243 MPI_Irecv(master.sigbuf+i, 1, MPI_FLOAT, i, 9, MPI_COMM_WORLD,
1244 master.sigcheck+i);
1245 if (flag && master.sigbuf[i]==0 && !already_stopping)
1246 {
1247 mprintf(("M: %d caught signal, checkpointing!\n",i));
1248 /* this master.checkpointing must also be inside a
1249 * !already_stopping condition, see comment below
1250 */
1251 master.checkpointing = 1;
1252 already_stopping = 1;
1253 }
1254 else if (flag && master.sigbuf[i]==-1)
1255 {
1256 mprintf(("M: %d wants cleanstop, no checkpoint\n",i));
1257 /* still needed, to tell consumer if overflow occurs
1258 * in initial job
1259 */
1260 master_stop_consumer(already_stopping);
1261 MPI_Isend(&junk, 1, MPI_FLOAT, i,9,MPI_COMM_WORLD,&ign);
1262 already_stopping = 1;
1263 /* disable a checkpoint in case we notice overflow
1264 * after deciding to checkpoint but before workers
1265 * return. in that case we can't guarantee all
1266 * output/unfinished cobases are produced so avoid
1267 * chance of making an incorrect checkpoint
1268 */
1269 /* note this is not in an !already_stopping condition*/
1270 master.checkpointing = 0;
1271 }
1272 else if (flag && master.sigbuf[i]==1)
1273 {
1274 mprintf(("M: %d wants cleanstop, checkpointing!\n",i));
1275 master.cleanstop = 1;
1276 /* don't produce a checkpoint if overflow has
1277 * already occurred, even if a checkpoint is
1278 * requested after overflow but before shutdown,
1279 * as in comment above
1280 */
1281 if (!already_stopping)
1282 master.checkpointing = 1;
1283 already_stopping = 1;
1284 }
1285 }
1286 }
1287
1288 /* we're checkpointing, receive stats from consumer and output checkpoint file
1289 */
1290 /* two cases: we have a file to checkpoint to (normal)
1291 * or, we caught a signal and want to checkpoint but have no checkpoint file
1292 * in this case we append to the output file via the consumer, with
1293 * comments on where to cut
1294 */
1295 /* mplrs1 format: counts are 'unsigned int' (%d unfortunately)
1296 * mplrs2 format: counts are 'unsigned long' (%lu)
1297 * so mplrs1 files can be read by mplrs2 readers not necc. reverse
1298 */
master_checkpoint(void)1299 void master_checkpoint(void)
1300 {
1301 int fin = -1;
1302 mprintf(("M: making checkpoint file\n"));
1303 recv_counting_stats(CONSUMER);
1304 if (master.checkp_filename != NULL)
1305 {
1306 MPI_Send(&fin, 1, MPI_INT, CONSUMER, 1, MPI_COMM_WORLD);
1307 master_checkpointfile();
1308 return;
1309 }
1310 else
1311 {
1312 fin = 1;
1313 MPI_Send(&fin, 1, MPI_INT, CONSUMER, 1, MPI_COMM_WORLD);
1314 master_checkpointconsumer();
1315 return;
1316 }
1317 }
1318
1319 /* note: master_checkpointconsumer and master_checkpointfile should
1320 * produce the *same* format. First two lines of checkpointconsumer
1321 * done by the consumer directly for now.
1322 */
master_checkpointconsumer(void)1323 void master_checkpointconsumer(void)
1324 {
1325 int len;
1326 char *str;
1327 slist *list, *next;
1328 for (list=master.cobasis_list; list; list=next)
1329 {
1330 next = list->next;
1331 str = (char*)list->data;
1332 len = strlen(str)+1; /* include \0 */
1333 MPI_Send(&len, 1, MPI_INT, CONSUMER, 1, MPI_COMM_WORLD);
1334 MPI_Send(str, len, MPI_CHAR, CONSUMER, 1, MPI_COMM_WORLD);
1335 free(str);
1336 free(list);
1337 }
1338 len = -1;
1339 MPI_Send(&len, 1, MPI_INT, CONSUMER, 1, MPI_COMM_WORLD);
1340 }
master_checkpointfile(void)1341 void master_checkpointfile(void)
1342 {
1343 slist *list, *next;
1344 char *vol = cprat("", mplrs.Vnum, mplrs.Vden);
1345 fprintf(master.checkp, "mplrs4\n%llu %llu %llu %llu %llu\n%s\n%llu\n",
1346 mplrs.rays, mplrs.vertices, mplrs.bases, mplrs.facets,
1347 mplrs.intvertices,vol,mplrs.deepest);
1348 free(vol);
1349 for (list=master.cobasis_list; list; list=next)
1350 {
1351 next = list->next;
1352 fprintf(master.checkp, "%s\n", (char *)list->data);
1353 free(list->data);
1354 free(list);
1355 master.size_L--;
1356 }
1357 /* fclose(master.checkp); */ /* not here */
1358 return;
1359 }
1360
1361 /* we want to restart. load L and counting stats from restart file,
1362 * send counting stats to consumer (after notifying consumer of restart)
1363 */
master_restart(void)1364 void master_restart(void)
1365 {
1366 char *line=NULL;
1367 char *vol=NULL;
1368 size_t size=0, vsize=0;
1369 ssize_t len=0;
1370 int restart[3] = {RESTARTFLAG,0,0};
1371 int ver;
1372
1373 /* check 'mplrs1' header */
1374 len = getline(&line, &size, master.restart);
1375 if (len!=7 || (strcmp("mplrs1\n",line) && strcmp("mplrs2\n",line) &&
1376 strcmp("mplrs3\n",line) && strcmp("mplrs4\n",line)))
1377 {
1378 printf("Unknown checkpoint format\n");
1379 /* MPI_Finalize(); */
1380 exit(0);
1381 }
1382
1383 mprintf2(("M: found checkpoint header\n"));
1384 sscanf(line,"mplrs%d\n",&ver);
1385
1386 /* get counting stats */
1387 fscanf(master.restart, "%llu %llu %llu %llu %llu\n",
1388 &mplrs.rays, &mplrs.vertices, &mplrs.bases, &mplrs.facets,
1389 &mplrs.intvertices);
1390 if (ver<3) /* volume added in mplrs3 */
1391 printf("*Old checkpoint file, volume may be incorrect\n");
1392 else /* get volume */
1393 {
1394 len = getline(&vol, &vsize, master.restart);
1395 if (len<=1)
1396 {
1397 printf("Broken checkpoint file\n");
1398 exit(0);
1399 }
1400 vol[len-1] = '\0'; /* remove '\n' */
1401 #if defined(MA) || defined(GMP) || defined(FLINT)
1402 plrs_readrat(mplrs.Tnum, mplrs.Tden, vol);
1403 copy(mplrs.tN, mplrs.Vnum); copy(mplrs.tD, mplrs.Vden);
1404 linrat(mplrs.tN, mplrs.tD, 1L, mplrs.Tnum, mplrs.Tden,
1405 1L, mplrs.Vnum, mplrs.Vden);
1406 #else
1407 if (strcmp(vol,"0"))
1408 printf("*Checkpoint volume ignored, use gmp or hybrid mplrs\n");
1409 #endif
1410 free(vol);
1411 }
1412 if (ver<4) /* tree depth added in mplrs4 */
1413 printf("*Old checkpoint file, tree depth may be incorrect\n");
1414 else
1415 fscanf(master.restart, "%llu\n", &mplrs.deepest);
1416
1417 /* get L */
1418 while((len = getline(&line, &size, master.restart))!= -1)
1419 {
1420 if (line[0]=='\n') /* ignore blank lines */
1421 {
1422 free(line);
1423 line = NULL;
1424 size=0;
1425 continue;
1426 }
1427 line[strlen(line)-1]='\0'; /* replace \n by \0 */
1428 master.cobasis_list = addlist(master.cobasis_list, line);
1429 master.size_L++;
1430 line = NULL;
1431 size = 0;
1432 }
1433 master.tot_L = master.size_L; /* maybe should save and retrieve */
1434 mprintf(("M: Restarted with |L|=%lu\n",master.size_L));
1435 fclose(master.restart);
1436
1437 MPI_Send(restart, 3, MPI_INT, CONSUMER, 7, MPI_COMM_WORLD);
1438 send_counting_stats(CONSUMER);
1439 mplrs.rays = 0;
1440 mplrs.vertices = 0;
1441 mplrs.bases = 0;
1442 mplrs.facets = 0;
1443 mplrs.intvertices = 0;
1444
1445 return;
1446 }
1447
1448 /* check if we should update the histogram and then do it */
print_histogram(struct timeval * cur,struct timeval * last)1449 void print_histogram(struct timeval *cur, struct timeval *last)
1450 {
1451 float sec;
1452 int i;
1453 int act;
1454
1455 gettimeofday(cur,NULL);
1456 if (cur->tv_sec > last->tv_sec)
1457 {
1458 sec = (float)(cur->tv_sec - mplrs.start.tv_sec) + ((float)(cur->tv_usec - mplrs.start.tv_usec))/1000000;
1459 act=0;
1460 for (i=0; i<mplrs.size; i++)
1461 if (master.act_producers[i])
1462 act++;
1463 /* time (seconds), number of active producers, number of
1464 * producers owing us a message about remaining cobases,
1465 * 0, 0 (existed in mplrs.cpp but no longer)
1466 */
1467 fprintf(master.hist, "%f %d %lu %d %d %d %lu\n",
1468 sec, act, master.size_L, master.num_producers, 0, 0, master.tot_L);
1469 fflush(master.hist);
1470 last->tv_sec = cur->tv_sec;
1471 last->tv_usec = cur->tv_usec;
1472 }
1473 }
1474
1475 /**********
1476 * worker *
1477 **********/
1478
mplrs_worker(void)1479 int mplrs_worker(void)
1480 {
1481 char *starting_cobasis;
1482 /* header for incoming work:
1483 * {length of work string, int maxdepth, int maxcobases, 5xfuture use}
1484 */
1485 int header[8]={0,0,0,0,0,0,0,0};
1486 MPI_Request req = MPI_REQUEST_NULL;
1487 unsigned int ncob=0; /* used for # cobases in prev. job */
1488 unsigned int tot_ncob=0;
1489 int len;
1490 int flag;
1491
1492 if (!mplrs.abortinit) /* didn't receive file if parsing failed */
1493 mplrs_worker_init();
1494
1495 while (1)
1496 {
1497 ncob = mplrs.bases - tot_ncob; /* #cobases in last job */
1498 tot_ncob = mplrs.bases; /* #cobases done so far */
1499 /* check signals */
1500 mplrs_handlesigs();
1501 /* ask for work */
1502 mprintf2(("%d: Asking master for work\n",mplrs.rank));
1503 MPI_Isend(&ncob, 1, MPI_UNSIGNED, MASTER, 6, MPI_COMM_WORLD,
1504 &req);
1505 flag = 0;
1506 while (1) /* was MPI_Wait(&req, MPI_STATUS_IGNORE); */
1507 {
1508 MPI_Test(&req, &flag, MPI_STATUS_IGNORE);
1509 if (flag)
1510 break;
1511 clean_outgoing_buffers();
1512 }
1513
1514 starting_cobasis = NULL;
1515 /* get response */
1516 MPI_Recv(header, 8, MPI_INT, MASTER, MPI_ANY_TAG,
1517 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
1518 mprintf2(("%d: Message received from master\n",mplrs.rank));
1519
1520 len = header[0];
1521
1522 if (len==-1) /* no more work to do */
1523 return mplrs_worker_finished();
1524
1525 if (len>0)
1526 {
1527 starting_cobasis = malloc(sizeof(char)*(len+1));
1528 MPI_Recv(starting_cobasis, len, MPI_CHAR, MASTER,
1529 MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
1530 starting_cobasis[len] = '\0';
1531 }
1532
1533 mplrs.outputblock = 0; /* enable maxbuf-based flushing */
1534 /* do work */
1535 do_work(header, starting_cobasis);
1536 free(starting_cobasis);
1537
1538 /* send output and unfinished cobases */
1539 mplrs.outputblock = 0; /* enable maxbuf-based flushing */
1540 process_output();
1541 process_curwarn();
1542 return_unfinished_cobases();
1543
1544 clean_outgoing_buffers(); /* check buffered sends,
1545 * free if finished
1546 */
1547 }
1548 return 0; /* unreachable */
1549 }
1550
1551 /* run lrs_main.
1552 * easy in the non-hybrid case. in the hybrid case, runs the appropriate one,
1553 * cleaning up and proceeding to the next arithmetic package as needed until
1554 * finishing the run.
1555 * for now only use with stage != 0, TODO: handle init run too
1556 * header, starting_cobasis only used if header non-NULL
1557 */
run_lrs(int argc,char ** argv,long o,long stage,const int * header,char * starting_cobasis)1558 void run_lrs(int argc, char **argv, long o, long stage,
1559 const int *header, char *starting_cobasis)
1560 {
1561
1562 long ret = 1;
1563 #ifndef MA
1564 if (mplrs.overflow != 3)
1565 ret = mplrs.lrs_main(argc, argv, &mplrs.P, &mplrs.Q, o, stage,
1566 NULL, mplrs.R);
1567 if (ret == 1 || (ret==2 && mplrs.redund))
1568 {
1569 mplrs.overflow = 3;
1570 worker_report_overflow();
1571 }
1572 return;
1573 #else /* hybrid */
1574 while ((ret == 1) || (ret==2 && mplrs.redund)) /* overflow */
1575 {
1576 ret = mplrs.lrs_main(argc, argv, &mplrs.P, &mplrs.Q, o, stage,
1577 NULL, mplrs.R);
1578 mprintf3(("%d: lrs_main returned %ld (overflow status %d)\n",
1579 mplrs.rank, ret, mplrs.overflow));
1580 if (ret == 0) /* done */
1581 break;
1582
1583 if (stage!=0)
1584 overflow_cleanup();
1585 mprintf2(("%d: overflow in run_lrs, trying next\n",
1586 mplrs.rank));
1587 #ifdef B128
1588 if (mplrs.lrs_main == lrs1_main)
1589 {
1590 mplrs.overflow = 1;
1591 mplrs.lrs_main = lrs2_main;
1592 mplrs_worker_init(); /* re-init */
1593 if (header!=NULL)
1594 set_restart(header, starting_cobasis);
1595 if (consumer.final_redundcheck)
1596 consumer_setredineq();
1597 continue;
1598 }
1599 #endif
1600 mplrs.overflow = 2;
1601 mplrs.lrs_main = lrsgmp_main;
1602 mplrs_worker_init(); /* re-init */
1603 if (header != NULL)
1604 set_restart(header, starting_cobasis);
1605 if (consumer.final_redundcheck)
1606 consumer_setredineq();
1607 continue;
1608 }
1609 mprintf2(("%d: lrs_main finished, package status %d\n",
1610 mplrs.rank, mplrs.overflow));
1611 return;
1612 #endif
1613 }
1614
1615
mplrs_worker_init(void)1616 void mplrs_worker_init(void)
1617 {
1618 char *argv[] = {argv0, mplrs.tfn};
1619 long o = 1;
1620
1621 if ((mplrs.rank == MASTER && master.redund) || (mplrs.rank == CONSUMER && mplrs.redund))
1622 argv[0] = "redund"; /* hack for -redund */
1623
1624 if (mplrs.R != NULL)
1625 { /* overflow happened, free and re-init */
1626 free(mplrs.R->redineq);
1627 free(mplrs.R->facet);
1628 free(mplrs.R);
1629 }
1630
1631 mplrs.R = lrs_alloc_restart();
1632 mprintf2(("%d: calling lrs_main to setup P & Q\n", mplrs.rank));
1633
1634 /* temp hack to test message requests */
1635 /* initialization done on the master to get the full redund line.
1636 * stage=1 done on INITIAL because the master never does stage=1
1637 */
1638 /* note we must be careful about returns below in order to fix this
1639 * up on MASTER and INITIAL
1640 */
1641 if(mplrs.rank == MASTER)
1642 mplrs.R->messages=1;
1643 else
1644 mplrs.R->messages=0;
1645
1646 if (mplrs.rank != MASTER)
1647 {
1648 mplrs.tfile = fopen(mplrs.tfn, "w");
1649 fprintf(mplrs.tfile, "%s", mplrs.input);
1650 fclose(mplrs.tfile);
1651 }
1652
1653 while (o != 0)
1654 {
1655 o = mplrs.lrs_main(2,argv,&mplrs.P,&mplrs.Q,0,0,NULL,mplrs.R);
1656 if (o == -1)
1657 {
1658 mprintf(("%d: failed lrs setup, want to exit\n",
1659 mplrs.rank));
1660 mplrs.abortinit = 1;
1661 break;
1662 }
1663 if (o == 1)
1664 {
1665 #ifdef MA
1666 mprintf2(("%d: overflow in init, trying next arithmetic\n", mplrs.rank));
1667 overflow_cleanup(); /* 2020.6.1 : avoid multiple
1668 warnings etc when master overflows in setup */
1669 #ifdef B128 /* hybrid with B128 */
1670 if (mplrs.lrs_main == lrs1_main)
1671 {
1672 mplrs.lrs_main = lrs2_main;
1673 mplrs.overflow = 1;
1674 }
1675 else
1676 {
1677 mplrs.lrs_main = lrsgmp_main;
1678 mplrs.overflow = 2;
1679 }
1680 #else /* hybrid but no B128 */
1681 mplrs.lrs_main = lrsgmp_main;
1682 mplrs.overflow = 2;
1683 #endif
1684 #else /* not hybrid, stop on overflow */
1685 mprintf(("%d: overflow in init, fatal\n", mplrs.rank));
1686 worker_report_overflow();
1687 mplrs.overflow = 3; /* fatal overflow */
1688 break;
1689 #endif
1690 }
1691 }
1692
1693 mprintf3(("%d: lrs_main setup finished (%ld)\n", mplrs.rank, o));
1694 mplrs.R->facet = calloc(mplrs.R->d+1, sizeof(long));
1695 if (mplrs.rank != MASTER)
1696 remove(mplrs.tfn);
1697 mplrs.R->overide = 1;
1698 process_curwarn();
1699 }
1700
1701 /* This worker has finished. Tell the consumer, send counting stats
1702 * and exit.
1703 */
mplrs_worker_finished(void)1704 int mplrs_worker_finished(void)
1705 {
1706 int done[3] = {-1,-1,-1};
1707
1708 mprintf((" %d: All finished! Informing consumer.\n",mplrs.rank));
1709
1710 while (mplrs.outgoing) /* needed? negligible in any case */
1711 {
1712 clean_outgoing_buffers();
1713 }
1714 MPI_Send(&done, 3, MPI_INT, CONSUMER, 7, MPI_COMM_WORLD);
1715 send_counting_stats(CONSUMER);
1716
1717 /* free P & Q */
1718 /* overflows free themselves in lrslib? abortinit didn't allocate */
1719 if (mplrs.overflow != 3 && !mplrs.abortinit)
1720 {
1721 mplrs.lrs_main(0,NULL,&mplrs.P,&mplrs.Q,0,2,NULL,mplrs.R);
1722 }
1723 if (mplrs.R != NULL)
1724 {
1725 free(mplrs.R->redineq);
1726 free(mplrs.R->facet);
1727 free(mplrs.R);
1728 }
1729 free(mplrs.tfn);
1730 free(mplrs.input);
1731 mplrs_freemps();
1732 free(mplrs.finalwarn);
1733 free(mplrs.curwarn);
1734 MPI_Finalize();
1735 return 0;
1736 }
1737
1738 /* Go through our outgoing MPI_Isends, free anything that has completed.
1739 * Also, if any of these are queued and the header has completed, then
1740 * send the remaining data.
1741 * Don't use with incoming buffers.
1742 */
clean_outgoing_buffers(void)1743 void clean_outgoing_buffers(void)
1744 {
1745 msgbuf *msg, *next, *prev=NULL;
1746
1747 for (msg = mplrs.outgoing; msg; msg=next)
1748 {
1749 next = msg->next;
1750 if (!outgoing_msgbuf_completed(msg))
1751 {
1752 prev = msg;
1753 continue;
1754 }
1755 if (prev)
1756 prev->next = next;
1757 else
1758 mplrs.outgoing = next;
1759 free_msgbuf(msg);
1760 }
1761 }
1762
1763 /* had an overflow; discard any buffered output and
1764 * unfinished cobases. some output may have already been
1765 * sent (due to -maxbuf) so duplication still possible.
1766 * also resets mplrs.outputblock = 0
1767 */
overflow_cleanup(void)1768 void overflow_cleanup(void)
1769 {
1770 outlist *out = mplrs.output_list, *next;
1771 slist *list, *lnext;
1772
1773 mplrs.outputblock = 0;
1774
1775 /* discard output */
1776 mplrs.outnum = 0; /* clearing buffer */
1777 mplrs.output_list = NULL;
1778 mplrs.ol_tail = NULL;
1779 for (; out; out=next)
1780 {
1781 next = out->next;
1782 free(out->type);
1783 free(out->data);
1784 free(out);
1785 }
1786
1787 /* discard cobases */
1788 for (list=mplrs.cobasis_list; list; list=lnext)
1789 {
1790 lnext = list->next;
1791 free(list->data);
1792 free(list);
1793 }
1794 mplrs.cobasis_list = NULL;
1795 /* discard curwarn */
1796 mplrs.curwarn[0] = '\0';
1797 }
1798
1799 /* set up restart parameters in mplrs.R */
1800 /* header[1] gives maxdepth, header[2] gives maxcobases,
1801 * if header[0] > 0,
1802 * starting_cobasis gives the starting cobasis.
1803 * if header[0] == 0, starting at initial input (phase 1)
1804 * header[4] gives desired bool for R->messages
1805 */
set_restart(const int * header,char * starting_cobasis)1806 void set_restart(const int *header, char *starting_cobasis)
1807 {
1808 lrs_restart_dat *R = mplrs.R;
1809 int i, j, len;
1810 long depth = 0; /* if restarting from earlier checkpoint,
1811 * then we want to fall back to old depth-0
1812 * behavior */
1813 /* prepare input file */
1814 mplrs.initializing = 0;
1815
1816 /* reset counts */
1817 for (i=0; i<10; i++)
1818 R->count[i] = 0;
1819 R->messages = header[4];
1820
1821 if (header[0]>0)
1822 {
1823 /* ugly: recover depth after '!' marker */
1824 len = strlen(starting_cobasis);
1825 for (i=0; i<len-1; i++)
1826 if (starting_cobasis[i]=='!')
1827 {
1828 depth = strtol(starting_cobasis+i+1, NULL, 10);
1829 starting_cobasis[i] = ' ';
1830 break;
1831 }
1832 mprintf3(("%d: depth %ld in %s\n", mplrs.rank, depth,
1833 starting_cobasis));
1834 R->depth = depth;
1835 R->mindepth = depth;
1836 R->restart = 1;
1837 if (i<len-1) /* reset in case redo because of overflow */
1838 starting_cobasis[i] = '!';
1839 /* 2018.4.28 recover restart line */
1840 /* TODO: should be sent as longs, no more need for string */
1841 mprintf3(("%d: facets \"", mplrs.rank));
1842 for (; i<len && starting_cobasis[i]!=' '; i++);
1843 for (; i<len && starting_cobasis[i]==' '; i++);
1844 for (j=0; j<R->d; j++)
1845 {
1846 if (i<len)
1847 R->facet[j] = atol(starting_cobasis+i);
1848 else
1849 R->facet[j] = 0;
1850 mprintf3(("%ld ", R->facet[j]));
1851 for (; i<len && starting_cobasis[i]!=' '; i++);
1852 i++;
1853 }
1854 mprintf3(("\" from %s\n", starting_cobasis));
1855 }
1856 else
1857 {
1858 mplrs.initializing = 1; /* phase 1 output handled different */
1859 R->count[2] = 1; /* why? to get begin line... */
1860 R->depth = 0;
1861 R->restart = 0;
1862 }
1863
1864 if (header[1]>0)
1865 R->maxdepth = header[1] + depth;
1866 if (header[2]>0)
1867 R->maxcobases = header[2];
1868 #if 0
1869 /* 2018.4.28 TODO lponly ? countonly now set on master */
1870 if (mplrs.initializing != 1 && header[3]==1) /* lponly option, only in*/
1871 fprintf(mplrs.tfile, "lponly\n"); /* non-initial jobs
1872 */
1873 #endif
1874
1875 return;
1876 }
1877
1878 /* update counts in mplrs.rays, etc via mplrs.R */
1879 /* see lrsrestart.h */
update_counts(void)1880 void update_counts(void)
1881 {
1882 lrs_restart_dat *R = mplrs.R;
1883 int hull = R->count[5];
1884 long deepest = mplrs.deepest; /* silly, avoid sign comparison warning */
1885 long linearities = mplrs.linearities; /* silly, avoid warning */
1886 if (!hull)
1887 mplrs.rays += R->count[0];
1888 else
1889 mplrs.facets += R->count[0];
1890 if (!hull)
1891 mplrs.vertices += R->count[1];
1892 mplrs.bases += R->count[2];
1893 mplrs.intvertices += R->count[4];
1894 if (linearities < R->count[6])
1895 mplrs.linearities = R->count[6];
1896 if (deepest < R->count[7])
1897 mplrs.deepest = R->count[7];
1898 }
1899
1900 /* header[1] gives maxdepth, header[2] gives maxcobases,
1901 * if header[0] > 0,
1902 * starting_cobasis gives the starting cobasis.
1903 * if header[0] == 0, starting at initial input (phase 1)
1904 * header[4] gives desired bool for R->messages
1905 */
do_work(const int * header,char * starting_cobasis)1906 void do_work(const int *header, char *starting_cobasis)
1907 {
1908 char *argv[] = {argv0, mplrs.tfn};
1909
1910 mprintf3(("%d: Received work (%d,%d,%d)\n",mplrs.rank,header[0],
1911 header[1],header[2]));
1912 set_restart(header, starting_cobasis);
1913 mprintf2(("%d: Calling run_lrs\n",mplrs.rank));
1914 run_lrs(2, argv, 0, 1, header, starting_cobasis);
1915 mprintf2(("%d: run_lrs returned, updating counts\n",mplrs.rank));
1916 update_counts();
1917 }
1918
1919 /* lrs reported overflow; tell the master and prepare to finish */
worker_report_overflow(void)1920 void worker_report_overflow(void)
1921 {
1922 float junk = -1;
1923 mprintf(("%d: reporting overflow\n", mplrs.rank));
1924 if (mplrs.rank == MASTER)
1925 { /* possible now since master does worker_init to get m and redund options */
1926 mplrs.overflow = 3;
1927 return;
1928 }
1929 send_output(2, dupstr("*possible overflow: try using gmp or hybrid mplrs\n"));
1930 /* inform master, wait for reply */
1931 MPI_Send(&junk, 1, MPI_FLOAT, MASTER, 9, MPI_COMM_WORLD);
1932 MPI_Recv(&junk, 1, MPI_FLOAT,MASTER,9,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
1933 }
1934
1935 /* The worker has finished its work. Process the output, preparing
1936 * and sending the output to the consumer, and preparing the unfinished
1937 * cobases for return_unfinished_cobases().
1938 */
process_output(void)1939 void process_output(void)
1940 {
1941 outlist *out = mplrs.output_list, *next;
1942 char *out_string=NULL; /* for output file if exists */
1943 char *serr_string=NULL; /* for stdout */
1944 char *redund_string=NULL; /* for redund */
1945 const char *type;
1946 const char *data;
1947 int len = 1024;
1948 int len2 = 256;
1949 int len3 = 256;
1950
1951 mplrs.outnum = 0; /* clearing buffer */
1952 mplrs.output_list = NULL;
1953 mplrs.ol_tail = NULL;
1954
1955 out_string = malloc(sizeof(char)*len);
1956 out_string[0]='\0';
1957 serr_string = malloc(sizeof(char)*len2);
1958 serr_string[0]='\0';
1959 redund_string = malloc(sizeof(char)*len3);
1960 redund_string[0]='\0';
1961
1962 /* reverse when initializing to get correct order */
1963 #if 0 /* TODO fixme */
1964 if (mplrs.initializing)
1965 out = reverse_list(out);
1966 #endif
1967
1968 while (out)
1969 {
1970 type = out->type;
1971 data = out->data;
1972 if (!strcmp(type, "vertex"))
1973 out_string = append_out(out_string, &len, data);
1974 else if (!strcmp(type, "ray"))
1975 out_string = append_out(out_string, &len, data);
1976 else if (!strcmp(type, "unexp")) /* no longer used */
1977 process_cobasis(data);
1978 else if (!strcmp(type, "cobasis"))
1979 out_string = append_out(out_string, &len, data);
1980 else if (!strcmp(type, "V cobasis"))
1981 out_string = append_out(out_string, &len, data);
1982 else if (!strcmp(type, "facet count")) /* no longer used */
1983 mplrs.facets += atoi(data);
1984 else if (!strcmp(type, "ray count")) /* no longer used */
1985 mplrs.rays += atoi(data);
1986 else if (!strcmp(type, "basis count")) /* no longer used */
1987 mplrs.bases += atoi(data);
1988 else if (!strcmp(type, "vertex count")) /* no longer used */
1989 mplrs.vertices += atoi(data);
1990 else if (!strcmp(type, "integer vertex count")) /* no longer used */
1991 mplrs.intvertices += atoi(data);
1992 else if (!strcmp(type, "tree depth")) /* no longer used */
1993 {
1994 if (mplrs.deepest < strtoul(data,NULL,10))
1995 mplrs.deepest = strtoul(data,NULL,10);
1996 }
1997 else if (!strcmp(type, "linearities")) /* no longer used */
1998 {
1999 if (mplrs.linearities < strtoul(data,NULL,10))
2000 mplrs.linearities = strtoul(data,NULL,10);
2001 }
2002 else if (!strcmp(type, "volume")) /* still used */
2003 {
2004 #if defined(MA) || defined(GMP) || defined(FLINT)
2005 plrs_readrat(mplrs.Tnum, mplrs.Tden, data);
2006 copy(mplrs.tN, mplrs.Vnum); copy(mplrs.tD, mplrs.Vden);
2007 linrat(mplrs.tN, mplrs.tD, 1L, mplrs.Tnum, mplrs.Tden,
2008 1L, mplrs.Vnum, mplrs.Vden);
2009 #endif
2010 }
2011 else if (!strcmp(type, "options warning"))
2012 {
2013 /* only do warnings once, otherwise repeated */
2014 if (mplrs.initializing)
2015 out_string = append_out(out_string, &len, data);
2016 }
2017 else if (!strcmp(type, "header"))
2018 {
2019 /*only do header if initializing, otherwise repeated*/
2020 if (mplrs.initializing)
2021 out_string = append_out(out_string, &len, data);
2022 }
2023 else if(!strcmp(type, "redund"))
2024 {
2025 /* handle redund inequalities: TODO better */
2026 redund_string = append_out(redund_string, &len3, data);
2027 }
2028 else if (!strcmp(type, "debug"))
2029 {
2030 out_string = append_out(out_string, &len, data);
2031 }
2032 else if (!strcmp(type, "warning"))
2033 { /* warnings always go to output file and stderr if output is not stdout */
2034 out_string = append_out(out_string, &len, data);
2035 if(consumer.output != stdout)
2036 serr_string = append_out(serr_string, &len2, data);
2037 }
2038 else if (!strcmp(type, "finalwarn"))
2039 { /* these are printed at end of run */
2040 mplrs.curwarn = append_out(mplrs.curwarn,
2041 &mplrs.curwarn_len,
2042 data);
2043 /* but also to stderr now */
2044 serr_string = append_out(serr_string, &len2, data);
2045 }
2046 next = out->next;
2047 free(out->type);
2048 free(out->data);
2049 free(out);
2050 out = next;
2051 }
2052
2053 if (strlen(out_string)>0 && strcmp(out_string, "\n"))
2054 send_output(1, out_string);
2055 else
2056 free(out_string);
2057 if (strlen(serr_string)>0 && strcmp(serr_string, "\n"))
2058 send_output(0, serr_string);
2059 else
2060 free(serr_string);
2061 if (strlen(redund_string)>0 && strcmp(redund_string, "\n"))
2062 send_output(3, redund_string);
2063 else
2064 free(redund_string);
2065 }
2066
2067 /* if we've produced anything for finalwarn, add it to finalwarn now.
2068 * buffered in curwarn to clear on overflow, avoiding multiple copies
2069 */
process_curwarn(void)2070 void process_curwarn(void)
2071 {
2072 if (strlen(mplrs.curwarn)>0)
2073 {
2074 mplrs.finalwarn = append_out(mplrs.finalwarn,
2075 &mplrs.finalwarn_len,mplrs.curwarn);
2076 mplrs.curwarn[0] = '\0';
2077 }
2078 }
2079
2080
2081 /* send this string to the consumer to output.
2082 * If dest==1, then it goes to the output file (stdout if no output file).
2083 * If dest==0, then it goes to stderr.
2084 * If dest==2, it's an overflow message: only the first one printed (stderr)
2085 * If dest==3, it's a redund output: consumer handles specially
2086 *
2087 * The pointer str is surrendered to send_output and should not be changed
2088 * It will be freed once the send is complete.
2089 */
2090 /* str should not be NULL */
send_output(int dest,char * str)2091 void send_output(int dest, char *str)
2092 {
2093 msgbuf *msg = malloc(sizeof(msgbuf));
2094 int *header = malloc(sizeof(int)*3);
2095
2096 header[0] = dest;
2097 header[1] = strlen(str);
2098 header[2] = mplrs.my_tag; /* to ensure the dest/str pair
2099 * remains intact even if another
2100 * send happens in between
2101 */
2102 msg->req = malloc(sizeof(MPI_Request)*2);
2103 msg->buf = malloc(sizeof(void *)*2);
2104 msg->buf[0] = header;
2105 msg->buf[1] = str;
2106 msg->count = 2;
2107 msg->target = CONSUMER;
2108 msg->queue = 1;
2109
2110 msg->tags = malloc(sizeof(int)*2);
2111 msg->sizes = malloc(sizeof(int)*2);
2112 msg->types = malloc(sizeof(MPI_Datatype)*2);
2113
2114 msg->types[1] = MPI_CHAR;
2115 msg->sizes[1] = header[1]+1;
2116
2117 msg->tags[1] = mplrs.my_tag;
2118
2119 mplrs.my_tag++;
2120
2121 msg->next = mplrs.outgoing;
2122 mplrs.outgoing = msg;
2123 MPI_Isend(header, 3, MPI_INT, CONSUMER, 7, MPI_COMM_WORLD,
2124 msg->req);
2125 }
2126
2127 /* lrs returned this unexplored cobasis - send it along.
2128 * For now converts to a string and re-uses the old code,
2129 * but avoids horrible process_cobasis(). TODO: send along as
2130 * longs instead.
2131 */
post_R(lrs_restart_dat * cob)2132 void post_R(lrs_restart_dat *cob)
2133 {
2134 int i=0, offs=0;
2135 int arg = 0;
2136 int hull = cob->count[5];
2137 char *newcob = NULL;
2138
2139 while (arg == 0)
2140 {
2141 arg = offs;
2142 if (hull == 0)
2143 offs = snprintf(newcob,arg," %ld %ld %ld!%ld ",
2144 cob->count[1],cob->count[0],
2145 cob->count[2],cob->depth);
2146 else
2147 offs = snprintf(newcob,arg," %ld %ld!%ld ",
2148 cob->count[1],cob->count[2],
2149 cob->depth);
2150 for (i=0; i<cob->d; i++)
2151 offs += snprintf(newcob+offs,arg,"%ld ", cob->facet[i]);
2152 if (newcob == NULL)
2153 newcob = malloc(sizeof(char)*(offs+1));
2154 }
2155
2156 mplrs.cobasis_list = addlist(mplrs.cobasis_list, newcob);
2157 }
2158
2159 /* process_cobasis is no longer used */
2160 /* called from process_output to handle a 'cobasis',
2161 * add to queue to return to master
2162 */
2163 /* awful string-hacking, basically copied from plrs processCobasis() */
2164 /* First, if first characters are 'F#', it's a hull. otherwise it's not.
2165 * Then, remove ignore_chars.
2166 * Then, remove everything starting from the first 'I'
2167 * Then, copy everything verbatim, except:
2168 * if it's a hull, replace everything between second and third spaces
2169 * by '0'
2170 * if it's not a hull, replace everything between third and fourth
2171 * spaces by '0'
2172 * (this is resetting the depth to be 0 for a restart)
2173 */
process_cobasis(const char * newcob)2174 void process_cobasis(const char *newcob)
2175 {
2176 int nlen = strlen(newcob);
2177 char *buf = malloc(sizeof(char) * (nlen+1));
2178 int i,j,k;
2179 int num_spaces=0; /* we count the number of spaces */
2180 char ignore_chars[] = "#VRBh=facetsFvertices/rays";
2181 char c;
2182 int num_ignore = strlen(ignore_chars);
2183 int hull = 0;
2184 /*int replace = 0;*/ /* no longer hacking depth to 0 */
2185
2186 mprintf3(("%d: process_cobasis( %s )", mplrs.rank, newcob));
2187
2188 if (nlen>1 && newcob[0]=='F' && newcob[1]=='#')
2189 hull = 1;
2190
2191 for (i=0,j=0; i<=nlen; i++)
2192 {
2193 c = newcob[i];
2194 /* ignore ignore_chars */
2195 for (k=0; k<=num_ignore; k++)
2196 if (c == ignore_chars[k])
2197 break;
2198 if (k<=num_ignore)
2199 continue;
2200
2201 if (c=='I')
2202 break;
2203
2204 if (c==' ') /* count spaces to set depth to 0 for restart */
2205 {
2206 num_spaces++;
2207 if ( (num_spaces==2 && hull==1) ||
2208 (num_spaces==3 && hull==0) )
2209 {
2210 /*replace = 1;*/
2211 buf[j++]='!'; /* mark depth ... */
2212 continue;
2213 }
2214 #if 0
2215 else if ( (num_spaces==3 && hull==1) ||
2216 (num_spaces==4 && hull==0) )
2217 replace = 0;
2218 #endif
2219 buf[j++] = c; /* copy other spaces */
2220 continue;
2221 }
2222 #if 0
2223 if (replace == 1) /* remove three lines here */
2224 buf[j++]='0';/* to play with mindepth */
2225 else /* leaving only the next line */
2226 #endif
2227 buf[j++]=c;
2228 }
2229 buf[j]='\0';
2230
2231 mprintf3((" produced %s\n",buf));
2232 mplrs.cobasis_list = addlist(mplrs.cobasis_list, buf);
2233 }
2234
addlist(slist * list,void * buf)2235 slist *addlist(slist *list, void *buf)
2236 {
2237 slist *n = malloc(sizeof(struct slist));
2238 n->data = buf;
2239 n->next = list;
2240 return n;
2241 }
2242
2243 /* mplrs.cobasis_list may have things to send to the master.
2244 * Send the header, and then the cobases to add to L.
2245 */
return_unfinished_cobases(void)2246 void return_unfinished_cobases(void)
2247 {
2248 int listsize;
2249 slist *list, *next;
2250 int *lengths=NULL;
2251 char *cobases=NULL;
2252 int size = 0;
2253 int i;
2254 int start;
2255 /* header is (strlen(cobases), length of lengths, mplrs.my_tag) */
2256 int *header = malloc(sizeof(int)*3);
2257 msgbuf *msg = malloc(sizeof(msgbuf));
2258 msg->target = MASTER;
2259
2260 for (listsize=0, list=mplrs.cobasis_list; list; list=list->next)
2261 {
2262 listsize++;
2263 size += strlen((char *)list->data);
2264 }
2265
2266 if (listsize == 0)
2267 {
2268 header[0] = -1;
2269 header[1] = -1;
2270 header[2] = -1;
2271 msg->buf = malloc(sizeof(void *));
2272 msg->buf[0] = header;
2273 msg->count = 1;
2274 msg->req = malloc(sizeof(MPI_Request));
2275 msg->queue = 0;
2276 msg->tags = NULL;
2277 msg->sizes = NULL;
2278 msg->types = NULL;
2279 MPI_Isend(header, 3, MPI_INT, MASTER, 10, MPI_COMM_WORLD,
2280 msg->req);
2281 msg->next = mplrs.outgoing;
2282 mplrs.outgoing = msg;
2283 return;
2284 }
2285
2286 lengths = malloc(sizeof(int)*listsize); /*allows unconcatenate*/
2287 cobases = malloc(sizeof(char)*(size+1));/*concatenated + 1 \0*/
2288
2289 for (start=0, i=0, list=mplrs.cobasis_list; list; list=next, i++)
2290 {
2291 next = list->next;
2292
2293 strcpy(cobases+start, (char *)list->data);
2294 lengths[i] = strlen((char *)list->data);
2295 start+=lengths[i];
2296
2297 free(list->data);
2298 free(list);
2299 }
2300 /* final \0 is there */
2301
2302 header[0] = listsize;
2303 header[1] = size+1;
2304 header[2] = mplrs.my_tag;
2305
2306 msg->req = malloc(sizeof(MPI_Request) * 3);
2307 msg->buf = malloc(sizeof(void *) * 3);
2308 msg->buf[0] = header;
2309 msg->buf[1] = cobases;
2310 msg->buf[2] = lengths;
2311
2312 msg->count = 3;
2313 msg->queue = 0;
2314 msg->tags = NULL;
2315 msg->sizes = NULL;
2316 msg->types = NULL;
2317
2318 mprintf2(("%d: Queued send of %d cobases for L\n",mplrs.rank,listsize));
2319 MPI_Isend(header, 3, MPI_INT, MASTER, 10, MPI_COMM_WORLD, msg->req);
2320 MPI_Isend(cobases, header[1], MPI_CHAR, MASTER, mplrs.my_tag,
2321 MPI_COMM_WORLD, msg->req+1);
2322 MPI_Isend(lengths, listsize, MPI_INT, MASTER, mplrs.my_tag,
2323 MPI_COMM_WORLD, msg->req+2);
2324 mplrs.my_tag++;
2325
2326 msg->next = mplrs.outgoing;
2327 mplrs.outgoing = msg;
2328 mplrs.cobasis_list = NULL;
2329 }
2330
2331 /* dest is a string in a buffer with size *size.
2332 * Append src and a newline to the string, realloc()ing as necessary,
2333 * returning the new pointer and updating size.
2334 */
append_out(char * dest,int * size,const char * src)2335 char *append_out(char *dest, int *size, const char *src)
2336 {
2337 int len1 = strlen(dest);
2338 int len2 = strlen(src);
2339 int newsize = *size;
2340 char *newp = dest;
2341
2342 if (src[len2-1]=='\n') /* remove trailing \n, added below */
2343 len2--;
2344
2345 if (len1 + len2 + 2 > *size)
2346 {
2347 newsize = newsize<<1;
2348 while ((newsize < len1+len2+2) && newsize)
2349 newsize = newsize<<1;
2350
2351 if (!newsize)
2352 newsize = len1+len2+2;
2353
2354 newp = realloc(dest, sizeof(char) * newsize);
2355 if (!newp)
2356 {
2357 newsize = len1+len2+2;
2358 newp = realloc(dest, sizeof(char) * newsize);
2359 if (!newp)
2360 {
2361 printf("%d: Error no memory (%d)\n",mplrs.rank,
2362 newsize);
2363 /* MPI_Finalize(); */
2364 exit(2);
2365 }
2366 }
2367 *size = newsize;
2368 }
2369
2370 strncat(newp, src, len2);
2371 newp[len1+len2]='\n';
2372 newp[len1+len2+1]='\0';
2373 return newp;
2374 }
2375
2376 /************
2377 * consumer *
2378 ************/
2379
mplrs_consumer(void)2380 int mplrs_consumer(void)
2381 {
2382 int i;
2383 int check = 0;
2384 initial_print(); /* print version and other information */
2385 /* initialize MPI_Requests and 3*int buffers for incoming messages */
2386 consumer.prodreq = malloc(sizeof(MPI_Request)*mplrs.size);
2387 consumer.prodibf = malloc(sizeof(int)*3*mplrs.size);
2388 consumer.num_producers = mplrs.size - 2;
2389 consumer.overflow = malloc(sizeof(int)*mplrs.size);
2390
2391 if (mplrs.redund) /* don't wait for a begin when doing redund */
2392 consumer.waiting_initial = 0;
2393
2394 for (i=0; i<mplrs.size; i++)
2395 {
2396 consumer.overflow[i] = 0;
2397 if (i==CONSUMER)
2398 continue;
2399 MPI_Irecv(consumer.prodibf+(i*3), 3, MPI_INT, i, 7,
2400 MPI_COMM_WORLD, consumer.prodreq+i);
2401 }
2402
2403 /* final_print condition handles overflow cleanstop in initial
2404 * job
2405 */
2406 while (consumer.num_producers>0 || consumer.incoming ||
2407 (consumer.waiting_initial && consumer.final_print))
2408 {
2409 /*printf("%d %d %d %d\n", consumer.num_producers, consumer.incoming,
2410 consumer.waiting_initial, consumer.final_print);
2411 */
2412 /* check if someone is trying to send us output */
2413 /* if so, queue any incoming messages */
2414 consumer_start_incoming();
2415
2416 /* check for completed message to process */
2417 consumer_proc_messages();
2418
2419 /* check signals */
2420 mplrs_handlesigs();
2421 }
2422
2423 mprintf2(("C: getting stats and exiting\n"));
2424
2425 for (i=0; i<mplrs.size; i++)
2426 {
2427 if (i==CONSUMER || i==MASTER)
2428 continue;
2429 recv_counting_stats(i);
2430 }
2431
2432 free(consumer.prodreq);
2433 free(consumer.prodibf);
2434 consumer.prodreq = NULL; consumer.prodibf = NULL;
2435
2436 mprintf2(("C: checking with master whether checkpointing ...\n"));
2437 MPI_Recv(&check, 1, MPI_INT, MASTER,1,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
2438 mprintf2(("C: checkpointing flag is %d\n", check));
2439
2440 if (check == CHECKFLAG)
2441 return consumer_checkpoint();
2442
2443 recv_master_stats(); /* gets stats on size of L, etc */
2444 if (consumer.final_print)
2445 final_print();
2446 free(mplrs.input); /* must be after final_print, for redund check */
2447 if (consumer.output!=stdout)
2448 fclose(consumer.output);
2449 free(consumer.overflow);
2450 free(mplrs.tfn);
2451 if (mplrs.R != NULL) /* redund final_print calls mplrs_worker_init */
2452 {
2453 free(mplrs.R->redineq);
2454 free(mplrs.R->facet);
2455 free(mplrs.R);
2456 }
2457 free(consumer.redineq);
2458 mplrs_freemps();
2459 free(mplrs.finalwarn);
2460 free(mplrs.curwarn);
2461 MPI_Finalize();
2462 return 0;
2463 }
2464
2465 /* check if anyone is trying to send us their output */
2466 /* if master is trying, probably means we're going to checkpoint */
2467 /* in any case, queue any incoming messages */
consumer_start_incoming(void)2468 void consumer_start_incoming(void)
2469 {
2470 int i;
2471 int flag;
2472 for (i=0; i<mplrs.size; i++)
2473 {
2474 /* don't check ourself and workers that have exited */
2475 if (i==CONSUMER || consumer.prodreq[i] == MPI_REQUEST_NULL)
2476 continue;
2477 MPI_Test(consumer.prodreq+i, &flag, MPI_STATUS_IGNORE);
2478 if (!flag) /* not incoming, check next */
2479 continue;
2480 mprintf3(("C: received message from %d\n",i));
2481 #if 0
2482 if (i==MASTER && consumer.prodibf[0]==CHECKFLAG)
2483 { /* start checkpoint */
2484 /* this condition is no longer reachable */
2485 consumer.checkpoint = 1;
2486 mprintf(("*Checkpointing\n"));
2487 continue;
2488 }
2489 else
2490 #endif
2491 if (i==MASTER && consumer.prodibf[0]==STOPFLAG)
2492 {
2493 /* reachable, since consumer must know if
2494 * overflow happens in initial job ... */
2495 consumer.final_print = 0;
2496 mprintf(("C: stopping...\n"));
2497 continue;
2498 }
2499 else if (i==MASTER && consumer.prodibf[0]==RESTARTFLAG)
2500 {
2501 /* get counting stats for restart */
2502 recv_counting_stats(MASTER);
2503 consumer.waiting_initial = 0;
2504 mprintf(("C: Restarted\n"));
2505 /* master may restart and later checkpoint */
2506 MPI_Irecv(consumer.prodibf+(i*3), 3, MPI_INT, i, 7,
2507 MPI_COMM_WORLD, consumer.prodreq+i);
2508 continue;
2509 }
2510
2511 if (consumer.prodibf[3*i]<=0 && consumer.prodibf[3*i+1]<=0)
2512 {
2513 /* producer i has finished and will exit */
2514 consumer.num_producers--;
2515 mprintf(("C: Producer %d reported exiting, %d left.\n",
2516 i, consumer.num_producers));
2517 continue;
2518 }
2519
2520 /* otherwise, we have the normal situation -- the producer
2521 * wants to send us some output.
2522 */
2523 consumer.incoming =
2524 consumer_queue_incoming(consumer.prodibf+3*i, i);
2525 MPI_Irecv(consumer.prodibf+(i*3), 3, MPI_INT, i, 7,
2526 MPI_COMM_WORLD, consumer.prodreq+i);
2527 }
2528 }
2529
2530 /* target has sent us this header. Queue the corresponding receive and
2531 * return the new value of consumer.incoming.
2532 */
consumer_queue_incoming(int * header,int target)2533 msgbuf *consumer_queue_incoming(int *header, int target)
2534 {
2535 msgbuf *curhead = consumer.incoming;
2536 msgbuf *newmsg = malloc(sizeof(msgbuf));
2537
2538 newmsg->req = malloc(sizeof(MPI_Request)*2);
2539 newmsg->buf = malloc(sizeof(void *));
2540 newmsg->buf[0] = malloc(sizeof(char)*(header[1]+1));
2541 newmsg->count = 1;
2542 newmsg->target = target;
2543 newmsg->next = curhead;
2544 newmsg->queue = 0;
2545 newmsg->tags = NULL;
2546 newmsg->sizes = NULL;
2547 newmsg->types = NULL;
2548
2549 newmsg->data = header[0]; /* bound for stdout or output file */
2550
2551 /* get my_tag from producer via header[2] to uniquely identify msg */
2552 MPI_Irecv(newmsg->buf[0], header[1]+1, MPI_CHAR, target, header[2],
2553 MPI_COMM_WORLD, newmsg->req);
2554
2555 mprintf3(("C: Receiving from %d (%d,%d,%d)",target,header[0],header[1],
2556 header[2]));
2557 return newmsg;
2558 }
2559
2560 /* update consumer.redineq with the redundant inequalities in rstring */
2561 /* these came from process from (used for an optimization)
2562 */
consumer_process_redund(const char * rstring,int from)2563 void consumer_process_redund(const char *rstring, int from)
2564 {
2565 const char *start=rstring;
2566 char *endptr=NULL;
2567 long index;
2568 int i=0;
2569
2570 mprintf2(("C: processing redund_string %s\n", rstring));
2571 do {
2572 index = strtol(start, &endptr, 10);
2573 if (index!=0)
2574 {
2575 mprintf3(("C: got redundant inequality %ld\n",index));
2576 i++;
2577 consumer.redineq[index] = from;
2578 start = endptr;
2579 }
2580 } while (index!=0);
2581 mprintf2(("C: got %d redundant inequalities\n", i));
2582 }
2583
2584 /* check our incoming messages, process and remove anything that
2585 * has completed
2586 */
consumer_proc_messages(void)2587 void consumer_proc_messages(void)
2588 {
2589 msgbuf *msg, *prev=NULL, *next;
2590 int i,len,omit;
2591
2592 for (msg=consumer.incoming; msg; msg=next)
2593 {
2594 omit = 0;
2595 next=msg->next;
2596 if (outgoing_msgbuf_completed(msg))
2597 {
2598 if (consumer.waiting_initial &&
2599 consumer.final_print &&
2600 msg->target != INITIAL && msg->target != MASTER)
2601 {
2602 prev = msg; /* 2020.5.29 need to update prev
2603 * so we don't lose this msg */
2604 continue;
2605 }
2606 /* final_print condition is false when overflow
2607 * has occurred. No longer important to print all
2608 * output, but need to print or discard it in case
2609 * overflow prevents us from seeing "begin"
2610 */
2611
2612 /* we wait on all other output until we've printed
2613 * the initial output containing ...begin\n
2614 * to ensure pretty output, if this is the begin,
2615 * flip the flag.
2616 */
2617 if (consumer.waiting_initial == 1 && msg->target == INITIAL)
2618 {
2619 len=strlen((char *)msg->buf[0])-5;
2620 for (i=0; i<len; i++)
2621 if (!strncmp("begin\n",
2622 (char*)msg->buf[0]+i,6))
2623 {
2624 mprintf3(("C: found begin\n"));
2625 phase1_print();
2626 consumer.waiting_initial = 0;
2627 break;
2628 }
2629 if (consumer.waiting_initial) /* no begin here*/
2630 { /* can happen in 7.1 */
2631 prev=msg; /* wait for a begin line */
2632 continue;
2633 }
2634 }
2635 else if (consumer.waiting_initial == 2 && msg->target == INITIAL)
2636 { /* could combine with other condition above */
2637 /* maybe easier to read if separate */
2638 prev = msg; /* 2020.5.29 same here*/
2639 continue; /* wait for master to report warnings */
2640 }
2641 else if (consumer.waiting_initial == 2 && msg->target == MASTER)
2642 {
2643 consumer.waiting_initial = 1;
2644 /* if no master warning messages produced,
2645 * master sends a space to prod us along.
2646 * don't print that space.
2647 */
2648 if (!strcmp(msg->buf[0], " \n"))
2649 omit = 1;
2650 }
2651 /* print the 'begin' only after phase1_print */
2652 if (msg->data == 1 && !omit)
2653 {
2654 fprintf(consumer.output, "%s",
2655 (char*)msg->buf[0]);
2656 /* flush to get more streaminess to output
2657 * file and not break inside a line, as
2658 * requested
2659 */
2660 fflush(consumer.output);
2661 }
2662 else if (msg->data == 2 && !omit)
2663 {
2664 if (!consumer.oflow_flag)
2665 {
2666 consumer.oflow_flag = 1;
2667 fprintf(stderr,"%s",(char*)msg->buf[0]);
2668 }
2669 }
2670 else if (msg->data == 3 && !omit) /* redund string */
2671 consumer_process_redund((char*)msg->buf[0],
2672 msg->target);
2673 else if (!omit) /* headed to stderr */
2674 fprintf(stderr, "%s", (char*)msg->buf[0]);
2675
2676 free_msgbuf(msg);
2677 if (prev)
2678 prev->next = next;
2679 else
2680 consumer.incoming = next;
2681 continue;
2682 }
2683 prev=msg;
2684 }
2685 }
2686
2687 /* We are checkpointing instead of a normal exit.
2688 * Send counting stats to master, then exit quietly
2689 */
consumer_checkpoint(void)2690 int consumer_checkpoint(void)
2691 {
2692 int len;
2693 char *str;
2694 char *vol = cprat("", mplrs.Vnum, mplrs.Vden);
2695 send_counting_stats(MASTER);
2696 mprintf(("C: in consumer_checkpoint\n"));
2697 MPI_Recv(&len, 1, MPI_INT, MASTER, 1, MPI_COMM_WORLD,
2698 MPI_STATUS_IGNORE);
2699 if (len == -1) /* master produces checkpoint file */
2700 {
2701 mplrs_freemps();
2702 MPI_Finalize();
2703 return 0;
2704 }
2705 fprintf(consumer.output, "*Checkpoint file follows this line\n");
2706 fprintf(consumer.output, "mplrs4\n%llu %llu %llu %llu %llu\n%s\n%llu\n",
2707 mplrs.rays, mplrs.vertices, mplrs.bases, mplrs.facets,
2708 mplrs.intvertices,vol,mplrs.deepest);
2709 free(vol);
2710 while (1)
2711 {
2712 MPI_Recv(&len, 1, MPI_INT, MASTER, 1, MPI_COMM_WORLD,
2713 MPI_STATUS_IGNORE);
2714 if (len<0)
2715 break;
2716 str = malloc(sizeof(char)*len);
2717 MPI_Recv(str, len, MPI_CHAR, MASTER, 1, MPI_COMM_WORLD,
2718 MPI_STATUS_IGNORE);
2719 fprintf(consumer.output,"%s\n",str);
2720 free(str);
2721 }
2722 fprintf(consumer.output,"*Checkpoint finished above this line\n");
2723 mplrs_freemps();
2724 MPI_Finalize();
2725 return 0;
2726 }
2727
2728 /* check whether this outgoing msgbuf has completed.
2729 * If the msg is queued (msg->queue == 1),
2730 * then if the first part has completed, send the remaining parts
2731 * Don't use with *queued* incoming msgbuf.
2732 */
outgoing_msgbuf_completed(msgbuf * msg)2733 int outgoing_msgbuf_completed(msgbuf *msg)
2734 {
2735 int flag;
2736 int count = msg->count;
2737 int i;
2738 if (msg->queue != 1)
2739 {
2740 MPI_Testall(count, msg->req, &flag, MPI_STATUSES_IGNORE);
2741 return flag;
2742 }
2743 MPI_Test(msg->req, &flag, MPI_STATUS_IGNORE);
2744 if (!flag)
2745 return flag;
2746 /* first completed, send the rest of the queued send */
2747 mprintf3(("%d: Sending second part of queued send to %d\n",
2748 mplrs.rank, msg->target));
2749 for (i=1; i<count; i++)
2750 {
2751 if (msg->buf[i])
2752 MPI_Isend(msg->buf[i], msg->sizes[i], msg->types[i],
2753 msg->target, msg->tags[i], MPI_COMM_WORLD,
2754 msg->req+i);
2755 }
2756 msg->queue = 0;
2757 return 0;
2758 }
2759
free_msgbuf(msgbuf * msg)2760 void free_msgbuf(msgbuf *msg)
2761 {
2762 int i;
2763 for (i=0; i<msg->count; i++)
2764 free(msg->buf[i]);
2765 free(msg->buf);
2766 free(msg->req);
2767 free(msg->tags);
2768 free(msg->sizes);
2769 free(msg->types);
2770 free(msg);
2771 return;
2772 }
2773
reverse_list(outlist * head)2774 outlist *reverse_list(outlist* head)
2775 {
2776 outlist * last = head, * new_head = NULL;
2777 while(last)
2778 {
2779 outlist * tmp = last;
2780 last = last->next;
2781 tmp->next = new_head;
2782 new_head = tmp;
2783 }
2784 return new_head;
2785 }
2786
2787 /* send stats on size of L, etc */
send_master_stats(void)2788 void send_master_stats(void)
2789 {
2790 unsigned long stats[4] = {master.tot_L, master.num_empty, 0, 0};
2791 mprintf3(("M: Sending master_stats to consumer\n"));
2792 MPI_Send(stats, 4, MPI_UNSIGNED_LONG, CONSUMER, 1, MPI_COMM_WORLD);
2793 return;
2794 }
2795 /* get master stats on size of L, etc */
recv_master_stats(void)2796 void recv_master_stats(void)
2797 {
2798 unsigned long stats[4] = {0,0,0,0};
2799 MPI_Recv(stats, 4, MPI_UNSIGNED_LONG, MASTER, 1, MPI_COMM_WORLD,
2800 MPI_STATUS_IGNORE);
2801 master.tot_L = stats[0];
2802 master.num_empty = stats[1];
2803 return;
2804 }
2805
2806 /* send stats to target for final print */
send_counting_stats(int target)2807 void send_counting_stats(int target)
2808 {
2809 char *vol = cprat("", mplrs.Vnum, mplrs.Vden);
2810 unsigned long long stats[10] = {mplrs.rays, mplrs.vertices, mplrs.bases,
2811 mplrs.facets, mplrs.intvertices,
2812 strlen(vol)+1, mplrs.deepest, mplrs.overflow,
2813 mplrs.linearities, strlen(mplrs.finalwarn)+1};
2814 mprintf3(("%d: sending counting stats to %d\n", mplrs.rank, target));
2815
2816 MPI_Send(stats, 10, MPI_UNSIGNED_LONG_LONG, target, 1, MPI_COMM_WORLD);
2817 MPI_Send(vol, stats[5], MPI_CHAR, target, 1, MPI_COMM_WORLD);
2818 MPI_Send(mplrs.finalwarn, stats[9], MPI_CHAR, target, 1, MPI_COMM_WORLD);
2819 free(vol);
2820 return;
2821 }
2822
2823 /* gets counting stats from target */
recv_counting_stats(int target)2824 void recv_counting_stats(int target)
2825 {
2826 char *vol, *finalwarn;
2827 unsigned long long stats[10];
2828 MPI_Recv(stats, 10, MPI_UNSIGNED_LONG_LONG, target, 1, MPI_COMM_WORLD,
2829 MPI_STATUS_IGNORE);
2830 mprintf3(("%d: got counting stats from %d\n", mplrs.rank, target));
2831 mplrs.rays+=stats[0];
2832 mplrs.vertices+=stats[1];
2833 mplrs.bases+=stats[2];
2834 mplrs.facets+=stats[3];
2835 mplrs.intvertices+=stats[4];
2836 if (stats[6] > mplrs.deepest)
2837 mplrs.deepest = stats[6];
2838 if (mplrs.rank == CONSUMER)
2839 consumer.overflow[target] = stats[7];
2840 if (stats[8] > mplrs.linearities)
2841 mplrs.linearities = stats[8];
2842 vol = malloc(sizeof(char)*stats[5]);
2843 MPI_Recv(vol, stats[5], MPI_CHAR, target, 1, MPI_COMM_WORLD,
2844 MPI_STATUS_IGNORE);
2845 /* following safe even #ifdef LRSLONG, volume always 0/1 */
2846 plrs_readrat(mplrs.Tnum, mplrs.Tden, vol);
2847 copy(mplrs.tN, mplrs.Vnum); copy(mplrs.tD, mplrs.Vden);
2848 linrat(mplrs.tN, mplrs.tD, 1L, mplrs.Tnum, mplrs.Tden,
2849 1L, mplrs.Vnum, mplrs.Vden);
2850 free(vol);
2851 finalwarn = malloc(sizeof(char)*stats[9]);
2852 MPI_Recv(finalwarn, stats[9], MPI_CHAR, target, 1, MPI_COMM_WORLD,
2853 MPI_STATUS_IGNORE);
2854 if (stats[9]>2)
2855 mplrs.finalwarn = append_out(mplrs.finalwarn,
2856 &mplrs.finalwarn_len,
2857 finalwarn);
2858 free(finalwarn);
2859 return;
2860 }
2861
2862 /* do the initial print */
initial_print(void)2863 void initial_print(void)
2864 {
2865 #ifdef MA
2866 fprintf(consumer.output, "*mplrs:%s%s(hybrid arithmetic)%d processes\n",
2867 TITLE, VERSION, mplrs.size);
2868 #elif defined(GMP)
2869 fprintf(consumer.output, "*mplrs:%s%s(%s gmp v.%d.%d)%d processes\n",
2870 TITLE,VERSION,ARITH,__GNU_MP_VERSION,
2871 __GNU_MP_VERSION_MINOR,mplrs.size);
2872 #elif defined(FLINT)
2873 fprintf(consumer.output, "*mplrs:%s%s(%s, %dbit flint v.%s)%d processes\n",
2874 TITLE,VERSION,ARITH,FLINT_BITS,FLINT_VERSION,
2875 mplrs.size);
2876 #elif defined(SAFE)
2877 fprintf(consumer.output, "*mplrs:%s%s(%s,%s,overflow checking)%d processes\n",
2878 TITLE,VERSION,BIT,ARITH,mplrs.size);
2879 #else
2880 fprintf(consumer.output, "*mplrs:%s%s(%s,%s,no overflow checking)%d processes\n",
2881 TITLE,VERSION,BIT,ARITH,mplrs.size);
2882 #endif
2883 fprintf(consumer.output, "*Input taken from %s\n",
2884 mplrs.input_filename);
2885 if (mplrs.redund == 0)
2886 {
2887 fprintf(consumer.output, "*Starting depth of %d maxcobases=%d ",
2888 master.initdepth, master.maxcobases);
2889 fprintf(consumer.output, "maxdepth=%d lmin=%d lmax=%d scale=%d\n",
2890 master.maxdepth, master.lmin,
2891 master.lmax, master.scalec);
2892 if (mplrs.countonly)
2893 fprintf(consumer.output, "*countonly\n");
2894 }
2895 else
2896 fprintf(consumer.output, "*redund\n");
2897 if (consumer.output==stdout)
2898 return;
2899 #ifdef MA
2900 printf("*mplrs:%s%s(hybrid arithmetic)%d processes\n",
2901 TITLE, VERSION, mplrs.size);
2902 #elif defined(GMP)
2903 printf("*mplrs:%s%s(%s gmp v.%d.%d)%d processes\n",
2904 TITLE,VERSION,ARITH,__GNU_MP_VERSION,
2905 __GNU_MP_VERSION_MINOR,mplrs.size);
2906 #elif defined(FLINT)
2907 printf("*mplrs:%s%s(%s, %dbit flint v.%s)%d processes\n",
2908 TITLE,VERSION,ARITH,FLINT_BITS,FLINT_VERSION,
2909 mplrs.size);
2910 #elif defined(SAFE)
2911 printf("*mplrs:%s%s(%s,%s,overflow checking)%d processes\n",
2912 TITLE,VERSION,BIT,ARITH,mplrs.size);
2913 #else
2914 printf("*mplrs:%s%s(%s,%s,no overflow checking)%d processes\n",
2915 TITLE,VERSION,BIT,ARITH,mplrs.size);
2916 #endif
2917 printf("*Input taken from %s\n",mplrs.input_filename);
2918 printf("*Output written to: %s\n",consumer.output_filename);
2919 if (mplrs.redund == 0)
2920 {
2921 printf("*Starting depth of %d maxcobases=%d ", master.initdepth,
2922 master.maxcobases);
2923 printf("maxdepth=%d lmin=%d lmax=%d scale=%d\n", master.maxdepth,
2924 master.lmin, master.lmax, master.scalec);
2925 if (mplrs.countonly)
2926 printf("*countonly\n");
2927 }
2928 else
2929 printf("*redund\n");
2930 }
2931
2932 /* do the "*Phase 1 time: " print */
phase1_print(void)2933 void phase1_print(void)
2934 {
2935 struct timeval cur;
2936 gettimeofday(&cur, NULL);
2937 fprintf(consumer.output, "*Phase 1 time: %ld seconds.\n",
2938 cur.tv_sec-mplrs.start.tv_sec);
2939 if (consumer.output_filename == NULL)
2940 return;
2941 printf("*Phase 1 time: %ld seconds.\n",cur.tv_sec-mplrs.start.tv_sec);
2942 return;
2943 }
2944
2945 /* sigh... when doing redund, we re-run a final redund check of all
2946 * detected redundant lines, in order to avoid removing e.g. all
2947 * identical copies of a fixed line (when each on a different worker).
2948 * this is done on the consumer. but then lrslib does post_output, which
2949 * normally can't be done on the consumer. so here we intercept the data
2950 * produced by this final redund run, and update consumer.redineq ...
2951 */
consumer_finalredund_handler(const char * data)2952 void consumer_finalredund_handler(const char *data)
2953 {
2954 int i, m = mplrs.P->m_A;
2955 for (i=0; i<=m; i++)
2956 consumer.redineq[i] = 0;
2957 consumer_process_redund(data, CONSUMER);
2958 }
2959
2960 /* set R->redineq using consumer.redineq, for final check */
consumer_setredineq(void)2961 void consumer_setredineq(void)
2962 {
2963 int i, m, max, maxi;
2964 int *counts = calloc(mplrs.size, sizeof(int));
2965 m = mplrs.P->m_A;
2966
2967 /* hack output to file (if using file).
2968 * done here in case of re-init on overflow (eg redund on mit.ine)
2969 */
2970 lrs_ofp = consumer.output;
2971
2972 /* optimization per DA, with current way of splitting redund
2973 * we can choose the worker (maxi) that produced the most redundant
2974 * inequalities and not recheck those at the end
2975 */
2976 for (i=1; i<=m; i++)
2977 if (consumer.redineq[i]!=0)
2978 counts[consumer.redineq[i]]++;
2979 max = -1;
2980 maxi = -1; /* not needed, for warning removal only */
2981 for (i=0; i<mplrs.size; i++)
2982 if (counts[i]>max)
2983 {
2984 max=counts[i];
2985 maxi = i;
2986 }
2987 free(counts);
2988
2989 mprintf(("C: resetting redineq for final check (maxi:%d,max:%d) on:",
2990 maxi,max));
2991 for (i=1; i<=m; i++)
2992 {
2993 if (consumer.redineq[i] == maxi && max>0) /* DA: max=0 means no redundancies found*/
2994 mplrs.R->redineq[i] = -1;
2995 else if (consumer.redineq[i] > 0)
2996 {
2997 mplrs.R->redineq[i] = 1;
2998 mprintf((" %d", i));
2999 }
3000 else if (mplrs.R->redineq[i] != 2)
3001 mplrs.R->redineq[i] = 0;
3002 }
3003 mprintf(("\n"));
3004 mplrs.R->verifyredund = 1;
3005 }
3006
3007 /* do the final print */
final_print(void)3008 void final_print(void)
3009 {
3010 char *argv[] = {argv0};
3011 struct timeval end;
3012 char *vol=NULL;
3013 #ifdef MA
3014 int i, num64=0, num128=0, numgmp=0;
3015 #endif
3016
3017 if (mplrs.redund)
3018 {
3019 mplrs_worker_init();
3020 consumer_setredineq();
3021
3022 mprintf(("C: calling lrs_main for final redund check\n"));
3023 lrs_ofp = consumer.output; /* HACK to get redund output in
3024 * output file ... */
3025 consumer.final_redundcheck = 1;
3026 run_lrs(1, argv, 0, 1, NULL, NULL);
3027 mprintf(("C: lrs_main returned from final redund check\n"));
3028 if (mplrs.overflow != 3)
3029 mplrs.lrs_main(0,NULL,&mplrs.P,&mplrs.Q,0,2,NULL,mplrs.R);
3030 }
3031
3032 else
3033 fprintf(consumer.output, "end\n");
3034
3035 if (strlen(mplrs.finalwarn)>1) /*avoid spurious newline from append_out*/
3036 fprintf(consumer.output, "%s", mplrs.finalwarn);
3037
3038 /* after the (expensive) final redund check */
3039 gettimeofday(&end, NULL);
3040
3041 fprintf(consumer.output, "*Total number of jobs: %lu, L became empty %lu times, tree depth %llu\n", master.tot_L, master.num_empty,mplrs.deepest);
3042 if (consumer.output_filename != NULL)
3043 printf("*Total number of jobs: %lu, L became empty %lu times, tree depth %llu\n", master.tot_L, master.num_empty,mplrs.deepest);
3044 #ifdef MA
3045 for (i=0; i<mplrs.size; i++)
3046 {
3047 if (i==MASTER || i==CONSUMER)
3048 continue;
3049 if (consumer.overflow[i]==0)
3050 num64++;
3051 else if (consumer.overflow[i]==1)
3052 num128++;
3053 else
3054 numgmp++;
3055 }
3056 #ifdef B128
3057 fprintf(consumer.output, "*Finished with %d 64-bit, %d 128-bit, %d GMP workers\n",
3058 num64, num128, numgmp);
3059 if (consumer.output_filename != NULL)
3060 printf("*Finished with %d 64-bit, %d 128-bit, %d GMP workers\n",
3061 num64, num128, numgmp);
3062 #else
3063 fprintf(consumer.output, "*Finished with %d 64-bit, %d GMP workers\n",
3064 num64, num128);
3065 if (consumer.output_filename != NULL)
3066 printf("*Finished with %d 64-bit, %d GMP workers\n",
3067 num64, num128); /*DA since no 128 bit support,
3068 num128 counts gmp */
3069 #endif
3070 #endif
3071 if (mplrs.facets>0)
3072 {
3073 if(!zero(mplrs.Vnum))
3074 {
3075 vol = cprat("*Volume=",mplrs.Vnum,mplrs.Vden);
3076 fprintf(consumer.output,"%s\n",vol);
3077 free(vol);
3078 }
3079 fprintf(consumer.output,"*Totals: facets=%llu bases=%llu",
3080 mplrs.facets, mplrs.bases);
3081 if (mplrs.linearities > 0)
3082 fprintf(consumer.output,
3083 " linearities=%llu facets+linearities=%llu",
3084 mplrs.linearities,mplrs.linearities+mplrs.facets);
3085 fputc('\n', consumer.output);
3086 }
3087 else if (mplrs.redund == 0 && (mplrs.rays+mplrs.vertices>0))
3088 /*DA: seems like V-rep output not redund!?*/
3089 { /* yes, H-rep output is above, V-rep here,
3090 * just 'else' would also get redund runs */
3091 fprintf(consumer.output, "*Totals: vertices=%llu rays=%llu bases=%llu integer-vertices=%llu",
3092 mplrs.vertices,mplrs.rays,mplrs.bases,mplrs.intvertices);
3093 if (mplrs.linearities > 0)
3094 fprintf(consumer.output,
3095 " linearities=%llu", mplrs.linearities);
3096 if (mplrs.rays+mplrs.linearities>0)
3097 {
3098 fprintf(consumer.output," vertices+rays");
3099 if (mplrs.linearities>0)
3100 fprintf(consumer.output, "+linearities");
3101 fprintf(consumer.output, "=%llu",
3102 mplrs.vertices+mplrs.rays+mplrs.linearities);
3103 }
3104 fputc('\n', consumer.output);
3105 }
3106 fprintf(consumer.output, "*Elapsed time: %ld seconds.\n",
3107 end.tv_sec - mplrs.start.tv_sec);
3108
3109 if (consumer.output_filename == NULL)
3110 return;
3111
3112 if (mplrs.facets>0)
3113 {
3114 if (!zero(mplrs.Vnum))
3115 {
3116 vol = cprat("*Volume=",mplrs.Vnum,mplrs.Vden);
3117 printf("%s\n", vol);
3118 free(vol);
3119 }
3120 printf("*Totals: facets=%llu bases=%llu", mplrs.facets,
3121 mplrs.bases);
3122 if (mplrs.linearities > 0)
3123 printf(" linearities=%llu facets+linearities=%llu",
3124 mplrs.linearities,mplrs.linearities+mplrs.facets);
3125 putchar('\n');
3126 }
3127 else if (mplrs.redund == 0 && (mplrs.rays+mplrs.vertices>0))
3128 {
3129 printf("*Totals: vertices=%llu rays=%llu bases=%llu integer-vertices=%llu",
3130 mplrs.vertices,mplrs.rays,mplrs.bases,mplrs.intvertices);
3131 if (mplrs.linearities > 0)
3132 printf(" linearities=%llu", mplrs.linearities);
3133 if (mplrs.rays+mplrs.linearities>0)
3134 {
3135 printf(" vertices+rays");
3136 if (mplrs.linearities>0)
3137 printf("+linearities");
3138 printf("=%llu",
3139 mplrs.vertices+mplrs.rays+mplrs.linearities);
3140 }
3141 putchar('\n');
3142 }
3143 printf("*Elapsed time: %ld seconds.\n", end.tv_sec - mplrs.start.tv_sec);
3144 }
3145
open_outputblock(void)3146 void open_outputblock(void)
3147 {
3148 mplrs.outputblock++;
3149 }
3150
close_outputblock(void)3151 void close_outputblock(void)
3152 {
3153 mplrs.outputblock--;
3154 if (okay_to_flush())
3155 {
3156 process_output(); /* before starting a flush */
3157 clean_outgoing_buffers();
3158 }
3159 }
3160
3161 /* the condition on whether we should do a maxbuf-based
3162 * flush of the output. Bit complicated and used twice
3163 * so broken out here.
3164 */
okay_to_flush(void)3165 int okay_to_flush(void)
3166 {
3167 return
3168 (mplrs.outnum++ > mplrs.maxbuf && /* buffer <maxbuf output lines */
3169 mplrs.outputblock <= 0 && /* don't flush if open output block */
3170 mplrs.initializing != 1 && /* don't flush in phase 1 */
3171 mplrs.rank != MASTER /* need initial warnings together */
3172 #ifdef MA
3173 && mplrs.overflow == 2 /* avoid duplicate output lines if
3174 * overflow possible, only flush when
3175 * using GMP; hurts performance
3176 */
3177 #endif
3178 );
3179 /* not in phase 1 (need 'begin' first) */
3180 /* must not flush during phase 1 to avoid multiple 'begin'
3181 * statements. otherwise, an overflow could happen after
3182 * the flush and we'd produce another 'begin'
3183 */
3184 /* master doesn't flush here to ensure one block with any
3185 * messages from startup, avoiding splitting warnings around 'begin'
3186 */
3187 }
3188
3189
post_output(const char * type,const char * data)3190 void post_output(const char *type, const char *data)
3191 {
3192 outlist *out;
3193
3194 if (mplrs.rank == CONSUMER && !strcmp(type, "redund")) /* sigh ... */
3195 return consumer_finalredund_handler(data);
3196
3197 out = malloc(sizeof(outlist));
3198 out->type = dupstr(type);
3199 out->data = dupstr(data);
3200 out->next = NULL;
3201 if (mplrs.output_list == NULL)
3202 mplrs.output_list = out;
3203 else
3204 mplrs.ol_tail->next = out;
3205 mplrs.ol_tail = out;
3206 if (okay_to_flush() && data[strlen(data)-1]=='\n')
3207 {
3208 process_output(); /* before starting a flush */
3209 clean_outgoing_buffers();
3210 }
3211 }
3212
3213 /* strdup */
dupstr(const char * str)3214 char *dupstr(const char *str)
3215 {
3216 char *buf = malloc(sizeof(char)*(strlen(str)+1));
3217 strcpy(buf,str);
3218 return buf;
3219 }
3220