1 /*
2  * Copyright (c) 2000 Matteo Frigo
3  * Copyright (c) 2000 Massachusetts Institute of Technology
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  *
19  */
20 
21 #include "kernel/ifftw.h"
22 #include <string.h>
23 
24 /* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU
25    program in English, because English is the one language that nearly
26    all programmers in all countries can read."
27 
28                     ingemisco tanquam reus
29 		    culpa rubet vultus meus
30 		    supplicanti parce [rms]
31 */
32 
33 #define VALIDP(solution) ((solution)->flags.hash_info & H_VALID)
34 #define LIVEP(solution) ((solution)->flags.hash_info & H_LIVE)
35 #define SLVNDX(solution) ((solution)->flags.slvndx)
36 #define BLISS(flags) (((flags).hash_info) & BLESSING)
37 #define INFEASIBLE_SLVNDX ((1U<<BITS_FOR_SLVNDX)-1)
38 
39 
40 #define MAXNAM 64  /* maximum length of registrar's name.
41 		      Used for reading wisdom.  There is no point
42 		      in doing this right */
43 
44 
45 #ifdef FFTW_DEBUG
46 static void check(hashtab *ht);
47 #endif
48 
49 /* x <= y */
50 #define LEQ(x, y) (((x) & (y)) == (x))
51 
52 /* A subsumes B */
subsumes(const flags_t * a,unsigned slvndx_a,const flags_t * b)53 static int subsumes(const flags_t *a, unsigned slvndx_a, const flags_t *b)
54 {
55      if (slvndx_a != INFEASIBLE_SLVNDX) {
56 	  A(a->timelimit_impatience == 0);
57 	  return (LEQ(a->u, b->u) && LEQ(b->l, a->l));
58      } else {
59 	  return (LEQ(a->l, b->l)
60 		  && a->timelimit_impatience <= b->timelimit_impatience);
61      }
62 }
63 
addmod(unsigned a,unsigned b,unsigned p)64 static unsigned addmod(unsigned a, unsigned b, unsigned p)
65 {
66      /* gcc-2.95/sparc produces incorrect code for the fast version below. */
67 #if defined(__sparc__) && defined(__GNUC__)
68      /* slow version  */
69      return (a + b) % p;
70 #else
71      /* faster version */
72      unsigned c = a + b;
73      return c >= p ? c - p : c;
74 #endif
75 }
76 
77 /*
78   slvdesc management:
79 */
sgrow(planner * ego)80 static void sgrow(planner *ego)
81 {
82      unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4;
83      slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS);
84      slvdesc *otab = ego->slvdescs;
85      unsigned i;
86 
87      ego->slvdescs = ntab;
88      ego->slvdescsiz = nsiz;
89      for (i = 0; i < osiz; ++i)
90 	  ntab[i] = otab[i];
91      X(ifree0)(otab);
92 }
93 
register_solver(planner * ego,solver * s)94 static void register_solver(planner *ego, solver *s)
95 {
96      slvdesc *n;
97      int kind;
98 
99      if (s) { /* add s to solver list */
100 	  X(solver_use)(s);
101 
102 	  A(ego->nslvdesc < INFEASIBLE_SLVNDX);
103 	  if (ego->nslvdesc >= ego->slvdescsiz)
104 	       sgrow(ego);
105 
106 	  n = ego->slvdescs + ego->nslvdesc;
107 
108 	  n->slv = s;
109 	  n->reg_nam = ego->cur_reg_nam;
110 	  n->reg_id = ego->cur_reg_id++;
111 
112 	  A(strlen(n->reg_nam) < MAXNAM);
113 	  n->nam_hash = X(hash)(n->reg_nam);
114 
115 	  kind = s->adt->problem_kind;
116 	  n->next_for_same_problem_kind = ego->slvdescs_for_problem_kind[kind];
117 	  ego->slvdescs_for_problem_kind[kind] = (int)/*from unsigned*/ego->nslvdesc;
118 
119 	  ego->nslvdesc++;
120      }
121 }
122 
slookup(planner * ego,char * nam,int id)123 static unsigned slookup(planner *ego, char *nam, int id)
124 {
125      unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */
126      FORALL_SOLVERS(ego, s, sp, {
127 	  UNUSED(s);
128 	  if (sp->reg_id == id && sp->nam_hash == h
129 	      && !strcmp(sp->reg_nam, nam))
130 	       return (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);
131      });
132      return INFEASIBLE_SLVNDX;
133 }
134 
135 /* Compute a MD5 hash of the configuration of the planner.
136    We store it into the wisdom file to make absolutely sure that
137    we are reading wisdom that is applicable */
signature_of_configuration(md5 * m,planner * ego)138 static void signature_of_configuration(md5 *m, planner *ego)
139 {
140      X(md5begin)(m);
141      X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
142      FORALL_SOLVERS(ego, s, sp, {
143 	  UNUSED(s);
144 	  X(md5int)(m, sp->reg_id);
145 	  X(md5puts)(m, sp->reg_nam);
146      });
147      X(md5end)(m);
148 }
149 
150 /*
151   md5-related stuff:
152 */
153 
154 /* first hash function */
h1(const hashtab * ht,const md5sig s)155 static unsigned h1(const hashtab *ht, const md5sig s)
156 {
157      unsigned h = s[0] % ht->hashsiz;
158      A(h == (s[0] % ht->hashsiz));
159      return h;
160 }
161 
162 /* second hash function (for double hashing) */
h2(const hashtab * ht,const md5sig s)163 static unsigned h2(const hashtab *ht, const md5sig s)
164 {
165      unsigned h = 1U + s[1] % (ht->hashsiz - 1);
166      A(h == (1U + s[1] % (ht->hashsiz - 1)));
167      return h;
168 }
169 
md5hash(md5 * m,const problem * p,const planner * plnr)170 static void md5hash(md5 *m, const problem *p, const planner *plnr)
171 {
172      X(md5begin)(m);
173      X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
174      X(md5int)(m, plnr->nthr);
175      p->adt->hash(p, m);
176      X(md5end)(m);
177 }
178 
md5eq(const md5sig a,const md5sig b)179 static int md5eq(const md5sig a, const md5sig b)
180 {
181      return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3];
182 }
183 
sigcpy(const md5sig a,md5sig b)184 static void sigcpy(const md5sig a, md5sig b)
185 {
186      b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3];
187 }
188 
189 /*
190   memoization routines :
191 */
192 
193 /*
194    liber scriptus proferetur
195    in quo totum continetur
196    unde mundus iudicetur
197 */
198 struct solution_s {
199      md5sig s;
200      flags_t flags;
201 };
202 
htab_lookup(hashtab * ht,const md5sig s,const flags_t * flagsp)203 static solution *htab_lookup(hashtab *ht, const md5sig s,
204 			     const flags_t *flagsp)
205 {
206      unsigned g, h = h1(ht, s), d = h2(ht, s);
207      solution *best = 0;
208 
209      ++ht->lookup;
210 
211      /* search all entries that match; select the one with
212 	the lowest flags.u */
213      /* This loop may potentially traverse the whole table, since at
214 	least one element is guaranteed to be !LIVEP, but all elements
215 	may be VALIDP.  Hence, we stop after at the first invalid
216 	element or after traversing the whole table. */
217      g = h;
218      do {
219 	  solution *l = ht->solutions + g;
220 	  ++ht->lookup_iter;
221 	  if (VALIDP(l)) {
222 	       if (LIVEP(l)
223 		   && md5eq(s, l->s)
224 		   && subsumes(&l->flags, SLVNDX(l), flagsp) ) {
225 		    if (!best || LEQ(l->flags.u, best->flags.u))
226 			 best = l;
227 	       }
228 	  } else
229 	       break;
230 
231 	  g = addmod(g, d, ht->hashsiz);
232      } while (g != h);
233 
234      if (best)
235 	  ++ht->succ_lookup;
236      return best;
237 }
238 
hlookup(planner * ego,const md5sig s,const flags_t * flagsp)239 static solution *hlookup(planner *ego, const md5sig s,
240 			 const flags_t *flagsp)
241 {
242      solution *sol = htab_lookup(&ego->htab_blessed, s, flagsp);
243      if (!sol) sol = htab_lookup(&ego->htab_unblessed, s, flagsp);
244      return sol;
245 }
246 
fill_slot(hashtab * ht,const md5sig s,const flags_t * flagsp,unsigned slvndx,solution * slot)247 static void fill_slot(hashtab *ht, const md5sig s, const flags_t *flagsp,
248 		      unsigned slvndx, solution *slot)
249 {
250      ++ht->insert;
251      ++ht->nelem;
252      A(!LIVEP(slot));
253      slot->flags.u = flagsp->u;
254      slot->flags.l = flagsp->l;
255      slot->flags.timelimit_impatience = flagsp->timelimit_impatience;
256      slot->flags.hash_info |= H_VALID | H_LIVE;
257      SLVNDX(slot) = slvndx;
258 
259      /* keep this check enabled in case we add so many solvers
260 	that the bitfield overflows */
261      CK(SLVNDX(slot) == slvndx);
262      sigcpy(s, slot->s);
263 }
264 
kill_slot(hashtab * ht,solution * slot)265 static void kill_slot(hashtab *ht, solution *slot)
266 {
267      A(LIVEP(slot)); /* ==> */ A(VALIDP(slot));
268 
269      --ht->nelem;
270      slot->flags.hash_info = H_VALID;
271 }
272 
hinsert0(hashtab * ht,const md5sig s,const flags_t * flagsp,unsigned slvndx)273 static void hinsert0(hashtab *ht, const md5sig s, const flags_t *flagsp,
274 		     unsigned slvndx)
275 {
276      solution *l;
277      unsigned g, h = h1(ht, s), d = h2(ht, s);
278 
279      ++ht->insert_unknown;
280 
281      /* search for nonfull slot */
282      for (g = h; ; g = addmod(g, d, ht->hashsiz)) {
283 	  ++ht->insert_iter;
284 	  l = ht->solutions + g;
285 	  if (!LIVEP(l)) break;
286 	  A((g + d) % ht->hashsiz != h);
287      }
288 
289      fill_slot(ht, s, flagsp, slvndx, l);
290 }
291 
rehash(hashtab * ht,unsigned nsiz)292 static void rehash(hashtab *ht, unsigned nsiz)
293 {
294      unsigned osiz = ht->hashsiz, h;
295      solution *osol = ht->solutions, *nsol;
296 
297      nsiz = (unsigned)X(next_prime)((INT)nsiz);
298      nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT);
299      ++ht->nrehash;
300 
301      /* init new table */
302      for (h = 0; h < nsiz; ++h)
303 	  nsol[h].flags.hash_info = 0;
304 
305      /* install new table */
306      ht->hashsiz = nsiz;
307      ht->solutions = nsol;
308      ht->nelem = 0;
309 
310      /* copy table */
311      for (h = 0; h < osiz; ++h) {
312 	  solution *l = osol + h;
313 	  if (LIVEP(l))
314 	       hinsert0(ht, l->s, &l->flags, SLVNDX(l));
315      }
316 
317      X(ifree0)(osol);
318 }
319 
minsz(unsigned nelem)320 static unsigned minsz(unsigned nelem)
321 {
322      return 1U + nelem + nelem / 8U;
323 }
324 
nextsz(unsigned nelem)325 static unsigned nextsz(unsigned nelem)
326 {
327      return minsz(minsz(nelem));
328 }
329 
hgrow(hashtab * ht)330 static void hgrow(hashtab *ht)
331 {
332      unsigned nelem = ht->nelem;
333      if (minsz(nelem) >= ht->hashsiz)
334 	  rehash(ht, nextsz(nelem));
335 }
336 
337 #if 0
338 /* shrink the hash table, never used */
339 static void hshrink(hashtab *ht)
340 {
341      unsigned nelem = ht->nelem;
342      /* always rehash after deletions */
343      rehash(ht, nextsz(nelem));
344 }
345 #endif
346 
htab_insert(hashtab * ht,const md5sig s,const flags_t * flagsp,unsigned slvndx)347 static void htab_insert(hashtab *ht, const md5sig s, const flags_t *flagsp,
348 			unsigned slvndx)
349 {
350      unsigned g, h = h1(ht, s), d = h2(ht, s);
351      solution *first = 0;
352 
353      /* Remove all entries that are subsumed by the new one.  */
354      /* This loop may potentially traverse the whole table, since at
355 	least one element is guaranteed to be !LIVEP, but all elements
356 	may be VALIDP.  Hence, we stop after at the first invalid
357 	element or after traversing the whole table. */
358      g = h;
359      do {
360 	  solution *l = ht->solutions + g;
361 	  ++ht->insert_iter;
362 	  if (VALIDP(l)) {
363 	       if (LIVEP(l) && md5eq(s, l->s)) {
364 		    if (subsumes(flagsp, slvndx, &l->flags)) {
365 			 if (!first) first = l;
366 			 kill_slot(ht, l);
367 		    } else {
368 			 /* It is an error to insert an element that
369 			    is subsumed by an existing entry. */
370 			 A(!subsumes(&l->flags, SLVNDX(l), flagsp));
371 		    }
372 	       }
373 	  } else
374 	       break;
375 
376 	  g = addmod(g, d, ht->hashsiz);
377      } while (g != h);
378 
379      if (first) {
380 	  /* overwrite FIRST */
381 	  fill_slot(ht, s, flagsp, slvndx, first);
382      } else {
383 	  /* create a new entry */
384  	  hgrow(ht);
385 	  hinsert0(ht, s, flagsp, slvndx);
386      }
387 }
388 
hinsert(planner * ego,const md5sig s,const flags_t * flagsp,unsigned slvndx)389 static void hinsert(planner *ego, const md5sig s, const flags_t *flagsp,
390 		    unsigned slvndx)
391 {
392      htab_insert(BLISS(*flagsp) ? &ego->htab_blessed : &ego->htab_unblessed,
393 		 s, flagsp, slvndx );
394 }
395 
396 
invoke_hook(planner * ego,plan * pln,const problem * p,int optimalp)397 static void invoke_hook(planner *ego, plan *pln, const problem *p,
398 			int optimalp)
399 {
400      if (ego->hook)
401 	  ego->hook(ego, pln, p, optimalp);
402 }
403 
404 #ifdef FFTW_RANDOM_ESTIMATOR
405 /* a "random" estimate, used for debugging to generate "random"
406    plans, albeit from a deterministic seed. */
407 
408 unsigned X(random_estimate_seed) = 0;
409 
random_estimate(const planner * ego,const plan * pln,const problem * p)410 static double random_estimate(const planner *ego, const plan *pln,
411 			      const problem *p)
412 {
413      md5 m;
414      X(md5begin)(&m);
415      X(md5unsigned)(&m, X(random_estimate_seed));
416      X(md5int)(&m, ego->nthr);
417      p->adt->hash(p, &m);
418      X(md5putb)(&m, &pln->ops, sizeof(pln->ops));
419      X(md5putb)(&m, &pln->adt, sizeof(pln->adt));
420      X(md5end)(&m);
421      return ego->cost_hook ? ego->cost_hook(p, m.s[0], COST_MAX) : m.s[0];
422 }
423 
424 #endif
425 
X(iestimate_cost)426 double X(iestimate_cost)(const planner *ego, const plan *pln, const problem *p)
427 {
428      double cost =
429 	  + pln->ops.add
430 	  + pln->ops.mul
431 
432 #if HAVE_FMA
433 	  + pln->ops.fma
434 #else
435 	  + 2 * pln->ops.fma
436 #endif
437 
438 	  + pln->ops.other;
439      if (ego->cost_hook)
440 	  cost = ego->cost_hook(p, cost, COST_MAX);
441      return cost;
442 }
443 
evaluate_plan(planner * ego,plan * pln,const problem * p)444 static void evaluate_plan(planner *ego, plan *pln, const problem *p)
445 {
446      if (ESTIMATEP(ego) || !BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) {
447 	  ego->nplan++;
448 
449 	  if (ESTIMATEP(ego)) {
450 	  estimate:
451 	       /* heuristic */
452 #ifdef FFTW_RANDOM_ESTIMATOR
453 	       pln->pcost = random_estimate(ego, pln, p);
454 	       ego->epcost += X(iestimate_cost)(ego, pln, p);
455 #else
456 	       pln->pcost = X(iestimate_cost)(ego, pln, p);
457 	       ego->epcost += pln->pcost;
458 #endif
459 	  } else {
460 	       double t = X(measure_execution_time)(ego, pln, p);
461 
462 	       if (t < 0) {  /* unavailable cycle counter */
463 		    /* Real programmers can write FORTRAN in any language */
464 		    goto estimate;
465 	       }
466 
467 	       pln->pcost = t;
468 	       ego->pcost += t;
469 	       ego->need_timeout_check = 1;
470 	  }
471      }
472 
473      invoke_hook(ego, pln, p, 0);
474 }
475 
476 /* maintain dynamic scoping of flags, nthr: */
invoke_solver(planner * ego,const problem * p,solver * s,const flags_t * nflags)477 static plan *invoke_solver(planner *ego, const problem *p, solver *s,
478 			   const flags_t *nflags)
479 {
480      flags_t flags = ego->flags;
481      int nthr = ego->nthr;
482      plan *pln;
483      ego->flags = *nflags;
484      PLNR_TIMELIMIT_IMPATIENCE(ego) = 0;
485      A(p->adt->problem_kind == s->adt->problem_kind);
486      pln = s->adt->mkplan(s, p, ego);
487      ego->nthr = nthr;
488      ego->flags = flags;
489      return pln;
490 }
491 
492 /* maintain the invariant TIMED_OUT ==> NEED_TIMEOUT_CHECK */
timeout_p(planner * ego,const problem * p)493 static int timeout_p(planner *ego, const problem *p)
494 {
495      /* do not timeout when estimating.  First, the estimator is the
496 	planner of last resort.  Second, calling X(elapsed_since)() is
497 	slower than estimating */
498      if (!ESTIMATEP(ego)) {
499 	  /* do not assume that X(elapsed_since)() is monotonic */
500 	  if (ego->timed_out) {
501 	       A(ego->need_timeout_check);
502 	       return 1;
503 	  }
504 
505 	  if (ego->timelimit >= 0 &&
506 	      X(elapsed_since)(ego, p, ego->start_time) >= ego->timelimit) {
507 	       ego->timed_out = 1;
508 	       ego->need_timeout_check = 1;
509 	       return 1;
510 	  }
511      }
512 
513      A(!ego->timed_out);
514      ego->need_timeout_check = 0;
515      return 0;
516 }
517 
search0(planner * ego,const problem * p,unsigned * slvndx,const flags_t * flagsp)518 static plan *search0(planner *ego, const problem *p, unsigned *slvndx,
519 		     const flags_t *flagsp)
520 {
521      plan *best = 0;
522      int best_not_yet_timed = 1;
523 
524      /* Do not start a search if the planner timed out. This check is
525 	necessary, lest the relaxation mechanism kick in */
526      if (timeout_p(ego, p))
527 	  return 0;
528 
529      FORALL_SOLVERS_OF_KIND(p->adt->problem_kind, ego, s, sp, {
530 	  plan *pln;
531 
532 	  pln = invoke_solver(ego, p, s, flagsp);
533 
534 	  if (ego->need_timeout_check)
535 	       if (timeout_p(ego, p)) {
536 		    X(plan_destroy_internal)(pln);
537 		    X(plan_destroy_internal)(best);
538 		    return 0;
539 	       }
540 
541 	  if (pln) {
542 	       /* read COULD_PRUNE_NOW_P because PLN may be destroyed
543 		  before we use COULD_PRUNE_NOW_P */
544 	       int could_prune_now_p = pln->could_prune_now_p;
545 
546 	       if (best) {
547 		    if (best_not_yet_timed) {
548 			 evaluate_plan(ego, best, p);
549 			 best_not_yet_timed = 0;
550 		    }
551 		    evaluate_plan(ego, pln, p);
552 		    if (pln->pcost < best->pcost) {
553 			 X(plan_destroy_internal)(best);
554 			 best = pln;
555                          *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);
556 		    } else {
557 			 X(plan_destroy_internal)(pln);
558 		    }
559 	       } else {
560 		    best = pln;
561                     *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);
562 	       }
563 
564 	       if (ALLOW_PRUNINGP(ego) && could_prune_now_p)
565 		    break;
566 	  }
567      });
568 
569      return best;
570 }
571 
search(planner * ego,const problem * p,unsigned * slvndx,flags_t * flagsp)572 static plan *search(planner *ego, const problem *p, unsigned *slvndx,
573 		    flags_t *flagsp)
574 {
575      plan *pln = 0;
576      unsigned i;
577 
578      /* relax impatience in this order: */
579      static const unsigned relax_tab[] = {
580 	  0, /* relax nothing */
581 	  NO_VRECURSE,
582 	  NO_FIXED_RADIX_LARGE_N,
583 	  NO_SLOW,
584 	  NO_UGLY
585      };
586 
587      unsigned l_orig = flagsp->l;
588      unsigned x = flagsp->u;
589 
590      /* guaranteed to be different from X */
591      unsigned last_x = ~x;
592 
593      for (i = 0; i < sizeof(relax_tab) / sizeof(relax_tab[0]); ++i) {
594 	  if (LEQ(l_orig, x & ~relax_tab[i]))
595 	       x = x & ~relax_tab[i];
596 
597 	  if (x != last_x) {
598 	       last_x = x;
599 	       flagsp->l = x;
600 	       pln = search0(ego, p, slvndx, flagsp);
601 	       if (pln) break;
602 	  }
603      }
604 
605      if (!pln) {
606 	  /* search [L_ORIG, U] */
607 	  if (l_orig != last_x) {
608 	       last_x = l_orig;
609 	       flagsp->l = l_orig;
610 	       pln = search0(ego, p, slvndx, flagsp);
611 	  }
612      }
613 
614      return pln;
615 }
616 
617 #define CHECK_FOR_BOGOSITY						\
618      if ((ego->bogosity_hook ?						\
619 	  (ego->wisdom_state = ego->bogosity_hook(ego->wisdom_state, p)) \
620 	  : ego->wisdom_state) == WISDOM_IS_BOGUS)			\
621 	  goto wisdom_is_bogus;
622 
mkplan(planner * ego,const problem * p)623 static plan *mkplan(planner *ego, const problem *p)
624 {
625      plan *pln;
626      md5 m;
627      unsigned slvndx;
628      flags_t flags_of_solution;
629      solution *sol;
630      solver *s;
631 
632      ASSERT_ALIGNED_DOUBLE;
633      A(LEQ(PLNR_L(ego), PLNR_U(ego)));
634 
635      if (ESTIMATEP(ego))
636 	  PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; /* canonical form */
637 
638 
639 #ifdef FFTW_DEBUG
640      check(&ego->htab_blessed);
641      check(&ego->htab_unblessed);
642 #endif
643 
644      pln = 0;
645 
646      CHECK_FOR_BOGOSITY;
647 
648      ego->timed_out = 0;
649 
650      ++ego->nprob;
651      md5hash(&m, p, ego);
652 
653      flags_of_solution = ego->flags;
654 
655      if (ego->wisdom_state != WISDOM_IGNORE_ALL) {
656 	  if ((sol = hlookup(ego, m.s, &flags_of_solution))) {
657 	       /* wisdom is acceptable */
658 	       wisdom_state_t owisdom_state = ego->wisdom_state;
659 
660 	       /* this hook is mainly for MPI, to make sure that
661 		  wisdom is in sync across all processes for MPI problems */
662 	       if (ego->wisdom_ok_hook && !ego->wisdom_ok_hook(p, sol->flags))
663 		    goto do_search; /* ignore not-ok wisdom */
664 
665 	       slvndx = SLVNDX(sol);
666 
667 	       if (slvndx == INFEASIBLE_SLVNDX) {
668 		    if (ego->wisdom_state == WISDOM_IGNORE_INFEASIBLE)
669 			 goto do_search;
670 		    else
671 			 return 0;   /* known to be infeasible */
672 	       }
673 
674 	       flags_of_solution = sol->flags;
675 
676 	       /* inherit blessing either from wisdom
677 		  or from the planner */
678 	       flags_of_solution.hash_info |= BLISS(ego->flags);
679 
680 	       ego->wisdom_state = WISDOM_ONLY;
681 
682 	       s = ego->slvdescs[slvndx].slv;
683 	       if (p->adt->problem_kind != s->adt->problem_kind)
684 		    goto wisdom_is_bogus;
685 
686 	       pln = invoke_solver(ego, p, s, &flags_of_solution);
687 
688 	       CHECK_FOR_BOGOSITY; 	  /* catch error in child solvers */
689 
690 	       sol = 0; /* Paranoia: SOL may be dangling after
691 			   invoke_solver(); make sure we don't accidentally
692 			   reuse it. */
693 
694 	       if (!pln)
695 		    goto wisdom_is_bogus;
696 
697 	       ego->wisdom_state = owisdom_state;
698 
699 	       goto skip_search;
700 	  }
701 	  else if (ego->nowisdom_hook) /* for MPI, make sure lack of wisdom */
702 	       ego->nowisdom_hook(p);  /*   is in sync across all processes */
703      }
704 
705  do_search:
706      /* cannot search in WISDOM_ONLY mode */
707      if (ego->wisdom_state == WISDOM_ONLY)
708 	  goto wisdom_is_bogus;
709 
710      flags_of_solution = ego->flags;
711      pln = search(ego, p, &slvndx, &flags_of_solution);
712      CHECK_FOR_BOGOSITY; 	  /* catch error in child solvers */
713 
714      if (ego->timed_out) {
715 	  A(!pln);
716 	  if (PLNR_TIMELIMIT_IMPATIENCE(ego) != 0) {
717 	       /* record (below) that this plan has failed because of
718 		  timeout */
719 	       flags_of_solution.hash_info |= BLESSING;
720 	  } else {
721 	       /* this is not the top-level problem or timeout is not
722 		  active: record no wisdom. */
723 	       return 0;
724 	  }
725      } else {
726 	  /* canonicalize to infinite timeout */
727 	  flags_of_solution.timelimit_impatience = 0;
728      }
729 
730  skip_search:
731      if (ego->wisdom_state == WISDOM_NORMAL ||
732 	 ego->wisdom_state == WISDOM_ONLY) {
733 	  if (pln) {
734 	       hinsert(ego, m.s, &flags_of_solution, slvndx);
735 	       invoke_hook(ego, pln, p, 1);
736 	  } else {
737 	       hinsert(ego, m.s, &flags_of_solution, INFEASIBLE_SLVNDX);
738 	  }
739      }
740 
741      return pln;
742 
743  wisdom_is_bogus:
744      X(plan_destroy_internal)(pln);
745      ego->wisdom_state = WISDOM_IS_BOGUS;
746      return 0;
747 }
748 
htab_destroy(hashtab * ht)749 static void htab_destroy(hashtab *ht)
750 {
751      X(ifree)(ht->solutions);
752      ht->solutions = 0;
753      ht->nelem = 0U;
754 }
755 
mkhashtab(hashtab * ht)756 static void mkhashtab(hashtab *ht)
757 {
758      ht->nrehash = 0;
759      ht->succ_lookup = ht->lookup = ht->lookup_iter = 0;
760      ht->insert = ht->insert_iter = ht->insert_unknown = 0;
761 
762      ht->solutions = 0;
763      ht->hashsiz = ht->nelem = 0U;
764      hgrow(ht);			/* so that hashsiz > 0 */
765 }
766 
767 /* destroy hash table entries.  If FORGET_EVERYTHING, destroy the whole
768    table.  If FORGET_ACCURSED, then destroy entries that are not blessed. */
forget(planner * ego,amnesia a)769 static void forget(planner *ego, amnesia a)
770 {
771      switch (a) {
772 	 case FORGET_EVERYTHING:
773 	      htab_destroy(&ego->htab_blessed);
774 	      mkhashtab(&ego->htab_blessed);
775 	      /* fall through */
776 	 case FORGET_ACCURSED:
777 	      htab_destroy(&ego->htab_unblessed);
778 	      mkhashtab(&ego->htab_unblessed);
779 	      break;
780 	 default:
781 	      break;
782      }
783 }
784 
785 /* FIXME: what sort of version information should we write? */
786 #define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom))
787 static const char stimeout[] = "TIMEOUT";
788 
789 /* tantus labor non sit cassus */
exprt(planner * ego,printer * p)790 static void exprt(planner *ego, printer *p)
791 {
792      unsigned h;
793      hashtab *ht = &ego->htab_blessed;
794      md5 m;
795 
796      signature_of_configuration(&m, ego);
797 
798      p->print(p,
799 	      "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n",
800 	      m.s[0], m.s[1], m.s[2], m.s[3]);
801 
802      for (h = 0; h < ht->hashsiz; ++h) {
803 	  solution *l = ht->solutions + h;
804 	  if (LIVEP(l)) {
805 	       const char *reg_nam;
806 	       int reg_id;
807 
808 	       if (SLVNDX(l) == INFEASIBLE_SLVNDX) {
809 		    reg_nam = stimeout;
810 		    reg_id = 0;
811 	       } else {
812 		    slvdesc *sp = ego->slvdescs + SLVNDX(l);
813 		    reg_nam = sp->reg_nam;
814 		    reg_id = sp->reg_id;
815 	       }
816 
817 	       /* qui salvandos salvas gratis
818 		  salva me fons pietatis */
819 	       p->print(p, "  (%s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)\n",
820 			reg_nam, reg_id,
821 			l->flags.l, l->flags.u, l->flags.timelimit_impatience,
822 			l->s[0], l->s[1], l->s[2], l->s[3]);
823 	  }
824      }
825      p->print(p, ")\n");
826 }
827 
828 /* mors stupebit et natura
829    cum resurget creatura */
imprt(planner * ego,scanner * sc)830 static int imprt(planner *ego, scanner *sc)
831 {
832      char buf[MAXNAM + 1];
833      md5uint sig[4];
834      unsigned l, u, timelimit_impatience;
835      flags_t flags;
836      int reg_id;
837      unsigned slvndx;
838      hashtab *ht = &ego->htab_blessed;
839      hashtab old;
840      md5 m;
841 
842      if (!sc->scan(sc,
843 		   "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n",
844 		   sig + 0, sig + 1, sig + 2, sig + 3))
845 	  return 0; /* don't need to restore hashtable */
846 
847      signature_of_configuration(&m, ego);
848      if (m.s[0] != sig[0] || m.s[1] != sig[1] ||
849 	 m.s[2] != sig[2] || m.s[3] != sig[3]) {
850 	  /* invalid configuration */
851 	  return 0;
852      }
853 
854      /* make a backup copy of the hash table (cache the hash) */
855      {
856 	  unsigned h, hsiz = ht->hashsiz;
857 	  old = *ht;
858 	  old.solutions = (solution *)MALLOC(hsiz * sizeof(solution), HASHT);
859 	  for (h = 0; h < hsiz; ++h)
860 	       old.solutions[h] = ht->solutions[h];
861      }
862 
863      while (1) {
864 	  if (sc->scan(sc, ")"))
865 	       break;
866 
867 	  /* qua resurget ex favilla */
868 	  if (!sc->scan(sc, "(%*s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)",
869 			MAXNAM, buf, &reg_id, &l, &u, &timelimit_impatience,
870 			sig + 0, sig + 1, sig + 2, sig + 3))
871 	       goto bad;
872 
873 	  if (!strcmp(buf, stimeout) && reg_id == 0) {
874 	       slvndx = INFEASIBLE_SLVNDX;
875 	  } else {
876 	       if (timelimit_impatience != 0)
877 		    goto bad;
878 
879 	       slvndx = slookup(ego, buf, reg_id);
880 	       if (slvndx == INFEASIBLE_SLVNDX)
881 		    goto bad;
882 	  }
883 
884 	  /* inter oves locum praesta */
885 	  flags.l = l;
886 	  flags.u = u;
887 	  flags.timelimit_impatience = timelimit_impatience;
888 	  flags.hash_info = BLESSING;
889 
890 	  CK(flags.l == l);
891 	  CK(flags.u == u);
892 	  CK(flags.timelimit_impatience == timelimit_impatience);
893 
894 	  if (!hlookup(ego, sig, &flags))
895 	       hinsert(ego, sig, &flags, slvndx);
896      }
897 
898      X(ifree0)(old.solutions);
899      return 1;
900 
901  bad:
902      /* ``The wisdom of FFTW must be above suspicion.'' */
903      X(ifree0)(ht->solutions);
904      *ht = old;
905      return 0;
906 }
907 
908 /*
909  * create a planner
910  */
X(mkplanner)911 planner *X(mkplanner)(void)
912 {
913      int i;
914 
915      static const planner_adt padt = {
916 	  register_solver, mkplan, forget, exprt, imprt
917      };
918 
919      planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS);
920 
921      p->adt = &padt;
922      p->nplan = p->nprob = 0;
923      p->pcost = p->epcost = 0.0;
924      p->hook = 0;
925      p->cost_hook = 0;
926      p->wisdom_ok_hook = 0;
927      p->nowisdom_hook = 0;
928      p->bogosity_hook = 0;
929      p->cur_reg_nam = 0;
930      p->wisdom_state = WISDOM_NORMAL;
931 
932      p->slvdescs = 0;
933      p->nslvdesc = p->slvdescsiz = 0;
934 
935      p->flags.l = 0;
936      p->flags.u = 0;
937      p->flags.timelimit_impatience = 0;
938      p->flags.hash_info = 0;
939      p->nthr = 1;
940      p->need_timeout_check = 1;
941      p->timelimit = -1;
942 
943      mkhashtab(&p->htab_blessed);
944      mkhashtab(&p->htab_unblessed);
945 
946      for (i = 0; i < PROBLEM_LAST; ++i)
947 	  p->slvdescs_for_problem_kind[i] = -1;
948 
949      return p;
950 }
951 
X(planner_destroy)952 void X(planner_destroy)(planner *ego)
953 {
954      /* destroy hash table */
955      htab_destroy(&ego->htab_blessed);
956      htab_destroy(&ego->htab_unblessed);
957 
958      /* destroy solvdesc table */
959      FORALL_SOLVERS(ego, s, sp, {
960 	  UNUSED(sp);
961 	  X(solver_destroy)(s);
962      });
963 
964      X(ifree0)(ego->slvdescs);
965      X(ifree)(ego); /* dona eis requiem */
966 }
967 
X(mkplan_d)968 plan *X(mkplan_d)(planner *ego, problem *p)
969 {
970      plan *pln = ego->adt->mkplan(ego, p);
971      X(problem_destroy)(p);
972      return pln;
973 }
974 
975 /* like X(mkplan_d), but sets/resets flags as well */
X(mkplan_f_d)976 plan *X(mkplan_f_d)(planner *ego, problem *p,
977 		    unsigned l_set, unsigned u_set, unsigned u_reset)
978 {
979      flags_t oflags = ego->flags;
980      plan *pln;
981 
982      PLNR_U(ego) &= ~u_reset;
983      PLNR_L(ego) &= ~u_reset;
984      PLNR_L(ego) |= l_set;
985      PLNR_U(ego) |= u_set | l_set;
986      pln = X(mkplan_d)(ego, p);
987      ego->flags = oflags;
988      return pln;
989 }
990 
991 /*
992  * Debugging code:
993  */
994 #ifdef FFTW_DEBUG
check(hashtab * ht)995 static void check(hashtab *ht)
996 {
997      unsigned live = 0;
998      unsigned i;
999 
1000      A(ht->nelem < ht->hashsiz);
1001 
1002      for (i = 0; i < ht->hashsiz; ++i) {
1003 	  solution *l = ht->solutions + i;
1004 	  if (LIVEP(l))
1005 	       ++live;
1006      }
1007 
1008      A(ht->nelem == live);
1009 
1010      for (i = 0; i < ht->hashsiz; ++i) {
1011 	  solution *l1 = ht->solutions + i;
1012 	  int foundit = 0;
1013 	  if (LIVEP(l1)) {
1014 	       unsigned g, h = h1(ht, l1->s), d = h2(ht, l1->s);
1015 
1016 	       g = h;
1017 	       do {
1018 		    solution *l = ht->solutions + g;
1019 		    if (VALIDP(l)) {
1020 			 if (l1 == l)
1021 			      foundit = 1;
1022 			 else if (LIVEP(l) && md5eq(l1->s, l->s)) {
1023 			      A(!subsumes(&l->flags, SLVNDX(l), &l1->flags));
1024 			      A(!subsumes(&l1->flags, SLVNDX(l1), &l->flags));
1025 			 }
1026 		    } else
1027 			 break;
1028 		    g = addmod(g, d, ht->hashsiz);
1029 	       } while (g != h);
1030 
1031 	       A(foundit);
1032 	  }
1033      }
1034 }
1035 #endif
1036