1 /*************************************************************************/
2 /* Copyright (c) 2004 */
3 /* Daniel Sleator, David Temperley, and John Lafferty */
4 /* Copyright (c) 2010, 2014 Linas Vepstas */
5 /* All rights reserved */
6 /* */
7 /* Use of the link grammar parsing system is subject to the terms of the */
8 /* license set forth in the LICENSE file included with this software. */
9 /* This license allows free redistribution and use in source and binary */
10 /* forms, with or without modification, subject to certain conditions. */
11 /* */
12 /*************************************************************************/
13
14 #include <limits.h> // INT_MAX
15
16 #include "connectors.h"
17 #include "count.h"
18 #include "disjunct-utils.h" // Disjunct
19 #include "extract-links.h"
20 #include "fast-match.h"
21 #include "utilities.h" // Windows rand_r()
22 #include "linkage/linkage.h"
23 #include "tokenize/word-structures.h" // Word_Struct
24
25 //#define RECOUNT
26
27 //#define DEBUG_X_TABLE
28 #ifdef DEBUG_X_TABLE
29 #undef DEBUG_X_TABLE
30 #define DEBUG_X_TABLE(...) __VA_ARGS__
31 #else
32 #define DEBUG_X_TABLE(...)
33 #endif
34
35 typedef struct Parse_choice_struct Parse_choice;
36
37 /* The parse_choice is used to extract links for a given parse */
38 typedef struct Parse_set_struct Parse_set;
39 struct Parse_choice_struct
40 {
41 Parse_choice * next;
42 Parse_set * set[2];
43 Link link[2]; /* the lc fields of these is NULL if there is no link used */
44 Disjunct *md; /* the chosen disjunct for the middle word */
45 };
46
47 struct Parse_set_struct
48 {
49 short lw, rw; /* left and right word index */
50 unsigned int null_count; /* number of island words */
51 int l_id, r_id; /* pending, unconnected connectors */
52
53 s64 count; /* The number of ways to parse. */
54 #ifdef RECOUNT
55 s64 recount; /* Exactly the same as above, but counted at a later stage. */
56 s64 cut_count; /* Count only low-cost parses, i.e. below the cost cutoff */
57 //double cost_cutoff;
58 #undef RECOUNT
59 #define RECOUNT(X) X
60 #else
61 #define RECOUNT(X) /* Make it disappear... */
62 #endif
63 Parse_choice * first;
64 Parse_choice * tail;
65 };
66
67 typedef struct Pset_bucket_struct Pset_bucket;
68 struct Pset_bucket_struct
69 {
70 Parse_set set;
71 Pset_bucket *next;
72 };
73
74 struct extractor_s
75 {
76 unsigned int x_table_size;
77 unsigned int log2_x_table_size;
78 Pset_bucket ** x_table; /* Hash table */
79 Parse_set * parse_set;
80 Word *words;
81 bool islands_ok;
82 bool sort_match_list;
83
84 /* thread-safe random number state */
85 unsigned int rand_state;
86 };
87
88 /**
89 * The first thing we do is we build a data structure to represent the
90 * result of the entire parse search. There will be a set of nodes
91 * built for each call to the count() function that returned a non-zero
92 * value, AND which is part of a valid linkage. Each of these nodes
93 * represents a valid continuation, and contains pointers to two other
94 * sets (one for the left continuation and one for the right
95 * continuation).
96 */
97
free_set(Parse_set * s)98 static void free_set(Parse_set *s)
99 {
100 Parse_choice *p, *xp;
101 if (s == NULL) return;
102 for (p=s->first; p != NULL; p = xp)
103 {
104 xp = p->next;
105 xfree((void *)p, sizeof(*p));
106 }
107 }
108
109 static Parse_choice *
make_choice(Parse_set * lset,Connector * llc,Connector * lrc,Parse_set * rset,Connector * rlc,Connector * rrc,Disjunct * md)110 make_choice(Parse_set *lset, Connector * llc, Connector * lrc,
111 Parse_set *rset, Connector * rlc, Connector * rrc,
112 Disjunct *md)
113 {
114 Parse_choice *pc;
115 pc = (Parse_choice *) xalloc(sizeof(*pc));
116 pc->next = NULL;
117 pc->set[0] = lset;
118 pc->link[0].link_name = NULL;
119 pc->link[0].lw = lset->lw;
120 pc->link[0].rw = lset->rw;
121 pc->link[0].lc = llc;
122 pc->link[0].rc = lrc;
123 pc->set[1] = rset;
124 pc->link[1].link_name = NULL;
125 pc->link[1].lw = rset->lw;
126 pc->link[1].rw = rset->rw;
127 pc->link[1].lc = rlc;
128 pc->link[1].rc = rrc;
129 pc->md = md;
130 return pc;
131 }
132
133 /**
134 * Put this parse_choice into a given set. The tail pointer is always
135 * left pointing to the end of the list.
136 */
put_choice_in_set(Parse_set * s,Parse_choice * pc)137 static void put_choice_in_set(Parse_set *s, Parse_choice *pc)
138 {
139 if (s->first == NULL)
140 {
141 s->first = pc;
142 }
143 else
144 {
145 s->tail->next = pc;
146 }
147 s->tail = pc;
148 pc->next = NULL;
149 }
150
record_choice(Parse_set * lset,Connector * llc,Connector * lrc,Parse_set * rset,Connector * rlc,Connector * rrc,Disjunct * md,Parse_set * s)151 static void record_choice(
152 Parse_set *lset, Connector * llc, Connector * lrc,
153 Parse_set *rset, Connector * rlc, Connector * rrc,
154 Disjunct *md, Parse_set *s)
155 {
156 put_choice_in_set(s, make_choice(lset, llc, lrc,
157 rset, rlc, rrc,
158 md));
159 }
160
161 /**
162 * Allocate the parse info struct
163 *
164 * A piecewise exponential function determines the size of the hash
165 * table. Probably should make use of the actual number of disjuncts,
166 * rather than just the number of words.
167 */
extractor_new(int nwords,unsigned int ranstat)168 extractor_t * extractor_new(int nwords, unsigned int ranstat)
169 {
170 int log2_table_size;
171 extractor_t * pex;
172
173 pex = (extractor_t *) xalloc(sizeof(extractor_t));
174 memset(pex, 0, sizeof(extractor_t));
175 pex->rand_state = ranstat;
176
177 /* Alloc the x_table */
178 if (nwords > 96) {
179 log2_table_size = 15 + nwords / 48;
180 } else if (nwords >= 10) {
181 log2_table_size = 14 + nwords / 24;
182 } else if (nwords >= 4) {
183 log2_table_size = 1 + nwords + nwords / 2;
184 } else {
185 log2_table_size = 5;
186 }
187 /* if (log2_table_size > 21) log2_table_size = 21; */
188 pex->log2_x_table_size = log2_table_size;
189 pex->x_table_size = (1 << log2_table_size);
190
191 DEBUG_X_TABLE(
192 printf("Allocating x_table of size %u (nwords %d)\n",
193 pex->x_table_size, nwords);
194 )
195 pex->x_table = (Pset_bucket**) xalloc(pex->x_table_size * sizeof(Pset_bucket*));
196 memset(pex->x_table, 0, pex->x_table_size * sizeof(Pset_bucket*));
197
198 return pex;
199 }
200
201 /**
202 * This is the function that should be used to free the set structure. Since
203 * it's a dag, a recursive free function won't work. Every time we create
204 * a set element, we put it in the hash table, so this is OK.
205 */
free_extractor(extractor_t * pex)206 void free_extractor(extractor_t * pex)
207 {
208 unsigned int i;
209 Pset_bucket *t, *x;
210 if (!pex) return;
211
212 DEBUG_X_TABLE(int N = 0;)
213 for (i=0; i<pex->x_table_size; i++)
214 {
215 DEBUG_X_TABLE(int c = 0;)
216 for (t = pex->x_table[i]; t!= NULL; t=x)
217 {
218 DEBUG_X_TABLE(c++;)
219 x = t->next;
220 free_set(&t->set);
221 xfree((void *) t, sizeof(Pset_bucket));
222 }
223 DEBUG_X_TABLE(
224 if (c > 0)
225 ;//printf("I %d: chain %d\n", i, c);
226 else
227 N++;
228 )
229 }
230 DEBUG_X_TABLE(
231 printf("Used x_table %u/%u %.2f%%\n",
232 pex->x_table_size-N, pex->x_table_size,
233 100.0f*(pex->x_table_size-N)/pex->x_table_size);
234 )
235 pex->parse_set = NULL;
236
237 //printf("Freeing x_table of size %d\n", pex->x_table_size);
238 xfree((void *) pex->x_table, pex->x_table_size * sizeof(Pset_bucket*));
239 pex->x_table_size = 0;
240 pex->x_table = NULL;
241
242 xfree((void *) pex, sizeof(extractor_t));
243 }
244
245 /**
246 * Returns the pointer to this info, NULL if not there.
247 * Note that there is no need to use (lw, rw) as keys because tracon_id
248 * values are not shared between words.
249 */
x_table_pointer(int lw,int rw,Connector * le,Connector * re,unsigned int null_count,extractor_t * pex)250 static Pset_bucket * x_table_pointer(int lw, int rw,
251 Connector *le, Connector *re,
252 unsigned int null_count, extractor_t * pex)
253 {
254 Pset_bucket *t;
255 int l_id = (NULL != le) ? le->tracon_id : lw;
256 int r_id = (NULL != re) ? re->tracon_id : rw;
257 t = pex->x_table[pair_hash(pex->x_table_size, lw, rw, l_id, r_id, null_count)];
258
259 for (; t != NULL; t = t->next) {
260 if ((t->set.l_id == l_id) && (t->set.r_id == r_id) &&
261 (t->set.null_count == null_count)) return t;
262 }
263 return NULL;
264 }
265
266 /**
267 * Stores the value in the x_table. Assumes it's not already there.
268 */
x_table_store(int lw,int rw,Connector * le,Connector * re,unsigned int null_count,extractor_t * pex)269 static Pset_bucket * x_table_store(int lw, int rw,
270 Connector *le, Connector *re,
271 unsigned int null_count, extractor_t * pex)
272 {
273 Pset_bucket *t, *n;
274 unsigned int h;
275
276 n = (Pset_bucket *) xalloc(sizeof(Pset_bucket));
277 n->set.lw = lw;
278 n->set.rw = rw;
279 n->set.null_count = null_count;
280 n->set.l_id = (NULL != le) ? le->tracon_id : lw;
281 n->set.r_id = (NULL != re) ? re->tracon_id : rw;
282 n->set.count = 0;
283 n->set.first = NULL;
284 n->set.tail = NULL;
285
286 h = pair_hash(pex->x_table_size, lw, rw, n->set.l_id, n->set.r_id, null_count);
287 t = pex->x_table[h];
288 n->next = t;
289 pex->x_table[h] = n;
290 return n;
291 }
292
293 /** Create a bogus parse set that only holds lw, rw. */
dummy_set(int lw,int rw,unsigned int null_count,extractor_t * pex)294 static Parse_set* dummy_set(int lw, int rw,
295 unsigned int null_count, extractor_t * pex)
296 {
297 Pset_bucket *dummy;
298 dummy = x_table_pointer(lw, rw, NULL, NULL, null_count, pex);
299 if (dummy) return &dummy->set;
300
301 dummy = x_table_store(lw, rw, NULL, NULL, null_count, pex);
302 dummy->set.count = 1;
303 return &dummy->set;
304 }
305
cost_compare(const void * a,const void * b)306 static int cost_compare(const void *a, const void *b)
307 {
308 const Disjunct * const * da = a;
309 const Disjunct * const * db = b;
310 if ((*da)->cost < (*db)->cost) return -1;
311 if ((*da)->cost > (*db)->cost) return 1;
312 return 0;
313 }
314
315 /**
316 * Sort the match-list into ascending disjunct cost. The goal here
317 * is to issue the lowest-cost disjuncts first, so that the parse
318 * set ends up quasi-sorted. This is not enough to get us a totally
319 * sorted parse set, but it does guarantee that at least the very
320 * first parse really will be the lowest cost.
321 */
sort_match_list(fast_matcher_t * mchxt,size_t mlb)322 static void sort_match_list(fast_matcher_t *mchxt, size_t mlb)
323 {
324 size_t i = mlb;
325
326 while (get_match_list_element(mchxt, i) != NULL)
327 i++;
328
329 if (i - mlb < 2) return;
330
331 qsort(get_match_list(mchxt, mlb), i - mlb, sizeof(Disjunct *), cost_compare);
332 }
333
334 /**
335 * returns NULL if there are no ways to parse, or returns a pointer
336 * to a set structure representing all the ways to parse.
337 *
338 * This code is similar to do_count() in count.c -- for a good reason:
339 * the do_count() function did a full parse, but didn't actually
340 * allocate a memory structures to hold the parse. This also does
341 * a full parse, but it also allocates and fills out the various
342 * parse structures.
343 */
344 static
mk_parse_set(fast_matcher_t * mchxt,count_context_t * ctxt,int lw,int rw,Connector * le,Connector * re,unsigned int null_count,extractor_t * pex)345 Parse_set * mk_parse_set(fast_matcher_t *mchxt,
346 count_context_t * ctxt,
347 int lw, int rw,
348 Connector *le, Connector *re, unsigned int null_count,
349 extractor_t * pex)
350 {
351 int start_word, end_word, w;
352 Pset_bucket *xt;
353 Count_bin * count;
354
355 assert(null_count < 0x7fff, "mk_parse_set() called with null_count < 0.");
356
357 count = table_lookup(ctxt, lw, rw, le, re, null_count);
358
359 /* If there's no counter, then there's no way to parse. */
360 if (NULL == count) return NULL;
361 if (hist_total(count) == 0) return NULL;
362
363 xt = x_table_pointer(lw, rw, le, re, null_count, pex);
364
365 /* Perhaps we've already computed it; if so, return it. */
366 if (xt != NULL) return &xt->set;
367
368 /* Start it out with the empty set of parse choices. */
369 /* This entry must be updated before we return. */
370 xt = x_table_store(lw, rw, le, re, null_count, pex);
371
372 /* The count we previously computed; it's non-zero. */
373 xt->set.count = hist_total(count);
374
375 //#define NUM_PARSES 4
376 // xt->set.cost_cutoff = hist_cost_cutoff(count, NUM_PARSES);
377 // xt->set.cut_count = hist_cut_total(count, NUM_PARSES);
378
379 RECOUNT({xt->set.recount = 1;})
380
381 /* If the two words are next to each other, the count == 1 */
382 if (lw + 1 == rw) return &xt->set;
383
384 /* The left and right connectors are null, but the two words are
385 * NOT next to each-other. */
386 if ((le == NULL) && (re == NULL))
387 {
388 Parse_set* pset;
389 Parse_set* dummy;
390 Disjunct* dis;
391
392 if (!pex->islands_ok && (lw != -1) && (pex->words[lw].d != NULL))
393 return &xt->set;
394 if (null_count == 0) return &xt->set;
395
396 RECOUNT({xt->set.recount = 0;})
397
398 w = lw + 1;
399 for (int opt = 0; opt <= (int)pex->words[w].optional; opt++)
400 {
401 null_count += opt;
402 for (dis = pex->words[w].d; dis != NULL; dis = dis->next)
403 {
404 if (dis->left == NULL)
405 {
406 pset = mk_parse_set(mchxt, ctxt,
407 w, rw, dis->right, NULL,
408 null_count-1, pex);
409 if (pset == NULL) continue;
410 dummy = dummy_set(lw, w, null_count-1, pex);
411 record_choice(dummy, NULL, NULL,
412 pset, dis->right, NULL,
413 dis, &xt->set);
414 RECOUNT({xt->set.recount += pset->recount;})
415 }
416 }
417 pset = mk_parse_set(mchxt, ctxt,
418 w, rw, NULL, NULL,
419 null_count-1, pex);
420 if (pset != NULL)
421 {
422 dummy = dummy_set(lw, w, null_count-1, pex);
423 record_choice(dummy, NULL, NULL,
424 pset, NULL, NULL,
425 NULL, &xt->set);
426 RECOUNT({xt->set.recount += pset->recount;})
427 }
428 }
429 return &xt->set;
430 }
431
432 if (le == NULL)
433 {
434 start_word = MAX(lw+1, re->farthest_word);
435 }
436 else
437 {
438 start_word = le->nearest_word;
439 }
440
441 if (re == NULL)
442 {
443 end_word = MIN(rw, le->farthest_word+1);
444 }
445 else
446 {
447 if ((le != NULL) && (re->nearest_word > le->farthest_word))
448 end_word = le->farthest_word + 1;
449 else
450 end_word = re->nearest_word + 1;
451 }
452
453 /* This condition can never be true here. It is included so GCC
454 * will be able to optimize the loop over "null_count". Without
455 * this check, GCC thinks this loop may be an infinite loop and
456 * it may omit some optimizations. */
457 if (UINT_MAX == null_count) return NULL;
458
459 RECOUNT({xt->set.recount = 0;})
460 for (w = start_word; w < end_word; w++)
461 {
462 size_t mlb = form_match_list(mchxt, w, le, lw, re, rw);
463 if (pex->sort_match_list) sort_match_list(mchxt, mlb);
464
465 for (size_t mle = mlb; get_match_list_element(mchxt, mle) != NULL; mle++)
466 {
467 Disjunct *d = get_match_list_element(mchxt, mle);
468 bool Lmatch = d->match_left;
469 bool Rmatch = d->match_right;
470
471 for (unsigned int lnull_count = 0; lnull_count <= null_count; lnull_count++)
472 {
473 int i, j;
474 Parse_set *ls[4], *rs[4];
475 bool ls_exists = false;
476
477 /* Here, lnull_count and rnull_count are the null_counts
478 * we're assigning to those parts respectively. */
479 unsigned int rnull_count = null_count - lnull_count;
480
481 /* Now, we determine if (based on table only) we can see that
482 the current range is not parsable. */
483
484 for (i=0; i<4; i++) { ls[i] = rs[i] = NULL; }
485 if (Lmatch)
486 {
487 ls[0] = mk_parse_set(mchxt, ctxt,
488 lw, w, le->next, d->left->next,
489 lnull_count, pex);
490
491 if (le->multi)
492 ls[1] = mk_parse_set(mchxt, ctxt,
493 lw, w, le, d->left->next,
494 lnull_count, pex);
495
496 if (d->left->multi)
497 ls[2] = mk_parse_set(mchxt, ctxt,
498 lw, w, le->next, d->left,
499 lnull_count, pex);
500
501 if (le->multi && d->left->multi)
502 ls[3] = mk_parse_set(mchxt, ctxt,
503 lw, w, le, d->left,
504 lnull_count, pex);
505
506 ls_exists =
507 ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL;
508 }
509
510
511 if (Rmatch && (ls_exists || le == NULL))
512 {
513 rs[0] = mk_parse_set(mchxt, ctxt,
514 w, rw, d->right->next, re->next,
515 rnull_count, pex);
516
517 if (d->right->multi)
518 rs[1] = mk_parse_set(mchxt, ctxt,
519 w, rw, d->right, re->next,
520 rnull_count, pex);
521
522 if (re->multi)
523 rs[2] = mk_parse_set(mchxt, ctxt,
524 w, rw, d->right->next, re,
525 rnull_count, pex);
526
527 if (d->right->multi && re->multi)
528 rs[3] = mk_parse_set(mchxt, ctxt,
529 w, rw, d->right, re,
530 rnull_count, pex);
531 }
532
533 for (i=0; i<4; i++)
534 {
535 /* This ordering is probably not consistent with that
536 * needed to use list_links. (??) */
537 if (ls[i] == NULL) continue;
538 for (j=0; j<4; j++)
539 {
540 if (rs[j] == NULL) continue;
541 record_choice(ls[i], le, d->left,
542 rs[j], d->right, re,
543 d, &xt->set);
544 RECOUNT({xt->set.recount += ls[i]->recount * rs[j]->recount;})
545 }
546 }
547
548 if (ls_exists)
549 {
550 /* Evaluate using the left match, but not the right */
551 Parse_set* rset = mk_parse_set(mchxt, ctxt,
552 w, rw, d->right, re,
553 rnull_count, pex);
554 if (rset != NULL)
555 {
556 for (i=0; i<4; i++)
557 {
558 if (ls[i] == NULL) continue;
559 /* this ordering is probably not consistent with
560 * that needed to use list_links */
561 record_choice(ls[i], le, d->left,
562 rset, NULL /* d->right */,
563 re, /* the NULL indicates no link*/
564 d, &xt->set);
565 RECOUNT({xt->set.recount += ls[i]->recount * rset->recount;})
566 }
567 }
568 }
569 else if ((le == NULL) && (rs[0] != NULL ||
570 rs[1] != NULL || rs[2] != NULL || rs[3] != NULL))
571 {
572 /* Evaluate using the right match, but not the left */
573 Parse_set* lset = mk_parse_set(mchxt, ctxt,
574 lw, w, le, d->left,
575 lnull_count, pex);
576
577 if (lset != NULL)
578 {
579 for (j=0; j<4; j++)
580 {
581 if (rs[j] == NULL) continue;
582 /* this ordering is probably not consistent with
583 * that needed to use list_links */
584 record_choice(lset, NULL /* le */,
585 d->left, /* NULL indicates no link */
586 rs[j], d->right, re,
587 d, &xt->set);
588 RECOUNT({xt->set.recount += lset->recount * rs[j]->recount;})
589 }
590 }
591 }
592 }
593 }
594 pop_match_list(mchxt, mlb);
595 }
596 return &xt->set;
597 }
598
599 /**
600 * Return TRUE if and only if an overflow in the number of parses
601 * occurred. Use a 64-bit int for counting.
602 */
set_node_overflowed(Parse_set * set)603 static bool set_node_overflowed(Parse_set *set)
604 {
605 Parse_choice *pc;
606 s64 n = 0;
607 if (set == NULL || set->first == NULL) return false;
608
609 for (pc = set->first; pc != NULL; pc = pc->next)
610 {
611 n += pc->set[0]->count * pc->set[1]->count;
612 if (PARSE_NUM_OVERFLOW < n) return true;
613 }
614 return false;
615 }
616
set_overflowed(extractor_t * pex)617 static bool set_overflowed(extractor_t * pex)
618 {
619 unsigned int i;
620
621 assert(pex->x_table != NULL, "called set_overflowed with x_table==NULL");
622 for (i=0; i<pex->x_table_size; i++)
623 {
624 Pset_bucket *t;
625 for (t = pex->x_table[i]; t != NULL; t = t->next)
626 {
627 if (set_node_overflowed(&t->set)) return true;
628 }
629 }
630 return false;
631 }
632
633 /**
634 * This is the top level call that computes the whole parse_set. It
635 * points whole_set at the result. It creates the necessary hash
636 * table (x_table) which will be freed at the same time the
637 * whole_set is freed.
638 *
639 * This assumes that do_parse() has been run, and that the count_context
640 * is filled with the values thus computed. This function is structured
641 * much like do_parse(), which wraps the main workhorse do_count().
642 *
643 * If the number of linkages gets huge, then the counts can overflow.
644 * We check if this has happened when verifying the parse set.
645 * This routine returns TRUE iff an overflow occurred.
646 */
647
build_parse_set(extractor_t * pex,Sentence sent,fast_matcher_t * mchxt,count_context_t * ctxt,unsigned int null_count,Parse_Options opts)648 bool build_parse_set(extractor_t* pex, Sentence sent,
649 fast_matcher_t *mchxt,
650 count_context_t *ctxt,
651 unsigned int null_count, Parse_Options opts)
652 {
653 pex->words = sent->word;
654 pex->islands_ok = opts->islands_ok;
655 pex->sort_match_list = test_enabled("sort-match-list");
656
657 pex->parse_set =
658 mk_parse_set(mchxt, ctxt,
659 -1, sent->length, NULL, NULL, null_count+1, pex);
660
661
662 return set_overflowed(pex);
663 }
664
665 // Cannot be static, also called by SAT-solver.
check_link_size(Linkage lkg)666 void check_link_size(Linkage lkg)
667 {
668 if (lkg->lasz <= lkg->num_links)
669 {
670 lkg->lasz = 2 * lkg->lasz + 10;
671 lkg->link_array = realloc(lkg->link_array, lkg->lasz * sizeof(Link));
672 }
673 }
674
675 /**
676 * Assemble the link array and the chosen_disjuncts of a linkage.
677 */
issue_link(Linkage lkg,bool lr,Disjunct * md,Link * link)678 static void issue_link(Linkage lkg, bool lr, Disjunct *md, Link *link)
679 {
680 if (link->rc != NULL)
681 {
682 check_link_size(lkg);
683 lkg->link_array[lkg->num_links] = *link;
684 lkg->num_links++;
685 }
686
687 lkg->chosen_disjuncts[lr ? link->lw : link->rw] = md;
688 }
689
issue_links_for_choice(Linkage lkg,Parse_choice * pc)690 static void issue_links_for_choice(Linkage lkg, Parse_choice *pc)
691 {
692 if (pc->link[0].lc != NULL) { /* there is a link to generate */
693 issue_link(lkg, /*lr*/false, pc->md, &pc->link[0]);
694 }
695 if (pc->link[1].lc != NULL) {
696 issue_link(lkg, /*lr*/true, pc->md, &pc->link[1]);
697 }
698 }
699
list_links(Linkage lkg,const Parse_set * set,int index)700 static void list_links(Linkage lkg, const Parse_set * set, int index)
701 {
702 Parse_choice *pc;
703 s64 n;
704
705 if (set == NULL || set->first == NULL) return;
706 for (pc = set->first; pc != NULL; pc = pc->next) {
707 n = pc->set[0]->count * pc->set[1]->count;
708 if (index < n) break;
709 index -= n;
710 }
711 assert(pc != NULL, "walked off the end in list_links");
712 issue_links_for_choice(lkg, pc);
713 list_links(lkg, pc->set[0], index % pc->set[0]->count);
714 list_links(lkg, pc->set[1], index / pc->set[0]->count);
715 }
716
list_random_links(Linkage lkg,unsigned int * rand_state,const Parse_set * set)717 static void list_random_links(Linkage lkg, unsigned int *rand_state,
718 const Parse_set * set)
719 {
720 Parse_choice *pc;
721 int num_pc, new_index;
722
723 if (set == NULL || set->first == NULL) return;
724
725 /* Most of the times there is only one list element. */
726 if (set->first->next == NULL)
727 {
728 pc = set->first;
729 }
730 else
731 {
732 num_pc = 0;
733 for (pc = set->first; pc != NULL; pc = pc->next) {
734 num_pc++;
735 }
736
737 new_index = rand_r(rand_state) % num_pc;
738
739 num_pc = 0;
740 for (pc = set->first; pc != NULL; pc = pc->next) {
741 if (new_index == num_pc) break;
742 num_pc++;
743 }
744 }
745
746 issue_links_for_choice(lkg, pc);
747 list_random_links(lkg, rand_state, pc->set[0]);
748 list_random_links(lkg, rand_state, pc->set[1]);
749 }
750
751 /**
752 * Generate the list of all links of the index'th parsing of the
753 * sentence. For this to work, you must have already called parse, and
754 * already built the whole_set.
755 */
extract_links(extractor_t * pex,Linkage lkg)756 void extract_links(extractor_t * pex, Linkage lkg)
757 {
758 int index = lkg->lifo.index;
759 if (index < 0)
760 {
761 bool repeatable = false;
762 if (0 == pex->rand_state) repeatable = true;
763 if (repeatable) pex->rand_state = index;
764 list_random_links(lkg, &pex->rand_state, pex->parse_set);
765 if (repeatable)
766 pex->rand_state = 0;
767 else
768 lkg->sent->rand_state = pex->rand_state;
769 }
770 else {
771 list_links(lkg, pex->parse_set, index);
772 }
773 }
774