1 /*************************************************************************/
2 /* Copyright (c) 2004 */
3 /* Daniel Sleator, David Temperley, and John Lafferty */
4 /* Copyright (c) 2014 Linas Vepstas */
5 /* All rights reserved */
6 /* */
7 /* Use of the link grammar parsing system is subject to the terms of the */
8 /* license set forth in the LICENSE file included with this software. */
9 /* This license allows free redistribution and use in source and binary */
10 /* forms, with or without modification, subject to certain conditions. */
11 /* */
12 /*************************************************************************/
13
14 #include <stdint.h>
15 #include <string.h>
16
17 #include "api-structures.h"
18 #include "dict-common/dict-defines.h" // For RIGHT_WALL_WORD
19 #include "dict-common/dict-common.h" // For Dictionary_s
20 #include "dict-common/dict-defines.h" // For MAX_WORD
21 #include "error.h"
22 #include "linkage/linkage.h"
23 #include "post-process/post-process.h"
24 #include "post-process/pp-structures.h"
25 #include "string-set.h"
26 #include "utilities.h"
27
28 #define D_CONST 8 /* debug level for this file */
29
30 #define OPEN_BRACKET '['
31 #define CLOSE_BRACKET ']'
32
33 typedef enum {OPEN_TOK, CLOSE_TOK, WORD_TOK} CType;
34 typedef enum {NONE, STYPE, PTYPE, QTYPE, QDTYPE} WType;
35
36 typedef struct
37 {
38 const char * type;
39 const char * start_link;
40 size_t left; /* leftmost word */
41 size_t right; /* rightmost word */
42 int canon;
43 bool valid;
44 char domain_type;
45 } constituent_t;
46
47 /*
48 * Context used to store assorted intermediate data
49 * when the constituent string is being generated.
50 */
51 typedef struct
52 {
53 String_set * phrase_ss;
54 WType * wordtype;
55 constituent_t * constituent;
56 int conlen;
57 } con_context_t;
58
59
60 typedef struct CNode_s CNode;
61
62 /* Invariant: Leaf if child==NULL */
63 struct CNode_s
64 {
65 char * label;
66 CNode * child;
67 CNode * next;
68 int start, end;
69 };
70
71 /* ================================================================ */
72
73 /**
74 * @param s A Label
75 * @param t A Label
76 * @return True if the uppercase parts of s and t are equal, else false.
77 */
uppercompare(const char * s,const char * t)78 static bool uppercompare(const char * s, const char * t)
79 {
80 while (isupper(*s) || isupper(*t))
81 {
82 if (*s++ != *t++) return false;
83 }
84 return true;
85 }
86
87 /**
88 * If a constituent c has a comma at either end, we exclude the
89 * comma.
90 */
adjust_for_left_comma(con_context_t * ctxt,Linkage linkage,int c)91 static void adjust_for_left_comma(con_context_t * ctxt, Linkage linkage, int c)
92 {
93 int w;
94 w = ctxt->constituent[c].left;
95 if (strcmp(linkage->word[w], ",") == 0)
96 w++;
97 ctxt->constituent[c].left = w;
98 }
99
adjust_for_right_comma(con_context_t * ctxt,Linkage linkage,int c)100 static void adjust_for_right_comma(con_context_t *ctxt, Linkage linkage, int c)
101 {
102 int w;
103 w = ctxt->constituent[c].right;
104 if ((strcmp(linkage->word[w], ",") == 0) ||
105 (strcmp(linkage->word[w], RIGHT_WALL_WORD) == 0))
106 {
107 w--;
108 }
109 ctxt->constituent[c].right = w;
110 }
111
print_constituent(con_context_t * ctxt,Linkage linkage,int c)112 static void print_constituent(con_context_t *ctxt, Linkage linkage, int c)
113 {
114 size_t w;
115
116 err_msg(lg_Debug, " c %2d %4s [%c] (%2zu-%2zu): ",
117 c, ctxt->constituent[c].type, ctxt->constituent[c].domain_type,
118 ctxt->constituent[c].left, ctxt->constituent[c].right);
119 for (w = ctxt->constituent[c].left; w <= ctxt->constituent[c].right; w++) {
120 err_msg(lg_Debug, "%s ", linkage->word[w]); /**PV**/
121 }
122 err_msg(lg_Debug, "\n");
123 }
124
125 /******************************************************
126 * These functions do the bulk of the actual
127 * constituent-generating; they're called once.
128 *********************************************************/
129
130 typedef enum
131 {
132 CASE_S=1,
133 CASE_UNUSED=2, /* XXX not used anywhere... */
134 CASE_REL_CLAUSE=3,
135 CASE_APPOS=4,
136 CASE_OPENER=5,
137 CASE_PPOPEN=6,
138 CASE_SVINV=7,
139 CASE_PART_MOD=8,
140 CASE_PART_OPEN=9,
141
142 } case_type;
143
144 /**
145 * This function looks for constituents of type ctype1. Say it finds
146 * one, call it c1. It searches for the next larger constituent of
147 * type ctype2, call it c2. It then generates a new constituent of
148 * ctype3, containing all the words in c2 but not c1.
149 */
gen_comp(con_context_t * ctxt,Linkage linkage,int numcon_total,int numcon_subl,const char * ctype1,const char * ctype2,const char * ctype3,case_type x)150 static int gen_comp(con_context_t *ctxt, Linkage linkage,
151 int numcon_total, int numcon_subl,
152 const char * ctype1, const char * ctype2,
153 const char * ctype3, case_type x)
154 {
155 size_t w, w2, w3;
156 int c, c1, c2;
157 bool done;
158 c = numcon_total + numcon_subl;
159
160 for (c1=numcon_total; c1<numcon_total + numcon_subl; c1++)
161 {
162 /* If ctype1 is NP, it has to be an appositive to continue */
163 if ((x==CASE_APPOS) && (post_process_match("MX#*", ctxt->constituent[c1].start_link)==0))
164 continue;
165
166 #ifdef REVIVE_DEAD_CODE
167 /* If ctype1 is X, and domain_type is t, it's an infinitive - skip it */
168 if ((x==CASE_UNUSED) && (ctxt->constituent[c1].domain_type=='t'))
169 continue;
170 #endif /* REVIVE_DEAD_CODE */
171
172 /* If it's domain-type z, it's a subject-relative clause;
173 the VP doesn't need an NP */
174 if (ctxt->constituent[c1].domain_type=='z')
175 continue;
176
177 /* If ctype1 is X or VP, and it's not started by an S, don't generate an NP
178 (Neither of the two previous checks are necessary now, right?) */
179 #ifdef REVIVE_DEAD_CODE
180 /* use this ... if ((x==CASE_S || x==CASE_UNUSED) && */
181 #endif /* REVIVE_DEAD_CODE */
182 if ((x==CASE_S) &&
183 (((post_process_match("S", ctxt->constituent[c1].start_link) == 0) &&
184 (post_process_match("SX", ctxt->constituent[c1].start_link) == 0) &&
185 (post_process_match("SF", ctxt->constituent[c1].start_link) == 0)) ||
186 (post_process_match("S##w", ctxt->constituent[c1].start_link) != 0)))
187 continue;
188
189 /* If it's an SBAR (relative clause case), it has to be a relative clause */
190 if ((x==CASE_REL_CLAUSE) &&
191 ((post_process_match("Rn", ctxt->constituent[c1].start_link) == 0) &&
192 (post_process_match("R*", ctxt->constituent[c1].start_link) == 0) &&
193 (post_process_match("MX#r", ctxt->constituent[c1].start_link) == 0) &&
194 (post_process_match("Mr", ctxt->constituent[c1].start_link) == 0) &&
195 (post_process_match("MX#d", ctxt->constituent[c1].start_link) == 0)))
196 continue;
197
198 /* If ctype1 is SBAR (clause opener case), it has to be an f domain */
199 if ((x==CASE_OPENER) && (ctxt->constituent[c1].domain_type!='f'))
200 continue;
201
202 /* If ctype1 is SBAR (pp opener case), it has to be a g domain */
203 if ((x==CASE_PPOPEN) && (ctxt->constituent[c1].domain_type!='g'))
204 continue;
205
206 /* If ctype1 is NP (paraphrase case), it has to be started by an SI */
207 if ((x==CASE_SVINV) && (post_process_match("SI", ctxt->constituent[c1].start_link)==0))
208 continue;
209
210 /* If ctype1 is VP (participle modifier case), it has to be
211 started by an Mv or Mg */
212 if ((x==CASE_PART_MOD) && (post_process_match("M", ctxt->constituent[c1].start_link)==0))
213 continue;
214
215 /* If ctype1 is VP (participle opener case), it has
216 to be started by a COp */
217 if ((x==CASE_PART_OPEN) && (post_process_match("COp", ctxt->constituent[c1].start_link)==0))
218 continue;
219
220 /* Now start at the bounds of c1, and work outwards until you
221 find a larger constituent of type ctype2 */
222 if (!(strcmp(ctxt->constituent[c1].type, ctype1)==0))
223 continue;
224
225 if (verbosity_level(D_CONST))
226 err_msg(lg_Debug, "Generating complement constituent for c %d of type %s\n\\",
227 c1, ctype1);
228 done = false;
229 for (w2 = ctxt->constituent[c1].left; (done == false) && (w2 != (size_t)-1); w2--)
230 {
231 for (w3 = ctxt->constituent[c1].right; w3<linkage->num_words; w3++)
232 {
233 for (c2 = numcon_total; (done == false) &&
234 (c2 < numcon_total + numcon_subl); c2++) {
235 if (!((ctxt->constituent[c2].left == w2) &&
236 (ctxt->constituent[c2].right == w3)) || (c2==c1))
237 continue;
238 if (!(strcmp(ctxt->constituent[c2].type, ctype2)==0))
239 continue;
240
241 /* if the new constituent (c) is to the left
242 of c1, its right edge should be adjacent to the
243 left edge of c1 - or as close as possible. */
244 if ((x==CASE_OPENER) || (x==CASE_PPOPEN) || (x==CASE_PART_OPEN))
245 {
246 /* This is the case where c is to the
247 RIGHT of c1 */
248 w = ctxt->constituent[c1].right + 1;
249 if (w > ctxt->constituent[c2].right)
250 {
251 done = true;
252 continue;
253 }
254 ctxt->constituent[c].left = w;
255 ctxt->constituent[c].right = ctxt->constituent[c2].right;
256 }
257 else
258 {
259 w = ctxt->constituent[c1].left - 1;
260 if (w < ctxt->constituent[c2].left) {
261 done = true;
262 continue;
263 }
264 ctxt->constituent[c].right = w;
265 ctxt->constituent[c].left = ctxt->constituent[c2].left;
266 }
267
268 adjust_for_left_comma(ctxt, linkage, c1);
269 adjust_for_right_comma(ctxt, linkage, c1);
270
271 ctxt->constituent[c].type =
272 string_set_add(ctype3, ctxt->phrase_ss);
273 ctxt->constituent[c].domain_type = 'x';
274 ctxt->constituent[c].start_link =
275 string_set_add("XX", ctxt->phrase_ss);
276 if (verbosity_level(D_CONST))
277 {
278 err_msg(lg_Debug, "Larger c found: c %d (%s); ", c2, ctype2);
279 err_msg(lg_Debug, "Adding constituent:\n\\");
280 print_constituent(ctxt, linkage, c);
281 }
282 c++;
283 assert (c < ctxt->conlen, "Too many constituents");
284 done = true;
285 }
286 }
287 }
288 if (verbosity_level(D_CONST))
289 {
290 if (done == false)
291 err_msg(lg_Debug, "No constituent added, because no larger %s"
292 " was found\n", ctype2);
293 else
294 lg_error_flush();
295 }
296 }
297 numcon_subl = c - numcon_total;
298 return numcon_subl;
299 }
300
301 /**
302 * Look for a constituent started by an MVs or MVg.
303 * Find any VP's or ADJP's that contain it (without going
304 * beyond a larger S or NP). Adjust them so that
305 * they end right before the m domain starts.
306 */
adjust_subordinate_clauses(con_context_t * ctxt,Linkage linkage,int numcon_total,int numcon_subl)307 static void adjust_subordinate_clauses(con_context_t *ctxt, Linkage linkage,
308 int numcon_total,
309 int numcon_subl)
310 {
311 int c, c2;
312 size_t w, w2;
313 bool done;
314
315 for (c=numcon_total; c<numcon_total + numcon_subl; c++)
316 {
317 if ((post_process_match("MVs", ctxt->constituent[c].start_link) == 1) ||
318 (post_process_match("MVg", ctxt->constituent[c].start_link) == 1))
319 {
320 done = false;
321 for (w2 = ctxt->constituent[c].left-1; (false == done) && w2 != (size_t) -1; w2--)
322 {
323 for (c2 = numcon_total; c2 < numcon_total + numcon_subl; c2++)
324 {
325 if (!((ctxt->constituent[c2].left == w2) &&
326 (ctxt->constituent[c2].right >= ctxt->constituent[c].right)))
327 continue;
328 if ((strcmp(ctxt->constituent[c2].type, "S") == 0) ||
329 (strcmp(ctxt->constituent[c2].type, "NP") == 0)) {
330 done = true;
331 break;
332 }
333 if ((ctxt->constituent[c2].domain_type == 'v') ||
334 (ctxt->constituent[c2].domain_type == 'a'))
335 {
336 w = ctxt->constituent[c].left - 1;
337 ctxt->constituent[c2].right = w;
338
339 if (verbosity_level(D_CONST))
340 {
341 err_msg(lg_Debug, "Adjusting constituent %d:\n\\", c2);
342 print_constituent(ctxt, linkage, c2);
343 }
344 }
345 }
346 }
347 if (strcmp(linkage->word[ctxt->constituent[c].left], ",") == 0)
348 ctxt->constituent[c].left++;
349 }
350 }
351 }
352
353 /******************************************************
354 * These functions are called once, after constituents
355 * have been generated, to merge them together and fix up
356 * some other things.
357 *
358 ********************************************************/
359
merge_constituents(con_context_t * ctxt,Linkage linkage,int numcon_total)360 static int merge_constituents(con_context_t *ctxt, Linkage linkage, int numcon_total)
361 {
362 int c1, c2=0;
363
364 /* First go through and give each constituent a canonical number
365 (the index number of the lowest-numbered constituent
366 identical to it) */
367 for (c1 = 0; c1 < numcon_total; c1++)
368 {
369 ctxt->constituent[c1].valid = true;
370 ctxt->constituent[c1].canon = c1;
371 for (c2 = c1 + 1; c2 < numcon_total; c2++)
372 {
373 if ((ctxt->constituent[c1].left == ctxt->constituent[c2].left) &&
374 (ctxt->constituent[c1].right == ctxt->constituent[c2].right) &&
375 (strcmp(ctxt->constituent[c1].type, ctxt->constituent[c2].type) == 0))
376 {
377 ctxt->constituent[c2].canon = c1;
378 }
379 }
380 }
381
382 /* Now go through and find duplicates; if a pair is found,
383 * mark one as invalid.
384 */
385 for (c1 = 0; c1 < numcon_total; c1++)
386 {
387 for (c2 = c1 + 1; c2 < numcon_total; c2++)
388 {
389 if (ctxt->constituent[c2].canon == ctxt->constituent[c1].canon)
390 ctxt->constituent[c2].valid = false;
391 }
392 }
393
394 return numcon_total;
395 }
396
397 /**
398 * Go through all the words. If a word is on the right end of
399 * an S (or SF or SX), wordtype[w]=STYPE. If it's also on the left end of a
400 * Pg*b, I, PP, or Pv, wordtype[w]=PTYPE. If it's a question-word
401 * used in an indirect question, wordtype[w]=QTYPE. If it's a
402 * question-word determiner, wordtype[w]=QDTYPE. Else wordtype[w]=NONE.
403 * (This function is called once.)
404 */
generate_misc_word_info(con_context_t * ctxt,Linkage linkage)405 static void generate_misc_word_info(con_context_t * ctxt, Linkage linkage)
406 {
407 size_t w1, w2, l1, l2;
408 const char * label1, * label2;
409
410 for (w1 = 0; w1 < linkage->num_words; w1++)
411 ctxt->wordtype[w1] = NONE;
412
413 for (l1 = 0; l1 < linkage_get_num_links(linkage); l1++) {
414 w1=linkage_get_link_rword(linkage, l1);
415 label1 = linkage_get_link_label(linkage, l1);
416 if ((uppercompare(label1, "S")) ||
417 (uppercompare(label1, "SX")) ||
418 (uppercompare(label1, "SF"))) {
419 ctxt->wordtype[w1] = STYPE;
420 for (l2 = 0; l2 < linkage_get_num_links(linkage); l2++) {
421 w2=linkage_get_link_lword(linkage, l2);
422 label2 = linkage_get_link_label(linkage, l2);
423 if ((w1 == w2) &&
424 ((post_process_match("Pg#b", label2)==1) ||
425 (uppercompare(label2, "I")) ||
426 (uppercompare(label2, "PP")) ||
427 (post_process_match("Pv", label2)==1))) {
428 /* Pvf, Pgf? */
429 ctxt->wordtype[w1] = PTYPE;
430 }
431 }
432 }
433 if (post_process_match("QI#d", label1)==1) {
434 ctxt->wordtype[w1] = QTYPE;
435 for (l2 = 0; l2 < linkage_get_num_links(linkage); l2++) {
436 w2 = linkage_get_link_lword(linkage, l2);
437 label2 = linkage_get_link_label(linkage, l2);
438 if ((w1 == w2) && (post_process_match("D##w", label2)==1)) {
439 ctxt->wordtype[w1] = QDTYPE;
440 }
441 }
442 }
443 if (post_process_match("Mr", label1)==1) ctxt->wordtype[w1] = QDTYPE;
444 if (post_process_match("MX#d", label1)==1) ctxt->wordtype[w1] = QDTYPE;
445 }
446 }
447
new_style_conjunctions(con_context_t * ctxt,Linkage linkage,int numcon_total)448 static int new_style_conjunctions(con_context_t *ctxt, Linkage linkage, int numcon_total)
449 {
450 #ifdef DEBUG
451 int c;
452 for (c = 0; c < numcon_total; c++)
453 {
454 constituent_t *ct = &ctxt->constituent[c];
455 lgdebug(6, "ola %d valid=%d %s start=%s lr=%zu %zu\n", c,
456 ct->valid, ct->type, ct->start_link, ct->left, ct->right);
457 }
458 #endif
459 return numcon_total;
460 }
461
last_minute_fixes(con_context_t * ctxt,Linkage linkage,int numcon_total)462 static int last_minute_fixes(con_context_t *ctxt, Linkage linkage, int numcon_total)
463 {
464 int c;
465 bool global_leftend_found, global_rightend_found;
466 size_t lastword;
467
468 for (c = 0; c < numcon_total; c++)
469 {
470 /* In a paraphrase construction ("John ran, he said"),
471 the paraphrasing clause doesn't get
472 an S. (This is true in Treebank II, not Treebank I) */
473
474 if (uppercompare(ctxt->constituent[c].start_link, "CP"))
475 {
476 ctxt->constituent[c].valid = false;
477 }
478
479 /* If it's a possessive with an "'s", the NP on the left
480 should be extended to include the "'s". */
481 if ((uppercompare(ctxt->constituent[c].start_link, "YS")) ||
482 (uppercompare(ctxt->constituent[c].start_link, "YP")))
483 {
484 ctxt->constituent[c].right++;
485 }
486
487 /* If a constituent has starting link MVpn, it's a time
488 expression like "last week"; label it as a noun phrase
489 (incorrectly) */
490
491 if (strcmp(ctxt->constituent[c].start_link, "MVpn") == 0)
492 {
493 ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
494 }
495 if (strcmp(ctxt->constituent[c].start_link, "COn") == 0)
496 {
497 ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
498 }
499 if (strcmp(ctxt->constituent[c].start_link, "Mpn") == 0)
500 {
501 ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
502 }
503
504 /* If the constituent is an S started by "but" or "and" at
505 the beginning of the 67411142sentence, it should be ignored. */
506
507 if ((strcmp(ctxt->constituent[c].start_link, "Wdc") == 0) &&
508 (ctxt->constituent[c].left == 2))
509 {
510 ctxt->constituent[c].valid = false;
511 }
512
513 /* For prenominal adjectives, an ADJP constituent is assigned
514 if it's a hyphenated (Ah) or comparative (Am) adjective;
515 otherwise no ADJP is assigned, unless the phrase is more
516 than one word long (e.g. "very big"). The same with certain
517 types of adverbs. */
518 /* That was for Treebank I. For Treebank II, the rule only
519 seems to apply to prenominal adjectives (of all kinds).
520 However, it also applies to number expressions ("QP"). */
521
522 if ((post_process_match("A", ctxt->constituent[c].start_link) == 1) ||
523 (ctxt->constituent[c].domain_type == 'd') ||
524 (ctxt->constituent[c].domain_type == 'h')) {
525 if (ctxt->constituent[c].right-ctxt->constituent[c].left == 0)
526 {
527 ctxt->constituent[c].valid = false;
528 }
529 }
530
531 if ((ctxt->constituent[c].domain_type == 'h') &&
532 (strcmp(linkage->word[ctxt->constituent[c].left - 1], "$") == 0))
533 {
534 ctxt->constituent[c].left--;
535 }
536 }
537
538 /* If there's a global S constituent that includes everything
539 except a final terminating punctuation (period or question mark),
540 extend it by one word. We know its the terminating punctuation,
541 because it links to the right wall with an RW link. If its
542 not, then that final link is not there...
543 */
544 for (c = 0; c < numcon_total; c++)
545 {
546 if ((ctxt->constituent[c].right == linkage->num_words - 3) &&
547 (ctxt->constituent[c].left == 1) &&
548 (strcmp(ctxt->constituent[c].type, "S") == 0))
549 {
550 size_t ln;
551 for (ln = 0; ln < linkage->num_links; ln++)
552 {
553 if ((linkage->link_array[ln].lw == linkage->num_words - 2) &&
554 (linkage->link_array[ln].rw == linkage->num_words - 1))
555 {
556 ctxt->constituent[c].right++;
557 break;
558 }
559 }
560 }
561 }
562
563 /* If there's no S boundary at the very left end of the sentence,
564 or the very right end, create a new S spanning the entire sentence */
565
566 lastword = linkage->num_words - 2;
567 global_leftend_found = false;
568 global_rightend_found = false;
569 for (c = 0; c < numcon_total; c++)
570 {
571 if ((ctxt->constituent[c].left == 1) &&
572 (strcmp(ctxt->constituent[c].type, "S") == 0) &&
573 ctxt->constituent[c].valid)
574 {
575 global_leftend_found = true;
576 }
577 }
578
579 for (c = 0; c < numcon_total; c++)
580 {
581 if ((ctxt->constituent[c].right >= lastword) &&
582 (strcmp(ctxt->constituent[c].type, "S") == 0) &&
583 ctxt->constituent[c].valid)
584 {
585 global_rightend_found = true;
586 }
587 }
588
589 if ((global_leftend_found == false) || (global_rightend_found == false))
590 {
591 c = numcon_total;
592 ctxt->constituent[c].left = 1;
593 ctxt->constituent[c].right = linkage->num_words-1;
594 ctxt->constituent[c].type = string_set_add("S", ctxt->phrase_ss);
595 ctxt->constituent[c].valid = true;
596 ctxt->constituent[c].domain_type = 'x';
597 numcon_total++;
598 if (verbosity_level(D_CONST))
599 {
600 err_msg(lg_Debug, "Adding global sentence constituent:\n\\");
601 print_constituent(ctxt, linkage, c);
602 }
603 }
604
605 return numcon_total;
606 }
607
add_constituent(con_context_t * ctxt,int c,const Linkage linkage,const Domain * domain,int l,int r,const char * name)608 static int add_constituent(con_context_t *ctxt, int c, const Linkage linkage,
609 const Domain *domain,
610 int l, int r, const char * name)
611 {
612 int nwords = linkage->num_words-2;
613 c++;
614
615 /* Avoid running off end, to walls. */
616 if (l < 1) l=1;
617 if (r > nwords) r = nwords;
618 if (l > nwords) l = nwords;
619 assert(l <= r, "negative constituent length!");
620
621 ctxt->constituent[c].type = string_set_add(name, ctxt->phrase_ss);
622 ctxt->constituent[c].left = l;
623 ctxt->constituent[c].right = r;
624 ctxt->constituent[c].domain_type = domain->type;
625 ctxt->constituent[c].start_link =
626 linkage_get_link_label(linkage, domain->start_link);
627 return c;
628 }
629
cons_of_domain(const Linkage linkage,char domain_type)630 static const char * cons_of_domain(const Linkage linkage, char domain_type)
631 {
632 switch (domain_type) {
633 case 'a':
634 return "ADJP";
635 case 'b':
636 return "SBAR";
637 case 'c':
638 return "VP";
639 case 'd':
640 return "QP";
641 case 'e':
642 return "ADVP";
643 case 'f':
644 return "SBAR";
645 case 'g':
646 return "PP";
647 case 'h':
648 return "QP";
649 case 'i':
650 return "ADVP";
651 case 'k':
652 return "PRT";
653 case 'n':
654 return "NP";
655 case 'p':
656 return "PP";
657 case 'q':
658 return "SINV";
659 case 's':
660 return "S";
661 case 't':
662 return "VP";
663 case 'u':
664 return "ADJP";
665 case 'v':
666 return "VP";
667 case 'y':
668 return "NP";
669 case 'z':
670 return "VP";
671 default:
672 {
673 err_ctxt ec = { linkage->sent };
674 err_msgc(&ec, lg_Error, "Illegal domain: %c\n", domain_type);
675 return "";
676 }
677 }
678 }
679
read_constituents_from_domains(con_context_t * ctxt,Linkage linkage,int numcon_total)680 static int read_constituents_from_domains(con_context_t *ctxt, Linkage linkage,
681 int numcon_total)
682 {
683 size_t d, l, w2;
684 int c, w, c2, numcon_subl = 0;
685 PP_data *pp_data = &linkage->sent->constituent_pp->pp_data;
686
687 for (d = 0, c = numcon_total; d < pp_data->N_domains; d++, c++)
688 {
689 size_t leftmost, rightmost, leftlimit;
690 int rootleft;
691 List_o_links * dlink;
692
693 Domain domain = pp_data->domain_array[d];
694
695 // rootright = linkage_get_link_rword(linkage, domain.start_link);
696 rootleft = linkage_get_link_lword(linkage, domain.start_link);
697
698 if ((domain.type=='c') ||
699 (domain.type=='d') ||
700 (domain.type=='e') ||
701 (domain.type=='f') ||
702 (domain.type=='g') ||
703 (domain.type=='u') ||
704 (domain.type=='y'))
705 {
706 leftlimit = 0;
707 leftmost = linkage_get_link_lword(linkage, domain.start_link);
708 rightmost = linkage_get_link_lword(linkage, domain.start_link);
709 }
710 else
711 {
712 leftlimit = linkage_get_link_lword(linkage, domain.start_link) + 1;
713 leftmost = linkage_get_link_rword(linkage, domain.start_link);
714 rightmost = linkage_get_link_rword(linkage, domain.start_link);
715 }
716
717 /* Start by assigning both left and right limits to the
718 * right word of the start link. This will always be contained
719 * in the constituent. This will also handle the case
720 * where the domain contains no links.
721 */
722 for (dlink = domain.lol; dlink != NULL; dlink = dlink->next)
723 {
724 l = dlink->link;
725
726 if ((linkage_get_link_lword(linkage, l) < leftmost) &&
727 (linkage_get_link_lword(linkage, l) >= leftlimit))
728 {
729 leftmost = linkage_get_link_lword(linkage, l);
730 }
731
732 if (linkage_get_link_rword(linkage, l) > rightmost)
733 {
734 rightmost = linkage_get_link_rword(linkage, l);
735 }
736 }
737
738 c--;
739 c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost,
740 cons_of_domain(linkage, domain.type));
741
742 if (domain.type == 'z')
743 {
744 c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
745 }
746 if (domain.type=='c')
747 {
748 c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
749 }
750 if ((post_process_match("Ce*", ctxt->constituent[c].start_link)==1) ||
751 (post_process_match("Rn", ctxt->constituent[c].start_link)==1))
752 {
753 c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "SBAR");
754 }
755 if ((post_process_match("R*", ctxt->constituent[c].start_link)==1) ||
756 (post_process_match("MX#r", ctxt->constituent[c].start_link)==1))
757 {
758 w = leftmost;
759 if (strcmp(linkage->word[w], ",") == 0) w++;
760 c = add_constituent(ctxt, c, linkage, &domain, w, w, "WHNP");
761 }
762 if (post_process_match("Mj", ctxt->constituent[c].start_link) == 1)
763 {
764 w = leftmost;
765 if (strcmp(linkage->word[w], ",") == 0) w++;
766 c = add_constituent(ctxt, c, linkage, &domain, w, w+1, "WHPP");
767 c = add_constituent(ctxt, c, linkage, &domain, w+1, w+1, "WHNP");
768 }
769 if ((post_process_match("Ss#d", ctxt->constituent[c].start_link)==1) ||
770 (post_process_match("B#d", ctxt->constituent[c].start_link)==1))
771 {
772 c = add_constituent(ctxt, c, linkage, &domain, rootleft, rootleft, "WHNP");
773 c = add_constituent(ctxt, c, linkage, &domain,
774 rootleft, ctxt->constituent[c-1].right, "SBAR");
775 }
776 if (post_process_match("CP", ctxt->constituent[c].start_link)==1)
777 {
778 if (strcmp(linkage->word[leftmost], ",") == 0)
779 ctxt->constituent[c].left++;
780 c = add_constituent(ctxt, c, linkage, &domain, 1, linkage->num_words-1, "S");
781 }
782 if ((post_process_match("MVs", ctxt->constituent[c].start_link)==1) ||
783 (domain.type=='f'))
784 {
785 w = ctxt->constituent[c].left;
786 if (strcmp(linkage->word[w], ",") == 0)
787 w++;
788 if (strcmp(linkage->word[w], "when") == 0)
789 {
790 c = add_constituent(ctxt, c, linkage, &domain, w, w, "WHADVP");
791 }
792 }
793 if (domain.type=='t')
794 {
795 c = add_constituent(ctxt, c, linkage, &domain, leftmost, rightmost, "S");
796 }
797 if ((post_process_match("QI", ctxt->constituent[c].start_link) == 1) ||
798 (post_process_match("Mr", ctxt->constituent[c].start_link) == 1) ||
799 (post_process_match("MX#d", ctxt->constituent[c].start_link) == 1))
800 {
801 const char * name = "";
802 w = leftmost;
803 if (strcmp(linkage->word[w], ",") == 0) w++;
804 if (ctxt->wordtype[w] == NONE)
805 name = "WHADVP";
806 else if (ctxt->wordtype[w] == QTYPE)
807 name = "WHNP";
808 else if (ctxt->wordtype[w] == QDTYPE)
809 name = "WHNP";
810 else
811 assert(0, "Unexpected word type");
812 c = add_constituent(ctxt, c, linkage, &domain, w, w, name);
813
814 if (ctxt->wordtype[w] == QDTYPE)
815 {
816 /* Now find the finite verb to the right, start an S */
817 /* Limit w2 to sentence length. */
818 // for( w2=w+1; w2 < ctxt->r_limit-1; w2++ )
819 for (w2 = w+1; w2 < rightmost; w2++)
820 if ((ctxt->wordtype[w2] == STYPE) || (ctxt->wordtype[w2] == PTYPE)) break;
821
822 /* Adjust the right boundary of previous constituent */
823 ctxt->constituent[c].right = w2 - 1;
824 c = add_constituent(ctxt, c, linkage, &domain, w2, rightmost, "S");
825 }
826 }
827
828 if (ctxt->constituent[c].domain_type == '\0')
829 {
830 err_ctxt ec = { linkage->sent };
831 err_msgc(&ec, lg_Error, "No domain type assigned to constituent\n");
832 }
833 if (ctxt->constituent[c].start_link == NULL)
834 {
835 err_ctxt ec = { linkage->sent };
836 err_msgc(&ec, lg_Error, "No type assigned to constituent\n");
837 }
838 }
839
840 numcon_subl = c - numcon_total;
841 /* numcon_subl = handle_islands(linkage, numcon_total, numcon_subl); */
842
843 if (verbosity_level(D_CONST))
844 {
845 err_msg(lg_Debug, "Constituents added at first stage:\n\\");
846 for (c = numcon_total; c < numcon_total + numcon_subl; c++)
847 {
848 /* FIXME: Here it cannot be printed as one debug message because
849 * a newline is printed at the end. */
850 print_constituent(ctxt, linkage, c);
851 }
852 }
853
854 /* Opener case - generates S around main clause.
855 (This must be done first; the S generated will be needed for
856 later cases.) */
857 numcon_subl =
858 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "S", "S", CASE_OPENER);
859
860 /* pp opener case */
861 numcon_subl =
862 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "S", "S", CASE_PPOPEN);
863
864 /* participle opener case */
865 numcon_subl =
866 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "S", "S", "S", CASE_PART_OPEN);
867
868 /* Subject-phrase case; every main VP generates an S */
869 numcon_subl =
870 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "S", "NP", CASE_S);
871
872 /* Relative clause case; an SBAR generates a complement NP */
873 numcon_subl =
874 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "NP", "NP", CASE_REL_CLAUSE);
875
876 /* Participle modifier case */
877 numcon_subl =
878 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "NP", "NP", CASE_PART_MOD);
879
880 /* PP modifying NP */
881 numcon_subl =
882 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "NP", "NP", CASE_PART_MOD);
883
884 /* Appositive case */
885 numcon_subl =
886 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "NP", "NP", CASE_APPOS);
887
888 /* S-V inversion case; an NP generates a complement VP */
889 numcon_subl =
890 gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "SINV", "VP", CASE_SVINV);
891
892 adjust_subordinate_clauses(ctxt, linkage, numcon_total, numcon_subl);
893 for (c = numcon_total; c < numcon_total + numcon_subl; c++)
894 {
895 if ((ctxt->constituent[c].domain_type=='p') &&
896 (strcmp(linkage->word[ctxt->constituent[c].left], ",")==0))
897 {
898 ctxt->constituent[c].left++;
899 }
900 }
901
902 /* Make sure the constituents are nested. If two constituents
903 * are not nested: whichever constituent has the furthest left
904 * boundary, shift that boundary rightwards to the left boundary
905 * of the other one.
906 */
907 while (true)
908 {
909 bool adjustment_made = false;
910 for (c = numcon_total; c < numcon_total + numcon_subl; c++)
911 {
912 for (c2 = numcon_total; c2 < numcon_total + numcon_subl; c2++)
913 {
914 if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
915 (ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
916 (ctxt->constituent[c].right >= ctxt->constituent[c2].left))
917 {
918 /* We've found two overlapping constituents.
919 If one is larger, except the smaller one
920 includes an extra comma, adjust the smaller one
921 to exclude the comma */
922
923 if ((strcmp(linkage->word[ctxt->constituent[c2].right], ",") == 0) ||
924 (strcmp(linkage->word[ctxt->constituent[c2].right],
925 RIGHT_WALL_WORD) == 0))
926 {
927 if (verbosity_level(D_CONST))
928 err_msg(lg_Debug, "Adjusting %d to fix comma overlap\n", c2);
929 adjust_for_right_comma(ctxt, linkage, c2);
930 adjustment_made = true;
931 }
932 else if (strcmp(linkage->word[ctxt->constituent[c].left], ",") == 0)
933 {
934 if (verbosity_level(D_CONST))
935 err_msg(lg_Debug, "Adjusting c %d to fix comma overlap\n", c);
936 adjust_for_left_comma(ctxt, linkage, c);
937 adjustment_made = true;
938 }
939 else
940 {
941 if (verbosity_level(D_CONST))
942 {
943 err_ctxt ec = { linkage->sent };
944 err_msgc(&ec, lg_Warn,
945 "Warning: the constituents aren't nested! "
946 "Adjusting them. (%d, %d)\n", c, c2);
947 }
948 ctxt->constituent[c].left = ctxt->constituent[c2].left;
949 }
950 }
951 }
952 }
953 if (adjustment_made == false) break;
954 }
955
956 assert (numcon_total + numcon_subl < ctxt->conlen, "Too many constituents");
957 return numcon_subl;
958 }
959
960 static char *
exprint_constituent_structure(con_context_t * ctxt,Linkage linkage,int numcon_total)961 exprint_constituent_structure(con_context_t *ctxt,
962 Linkage linkage, int numcon_total)
963 {
964 size_t w;
965 int c;
966 bool *leftdone = alloca(numcon_total * sizeof(bool));
967 bool *rightdone = alloca(numcon_total * sizeof(bool));
968 int best, bestright, bestleft;
969 char s[MAX_WORD];
970 dyn_str * cs = dyn_str_new();
971
972 assert (numcon_total < ctxt->conlen, "Too many constituents (b)");
973
974 for (c = 0; c < numcon_total; c++)
975 {
976 leftdone[c] = false;
977 rightdone[c] = false;
978 }
979
980 /* Skip left wall; don't skip right wall, since it may
981 * have constituent boundaries. */
982 for (w = 1; w < linkage->num_words; w++)
983 {
984 while (1)
985 {
986 best = -1;
987 bestright = -1;
988 for (c = 0; c < numcon_total; c++)
989 {
990 if ((ctxt->constituent[c].left == w) &&
991 (leftdone[c] == false) && ctxt->constituent[c].valid &&
992 ((int) ctxt->constituent[c].right >= bestright))
993 {
994 best = c;
995 bestright = ctxt->constituent[c].right;
996 }
997 }
998 if (best == -1)
999 break;
1000
1001 leftdone[best] = true;
1002 dyn_strcat(cs, "[");
1003 dyn_strcat(cs, ctxt->constituent[best].type);
1004 dyn_strcat(cs, " ");
1005 }
1006
1007 /* Don't print out right wall */
1008 if (w < linkage->num_words - 1)
1009 {
1010 char *p;
1011 strncpy(s, linkage->word[w], MAX_WORD);
1012 s[MAX_WORD-1] = 0;
1013
1014 /* Constituent processing will crash if the sentence contains
1015 * square brackets, so we have to do something ... replace
1016 * them with curly braces ... this is a terrible hack, but
1017 * will have to do; for now. A better solution would be to
1018 * allow the user to specify some reserved char as the
1019 * bracket symbol, e.g. SOH and EOT or something like that.
1020 */
1021 p = strchr(s, OPEN_BRACKET);
1022 while (p)
1023 {
1024 *p = '{';
1025 p = strchr(p, OPEN_BRACKET);
1026 }
1027
1028 p = strchr(s, CLOSE_BRACKET);
1029 while (p)
1030 {
1031 *p = '}';
1032 p = strchr(p, CLOSE_BRACKET);
1033 }
1034
1035 #if 0 /* firstupper check removed in 0c8107a */
1036 /* Now, if the first character of the word was
1037 originally uppercase, we put it back that way */
1038 if (linkage->chosen_disjuncts[w]->word[0]->status & WS_FIRSTUPPER)
1039 upcase_utf8_str(s, s, MAX_WORD);
1040 #endif
1041 dyn_strcat(cs, s);
1042 dyn_strcat(cs, " ");
1043 }
1044
1045 while (1)
1046 {
1047 best = -1;
1048 bestleft = -1;
1049 for(c = 0; c < numcon_total; c++)
1050 {
1051 if ((ctxt->constituent[c].right == w) &&
1052 (rightdone[c] == false) && ctxt->constituent[c].valid &&
1053 ((int) ctxt->constituent[c].left > bestleft)) {
1054 best = c;
1055 bestleft = ctxt->constituent[c].left;
1056 }
1057 }
1058 if (best == -1)
1059 break;
1060 rightdone[best] = true;
1061 dyn_strcat(cs, ctxt->constituent[best].type);
1062 dyn_strcat(cs, "] ");
1063 }
1064 }
1065
1066 dyn_strcat(cs, "\n");
1067 return dyn_str_take(cs);
1068 }
1069
do_print_flat_constituents(con_context_t * ctxt,Linkage linkage)1070 static char * do_print_flat_constituents(con_context_t *ctxt, Linkage linkage)
1071 {
1072 int numcon_total= 0, numcon_subl;
1073 char * q;
1074 Sentence sent = linkage->sent;
1075
1076 ctxt->phrase_ss = string_set_create();
1077 generate_misc_word_info(ctxt, linkage);
1078
1079 if (NULL == sent->constituent_pp) /* First time for this sentence */
1080 sent->constituent_pp = post_process_new(sent->dict->hpsg_knowledge);
1081
1082 do_post_process(sent->constituent_pp, linkage, linkage->is_sent_long);
1083
1084 /** No-op. If we wanted to debug domain names, we could do this...
1085 * linkage_free_pp_info(linkage);
1086 * linkage_set_domain_names(sent->constituent_pp, linkage);
1087 */
1088 numcon_subl = read_constituents_from_domains(ctxt, linkage, numcon_total);
1089 numcon_total += numcon_subl;
1090 assert (numcon_total < ctxt->conlen, "Too many constituents (c)");
1091 numcon_total = merge_constituents(ctxt, linkage, numcon_total);
1092 assert (numcon_total < ctxt->conlen, "Too many constituents (d)");
1093 numcon_total = new_style_conjunctions(ctxt, linkage, numcon_total);
1094 assert (numcon_total < ctxt->conlen, "Too many constituents (e)");
1095 numcon_total = last_minute_fixes(ctxt, linkage, numcon_total);
1096 assert (numcon_total < ctxt->conlen, "Too many constituents (f)");
1097 q = exprint_constituent_structure(ctxt, linkage, numcon_total);
1098 string_set_delete(ctxt->phrase_ss);
1099 ctxt->phrase_ss = NULL;
1100
1101 post_process_free_data(&sent->constituent_pp->pp_data);
1102
1103 return q;
1104 }
1105
print_flat_constituents(Linkage linkage)1106 static char * print_flat_constituents(Linkage linkage)
1107 {
1108 size_t wts = linkage->num_words * sizeof(WType);
1109 size_t cns = (linkage->num_links + linkage->num_words) * sizeof(constituent_t);
1110
1111 con_context_t ctxt;
1112 memset(&ctxt, 0, sizeof(con_context_t));
1113 ctxt.wordtype = (WType *) alloca(wts);
1114 memset(ctxt.wordtype, 0, wts);
1115 ctxt.conlen = linkage->num_links + linkage->num_words;
1116 ctxt.constituent = (constituent_t *) alloca(cns);
1117 memset(ctxt.constituent, 0, cns);
1118
1119 return do_print_flat_constituents(&ctxt, linkage);
1120 }
1121
token_type(char * token)1122 static CType token_type (char *token)
1123 {
1124 if ((token[0] == OPEN_BRACKET) && (strlen(token) > 1))
1125 return OPEN_TOK;
1126 if ((strlen(token) > 1) && (token[strlen(token) - 1] == CLOSE_BRACKET))
1127 return CLOSE_TOK;
1128 return WORD_TOK;
1129 }
1130
make_CNode(char * q)1131 static CNode * make_CNode(char *q)
1132 {
1133 CNode * cn;
1134 cn = (CNode *) malloc(sizeof(CNode));
1135 cn->label = strdup(q);
1136 cn->child = cn->next = (CNode *) NULL;
1137 cn->next = (CNode *) NULL;
1138 cn->start = cn->end = -1;
1139 return cn;
1140 }
1141
parse_string(CNode * n,char ** saveptr)1142 static CNode * parse_string(CNode * n, char **saveptr)
1143 {
1144 char *q;
1145 CNode *m, *last_child=NULL;
1146
1147 while ((q = strtok_r(NULL, " ", saveptr))) {
1148 switch (token_type(q)) {
1149 case CLOSE_TOK :
1150 q[strlen(q)-1]='\0';
1151 assert(strcmp(q, n->label)==0,
1152 "Constituent tree: Labels do not match.");
1153 return n;
1154 break;
1155 case OPEN_TOK:
1156 m = make_CNode(q+1);
1157 m = parse_string(m, saveptr);
1158 break;
1159 case WORD_TOK:
1160 m = make_CNode(q);
1161 break;
1162 default:
1163 assert(0, "Constituent tree: Illegal token type");
1164 }
1165 if (n->child == NULL) {
1166 last_child = n->child = m;
1167 }
1168 else {
1169 last_child->next = m;
1170 last_child = m;
1171 }
1172 }
1173 assert(0, "Constituent tree: Constituent did not close");
1174 return NULL;
1175 }
1176
print_tree(dyn_str * cs,int indent,CNode * n,int o1,int o2)1177 static void print_tree(dyn_str * cs, int indent, CNode * n, int o1, int o2)
1178 {
1179 int i, child_offset;
1180 CNode * m;
1181
1182 if (n == NULL) return;
1183
1184 if (indent)
1185 for (i = 0; i < o1; ++i) dyn_strcat(cs, " ");
1186
1187 dyn_strcat(cs, "(");
1188 dyn_strcat(cs, n->label);
1189 dyn_strcat(cs, " ");
1190 child_offset = o2 + strlen(n->label) + 2;
1191
1192 for (m = n->child; m != NULL; m = m->next)
1193 {
1194 if (m->child == NULL)
1195 {
1196 char * p;
1197 /* If the original string has left or right parens in it,
1198 * the printed string will be messed up by these ...
1199 * so replace them by curly braces. What else can one do?
1200 */
1201 p = strchr(m->label, '(');
1202 while(p)
1203 {
1204 *p = '{';
1205 p = strchr(p, '(');
1206 }
1207
1208 p = strchr(m->label, ')');
1209 while(p)
1210 {
1211 *p = '}';
1212 p = strchr(p, ')');
1213 }
1214
1215 dyn_strcat(cs, m->label);
1216 if ((m->next != NULL) && (m->next->child == NULL))
1217 dyn_strcat(cs, " ");
1218 }
1219 else
1220 {
1221 if (m != n->child)
1222 {
1223 if (indent) dyn_strcat(cs, "\n");
1224 else dyn_strcat(cs, " ");
1225 print_tree(cs, indent, m, child_offset, child_offset);
1226 }
1227 else
1228 {
1229 print_tree(cs, indent, m, 0, child_offset);
1230 }
1231 if ((m->next != NULL) && (m->next->child == NULL))
1232 {
1233 if (indent)
1234 {
1235 dyn_strcat(cs, "\n");
1236 for (i = 0; i < child_offset; ++i)
1237 dyn_strcat(cs, " ");
1238 }
1239 else dyn_strcat(cs, " ");
1240 }
1241 }
1242 }
1243 dyn_strcat(cs, ")");
1244 }
1245
assign_spans(CNode * n,int start)1246 static int assign_spans(CNode * n, int start)
1247 {
1248 int num_words=0;
1249 CNode * m=NULL;
1250 if (n==NULL) return 0;
1251 n->start = start;
1252 if (n->child == NULL) {
1253 n->end = start;
1254 return 1;
1255 }
1256 else {
1257 for (m=n->child; m!=NULL; m=m->next) {
1258 num_words += assign_spans(m, start+num_words);
1259 }
1260 n->end = start+num_words-1;
1261 }
1262 return num_words;
1263 }
1264
linkage_constituent_tree(Linkage linkage)1265 static CNode * linkage_constituent_tree(Linkage linkage)
1266 {
1267 char *p, *q, *saveptr;
1268 CNode * root;
1269
1270 p = print_flat_constituents(linkage);
1271
1272 q = strtok_r(p, " ", &saveptr);
1273 assert(token_type(q) == OPEN_TOK, "Illegal beginning of string");
1274 root = make_CNode(q+1);
1275 root = parse_string(root, &saveptr);
1276 assign_spans(root, 0);
1277 free(p);
1278 return root;
1279 }
1280
1281 /* Make the compiler shut up about the deprecated functions */
1282 /*
1283 #pragma GCC diagnostic push
1284 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1285 */
1286
linkage_free_constituent_tree(CNode * n)1287 static void linkage_free_constituent_tree(CNode * n)
1288 {
1289 CNode *m, *x;
1290 for (m=n->child; m!=NULL; m=x) {
1291 x=m->next;
1292 linkage_free_constituent_tree(m);
1293 }
1294 free(n->label);
1295 free(n);
1296 }
1297
1298 /**
1299 * Print out the constituent tree.
1300 * mode 1: treebank-style constituent tree
1301 * mode 2: flat, bracketed tree [A like [B this B] A]
1302 * mode 3: flat, treebank-style tree (A like (B this))
1303 */
linkage_print_constituent_tree(Linkage linkage,ConstituentDisplayStyle mode)1304 char * linkage_print_constituent_tree(Linkage linkage, ConstituentDisplayStyle mode)
1305 {
1306 CNode * root;
1307
1308 if (!linkage) return NULL;
1309 if (!linkage->sent->dict->hpsg_knowledge) return NULL;
1310 if (mode == NO_DISPLAY)
1311 {
1312 return NULL;
1313 }
1314 else if (mode == MULTILINE || mode == SINGLE_LINE)
1315 {
1316 dyn_str * cs;
1317 cs = dyn_str_new();
1318 root = linkage_constituent_tree(linkage);
1319 print_tree(cs, (mode==1), root, 0, 0);
1320 linkage_free_constituent_tree(root);
1321 dyn_strcat(cs, "\n");
1322 return dyn_str_take(cs);
1323 }
1324 else if (mode == BRACKET_TREE)
1325 {
1326 return print_flat_constituents(linkage);
1327 }
1328 prt_error("Warning: Illegal mode %u for printing constituents\n"
1329 "Allowed values: %d to %d\n", mode, NO_DISPLAY, MAX_STYLES);
1330 return NULL;
1331 }
1332
linkage_free_constituent_tree_str(char * s)1333 void linkage_free_constituent_tree_str(char * s)
1334 {
1335 free(s);
1336 }
1337