1 /***************************************************************************/
2 /* Copyright (c) 2004 */
3 /* Daniel Sleator, David Temperley, and John Lafferty */
4 /* Copyright (c) 2008, 2014 Linas Vepstas */
5 /* All rights reserved */
6 /* */
7 /* Use of the link grammar parsing system is subject to the terms of the */
8 /* license set forth in the LICENSE file included with this software. */
9 /* This license allows free redistribution and use in source and binary */
10 /* forms, with or without modification, subject to certain conditions. */
11 /* */
12 /***************************************************************************/
13
14 /****************************************************************************
15 *
16 * This is a simple example of the link parser API. It simulates most of
17 * the functionality of the original link grammar parser, allowing sentences
18 * to be typed in either interactively or in "batch" mode (if -batch is
19 * specified on the command line, and stdin is redirected to a file).
20 * The program:
21 * Opens up a dictionary
22 * Iterates:
23 * 1. Reads from stdin to get an input string to parse
24 * 2. Tokenizes the string to form a Sentence
25 * 3. Tries to parse it with cost 0
26 * 4. Tries to parse with increasing cost
27 * When a parse is found:
28 * 1. Extracts each Linkage
29 * 2. Passes it to process_some_linkages()
30 * 3. Deletes linkage
31 * After parsing each Sentence is deleted by making a call to
32 * sentence_delete.
33 *
34 ****************************************************************************/
35
36 #include <errno.h>
37 #include <locale.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/stat.h>
41
42 /* Used for terminal resizing */
43 #ifndef _WIN32
44 #include <termios.h>
45 #include <sys/ioctl.h>
46 #include <fcntl.h>
47 #include <signal.h>
48 #include <unistd.h>
49 #else
50 #include <windows.h>
51 #include <wchar.h>
52 #include <io.h>
53 #endif /* _WIN32 */
54
55 #ifdef _MSC_VER
56 #define LINK_GRAMMAR_DLL_EXPORT 0
57 #endif /* _MSC_VER */
58
59 #ifndef _WIN32
60 #define LAST_RESORT_LOCALE "en_US.UTF-8" /* Supposing POSIX systems */
61 #else
62 #define LAST_RESORT_LOCALE "" /* Use user default locale */
63 #endif /* _WIN32 */
64
65 #include "parser-utilities.h"
66 #include "command-line.h"
67 #include "lg_readline.h"
68
/* Default upper bound on the number of linkages displayed per sentence;
 * auto_next_linkage_test() falls back to it when no explicit limit is given. */
#define DISPLAY_MAX 1024

/* Number of batch-mode mismatches seen so far; reported at the end of main(). */
static int batch_errors = 0;

/* Cached copies of the verbosity/debug/test parse options; main() refreshes
 * them at the top of every iteration of its input loop. */
static int verbosity = 0;
static char * debug = (char *)"";
static char * test = (char *)"";

/* Whether stdin/stdout are terminals; set once at the start of main(). */
static bool isatty_stdin, isatty_stdout;

#ifdef _WIN32
/* Set in main() when the OSTYPE environment variable is "cygwin". */
static bool running_under_cygwin = false;
#endif /* _WIN32 */

/*
 * Label that may appear as the first character of a batch sentence.
 * strip_off_label() recognizes it and there_was_an_error() compares it
 * with the actual parse outcome.
 */
typedef enum
{
	UNGRAMMATICAL = '*',                 /* sentence is expected not to parse */
	PARSE_WITH_DISJUNCT_COST_GT_0 = ':', /* a zero-cost parse is unexpected */
	NO_LABEL = ' '                       /* no expectation given */
} Label;
86
get_terminal_line(char * input_string,FILE * in,FILE * out)87 static char * get_terminal_line(char *input_string, FILE *in, FILE *out)
88 {
89 static char *pline;
90 const char *prompt = (0 == verbosity)? "" : "linkparser> ";
91
92 #ifdef HAVE_EDITLINE
93 pline = lg_readline(prompt);
94 #else
95 fprintf(out, "%s", prompt);
96 fflush(out);
97 #ifdef _WIN32
98 if (!running_under_cygwin)
99 pline = get_console_line();
100 else
101 pline = fgets(input_string, MAX_INPUT, in);
102 #else
103 pline = fgets(input_string, MAX_INPUT, in);
104 #endif /* _WIN32 */
105 #endif /* HAVE_EDITLINE */
106
107 return pline;
108 }
109
fget_input_string(FILE * in,FILE * out,bool check_return)110 static char * fget_input_string(FILE *in, FILE *out, bool check_return)
111 {
112 static char *pline;
113 static char input_string[MAX_INPUT];
114 static bool input_pending = false;
115
116 if (input_pending)
117 {
118 input_pending = false;
119 return pline;
120 }
121
122 input_string[MAX_INPUT-2] = '\0';
123
124 if (((in != stdin) && !check_return) || !isatty_stdin)
125 {
126 /* Get input from a file. */
127 pline = fgets(input_string, MAX_INPUT, in);
128 }
129 else
130 {
131 /* If we are here, the input is from a terminal. */
132 pline = get_terminal_line(input_string, in, out);
133 }
134
135 if (NULL == pline) return NULL; /* EOF or error */
136
137 if (('\0' != input_string[MAX_INPUT-2]) &&
138 ('\n' != input_string[MAX_INPUT-2]))
139 {
140 prt_error("Warning: Input line too long (>%d)\n", MAX_INPUT-1);
141 /* TODO: Ignore it and its continuation part(s). */
142 }
143
144 if (check_return)
145 {
146 if (('\0' == pline[0]) || ('\r' == pline[0]) || ('\n' == pline[0]))
147 return (char *)"\n"; /* Continue linkage display */
148 if ((in == stdin) || ('!' == pline[0]))
149 input_pending = true; /* In !file mode allow commands */
150 return (char *)"x"; /* Stop linkage display */
151 }
152
153 return pline;
154 }
155
156 /**************************************************************************
157 *
158 * This procedure displays a linkage graphically. Since the diagrams
159 * are passed as character strings, they need to be deleted with a
160 * call to free.
161 *
162 **************************************************************************/
163
/*
 * Print every representation of the linkage that is switched on in copts
 * (post-processing messages, diagram, constituent tree, links and domains,
 * disjuncts, postscript) to stdout.  Each string obtained from the library
 * is released with its matching free function.  A NULL linkage is ignored.
 * If the constituent tree cannot be generated, constituent display is
 * turned off in copts and an error is reported.
 */
static void process_linkage(Linkage linkage, Command_Options* copts)
{
	char *text;

	if (NULL == linkage) return; /* Can happen in timeout mode */

	if (copts->display_bad)
	{
		text = linkage_print_pp_msgs(linkage);
		printf("%s\n", text);
		linkage_free_pp_msgs(text);
	}

	if (copts->display_on)
	{
		text = linkage_print_diagram(linkage, copts->display_walls,
		                             copts->screen_width);
		printf("%s", text);
		linkage_free_diagram(text);
	}

	const ConstituentDisplayStyle mode = copts->display_constituents;
	if (mode)
	{
		text = linkage_print_constituent_tree(linkage, mode);
		if (NULL == text)
		{
			copts->display_constituents = 0;
			prt_error("Error: Can't generate constituents.\n"
			          "Constituent processing has been turned off.\n");
		}
		else
		{
			printf("%s\n", text);
			linkage_free_constituent_tree_str(text);
		}
	}

	if (copts->display_links)
	{
		text = linkage_print_links_and_domains(linkage);
		printf("%s", text);
		linkage_free_links_and_domains(text);
	}

	if (copts->display_disjuncts)
	{
		text = linkage_print_disjuncts(linkage);
		printf("%s\n", text);
		linkage_free_disjuncts(text);
	}

	if (copts->display_postscript)
	{
		text = linkage_print_postscript(linkage, copts->display_walls,
		                                copts->display_ps_header);
		printf("%s\n", text);
		linkage_free_postscript(text);
	}
}
218
/**
 * Print a one-line summary of how many linkages were found for the
 * sentence and how many of them passed post-processing, followed by the
 * null count when it is positive.  Nothing is printed when no linkages
 * were found.
 */
static void print_parse_statistics(Sentence sent, Parse_Options opts,
                                   Command_Options* copts)
{
	const int found = sentence_num_linkages_found(sent);

	if (found <= 0) return;

	if (found > parse_options_get_linkage_limit(opts))
	{
		/* More linkages than the limit: only a random sample was examined. */
		printf("Found %d linkage%s (%d of %d random "
		       "linkages had no P.P. violations)",
		       found,
		       (1 == found) ? "" : "s",
		       sentence_num_valid_linkages(sent),
		       sentence_num_linkages_post_processed(sent));
	}
	else if ((sentence_num_valid_linkages(sent) > 0) || copts->display_bad)
	{
		const int post_processed = sentence_num_linkages_post_processed(sent);

		printf("Found %d linkage%s (%d had no P.P. violations)",
		       post_processed,
		       (1 == post_processed) ? "" : "s",
		       sentence_num_valid_linkages(sent));
	}

	const int null_count = sentence_null_count(sent);
	if (null_count > 0)
	{
		printf(" at null count %d", null_count);
	}
	printf("\n");
}
251
252 /**
253 * Check whether the given feature is enabled. It is considered
254 * enabled if it is found in the comma delimited list of features.
255 * This list, if not empty, has a leading and a trailing commas.
256 * Return NULL if not enabled, else ",". If the feature appears
257 * as "feature:param", return a pointer to the ":".
258 *
259 * This function is similar to feature_enabled() of the library (which
260 * is not exported) besides not including filename matching.
261 */
/*
 * @param feature    Comma-delimited feature list with a leading and a
 *                   trailing comma, or "" when no feature is set.
 * @param test_name  Feature name to look for (without commas).
 * @return NULL if the feature is not listed, "," if it is listed without
 *         a parameter, or a pointer (into feature) to the ':' when it is
 *         listed as "test_name:param".  If the temporary search pattern
 *         cannot be allocated, the feature is reported as not enabled.
 */
static const char *test_enabled(const char *feature, const char *test_name)
{
	if ('\0' == feature[0]) return NULL;

	const size_t len = strlen(test_name);
	char *buff = malloc(len + 2 + 1); /* leading comma + comma/colon + NUL */
	if (NULL == buff) return NULL;    /* out of memory: treat as disabled */

	/* Build the pattern ",test_name,". */
	buff[0] = ',';
	memcpy(buff + 1, test_name, len);
	buff[len + 1] = ',';
	buff[len + 2] = '\0';

	const char *r = NULL;
	if (NULL != strstr(feature, buff))
	{
		r = ",";
	}
	else
	{
		buff[len + 1] = ':'; /* check for "feature:param" */
		const char *hit = strstr(feature, buff);
		if (NULL != hit) r = hit + len + 1; /* points at the ':' */
	}

	free(buff);
	return r;
}
288
289 /**
290 * Check for the auto-next-linkage test request (for LG code development).
291 * It is given using the special command: test=auto-next-linkage[:display_max]
292 * when :display_max is an optional indication of the maximum number of
293 * linkages to auto-display (the default is DISPLAY_MAX).
294 * For example, to issue up to 20000 linkages for each batch sentence,
295 * the following can be used:
296 * link-parser -limit=30000 -test=auto-next-linkage:20000 < file.batch
297 */
auto_next_linkage_test(const char * test_opt)298 static int auto_next_linkage_test(const char *test_opt)
299 {
300 const char *auto_next_linkage_pos =
301 test_enabled(test_opt, "auto-next-linkage");
302 int max_display = 0;
303
304 if (auto_next_linkage_pos == NULL) return 0;
305 if (':' == auto_next_linkage_pos[0])
306 max_display = atoi(auto_next_linkage_pos + 1);
307 if (max_display != 0) return max_display;
308 return DISPLAY_MAX;
309 }
310
/**
 * Display the linkages of a parsed sentence one after the other.
 *
 * Linkages with post-processing violations are skipped unless
 * copts->display_bad is set.  At most DISPLAY_MAX linkages are shown (or
 * the limit given with the auto-next-linkage test).  Unless that test is
 * enabled, the user is queried on stdin after each linkage.
 *
 * @param in     Not used here.
 * @param sent   The parsed sentence.
 * @param copts  Display options; copts->popts is used to create linkages.
 * @return "x" when the display ended by itself; otherwise the value
 *         returned by fget_input_string() for an answer other than an
 *         empty line (NULL on EOF or error).
 */
static const char *process_some_linkages(FILE *in, Sentence sent,
                                         Command_Options* copts)
{
	Parse_Options opts = copts->popts;
	int display_max = DISPLAY_MAX;
	bool auto_next_linkage = false;

	const int auto_max = auto_next_linkage_test(test);
	if (0 != auto_max)
	{
		display_max = auto_max;
		auto_next_linkage = true;
	}

	if (verbosity > 0) print_parse_statistics(sent, opts, copts);

	const int num_to_query = sentence_num_linkages_post_processed(sent);
	int num_to_display;
	if (copts->display_bad)
	{
		num_to_display = MIN(num_to_query, display_max);
	}
	else
	{
		const int num_valid = sentence_num_valid_linkages(sent);
		num_to_display = MIN(num_valid, display_max);
	}

	int num_displayed = 0;
	for (int idx = 0; idx < num_to_query; idx++)
	{
		if ((sentence_num_violations(sent, idx) > 0) && !copts->display_bad)
			continue;

		Linkage linkage = linkage_create(idx, sent, opts);

		/* Currently, sat solver sets the linkage violation indication
		 * only when it creates the linkage as a result of the above call. */
		if ((sentence_num_violations(sent, idx) > 0) && !copts->display_bad)
			continue;

		/* Currently, sat solver returns NULL when there ain't no more */
		if (NULL == linkage)
		{
			if (verbosity > 0)
			{
				printf((0 == idx) ? "No linkages found.\n"
				                  : "No more linkages.\n");
			}
			break;
		}

		if (verbosity > 0)
		{
			const bool is_bad = copts->display_bad &&
				(sentence_num_violations(sent, idx) > 0);

			if ((sentence_num_valid_linkages(sent) == 1) &&
			    !copts->display_bad)
			{
				printf("\tUnique linkage, ");
			}
			else if (is_bad)
			{
				printf("\tLinkage %d (bad), ", num_displayed+1);
			}
			else
			{
				printf("\tLinkage %d, ", num_displayed+1);
			}

			printf("cost vector = (UNUSED=%d DIS=%5.2f LEN=%d)\n",
			       linkage_unused_word_cost(linkage),
			       linkage_disjunct_cost(linkage),
			       linkage_link_cost(linkage));
		}

		process_linkage(linkage, copts);
		linkage_delete(linkage);

		num_displayed++;
		if (num_displayed >= num_to_display) break;

		/* In auto-next-linkage mode go on without asking. */
		if (auto_next_linkage) continue;

		if ((verbosity > 0) && (!copts->batch_mode) &&
		    isatty_stdin && isatty_stdout)
		{
			printf("Press RETURN for the next linkage.\n");
		}
		char *rc = fget_input_string(stdin, stdout, /*check_return*/true);
		if ((NULL == rc) || (*rc != '\n')) return rc;
	}

	return "x";
}
415
/**
 * Compare the batch label of a sentence with its actual parse outcome.
 *
 * A mismatch is counted in batch_errors.  The mismatches are:
 *  - the sentence has valid linkages but is labeled UNGRAMMATICAL;
 *  - the sentence has valid linkages, its first linkage has a disjunct
 *    cost of 0 and it is labeled PARSE_WITH_DISJUNCT_COST_GT_0;
 *  - the sentence has no valid linkage and is not labeled UNGRAMMATICAL.
 *
 * @param opts  Not used.
 * @return 0 if the outcome matches the label, else UNGRAMMATICAL or
 *         PARSE_WITH_DISJUNCT_COST_GT_0.
 */
static int there_was_an_error(Label label, Sentence sent, Parse_Options opts)
{
	int mismatch = 0;

	(void)opts;

	if (sentence_num_valid_linkages(sent) > 0)
	{
		if (UNGRAMMATICAL == label)
		{
			mismatch = UNGRAMMATICAL;
		}
		else if ((sentence_disjunct_cost(sent, 0) == 0.0) &&
		         (PARSE_WITH_DISJUNCT_COST_GT_0 == label))
		{
			mismatch = PARSE_WITH_DISJUNCT_COST_GT_0;
		}
	}
	else if (UNGRAMMATICAL != label)
	{
		mismatch = UNGRAMMATICAL;
	}

	if (0 != mismatch) batch_errors++;
	return mismatch;
}
436
/**
 * Batch-mode handling of a parsed sentence.
 *
 * If the parse outcome contradicts the label, the first linkage without
 * violations (if the sentence has valid linkages) is displayed, followed
 * by a "+++++ error N" line.  Otherwise the parse statistics are printed
 * when the "batch-print-parse-statistics" test is enabled.
 */
static void batch_process_some_linkages(Label label,
                                        Sentence sent,
                                        Command_Options* copts)
{
	Parse_Options opts = copts->popts;

	if (!there_was_an_error(label, sent, opts))
	{
		if (test_enabled(test, "batch-print-parse-statistics"))
		{
			print_parse_statistics(sent, opts, copts);
		}
		return;
	}

	/* If we found at least one good linkage, print it. */
	if (sentence_num_valid_linkages(sent) > 0)
	{
		Linkage linkage = NULL;

		for (int idx = 0; idx < sentence_num_linkages_post_processed(sent); idx++)
		{
			if (0 != sentence_num_violations(sent, idx)) continue;

			linkage = linkage_create(idx, sent, opts);
			break;
		}
		process_linkage(linkage, copts);
		linkage_delete(linkage);
	}
	printf("+++++ error %d\n", batch_errors);
}
470
471 /**
472 * If input_string is !command, try to issue it.
473 */
474
/*
 * @return 'c' for a comment line (first character is COMMENT_CHAR),
 *         the result of issue_special_command() for a line starting
 *         with '!', and 'n' for anything else.
 */
static int special_command(char *input_string, Command_Options* copts, Dictionary dict)
{
	const char first = input_string[0];

	if (COMMENT_CHAR == first) return 'c';
	if ('!' != first) return 'n';

	return issue_special_command(input_string+1, copts, dict);
}
482
strip_off_label(char * input_string)483 static Label strip_off_label(char * input_string)
484 {
485 Label c;
486
487 c = (Label) input_string[0];
488 switch(c) {
489 case UNGRAMMATICAL:
490 case PARSE_WITH_DISJUNCT_COST_GT_0:
491 input_string[0] = ' ';
492 return c;
493 case NO_LABEL:
494 default:
495 return NO_LABEL;
496 }
497 }
498
/**
 * Fill in the parse options that main() uses for the "panic" parse, i.e.
 * the extra parse attempt made after the regular parse found nothing and
 * ran out of resources.
 */
static void setup_panic_parse_options(Parse_Options opts)
{
	const float panic_disjunct_cost = 4.0f;
	const int panic_min_null_count = 1;
	const int panic_max_null_count = 100;
	const int panic_max_parse_time = 60;
	const int panic_short_length = 12;
	const int panic_linkage_limit = 100;

	parse_options_set_disjunct_cost(opts, panic_disjunct_cost);
	parse_options_set_min_null_count(opts, panic_min_null_count);
	parse_options_set_max_null_count(opts, panic_max_null_count);
	parse_options_set_max_parse_time(opts, panic_max_parse_time);
	parse_options_set_islands_ok(opts, false);
	parse_options_set_short_length(opts, panic_short_length);
	parse_options_set_all_short_connectors(opts, true);
	parse_options_set_linkage_limit(opts, panic_linkage_limit);
	parse_options_set_spell_guess(opts, 0);
}
511
/**
 * Redirect the stream `from` so that its output goes where `to` goes.
 *
 * Anything still buffered in `from` is flushed first, so that it reaches
 * the original destination and not the new one.
 *
 * @param from  Stream whose file descriptor is replaced.
 * @param to    Stream whose file descriptor is duplicated onto `from`.
 * @return A duplicate of the original descriptor of `from`, for a later
 *         restore, or -1 if it could not be duplicated or if the
 *         redirection itself failed.
 */
static int divert_stdio(FILE *from, FILE *to)
{
	fflush(from);

	const int origfd = dup(fileno(from));

	if (dup2(fileno(to), fileno(from)) < 0)
	{
		/* Nothing was diverted, so there is nothing to restore later. */
		if (origfd >= 0) close(origfd);
		return -1;
	}

	return origfd;
}
518
#if 0 // Unused for now
/**
 * Undo divert_stdio(): make the stream `from` write again to the
 * descriptor that divert_stdio() returned.
 *
 * @param from    The stream that was diverted.
 * @param origfd  The descriptor returned by divert_stdio().
 */
static void restore_stdio(FILE *from, int origfd)
{
	/* dup2(oldfd, newfd) makes newfd refer to oldfd, so the saved
	 * descriptor must be the first argument. */
	dup2(origfd, fileno(from));
}
#endif
525
/**
 * Find the basename of the given file name.
 * The component that follows the last '/' or '\\' (whichever comes
 * last) is returned.
 * On POSIX systems it can be confused if the program name
 * contains '\\' characters, but we don't care.
 */
/*
 * @param fpath  A file path; may be NULL or empty.
 * @return "(null)" for a NULL or empty path; otherwise a pointer into
 *         fpath just past its last '/' or '\\' separator, or fpath
 *         itself if it contains no separator.
 */
static const char *fbasename(const char *fpath)
{
	if ((NULL == fpath) || ('\0' == fpath[0])) return "(null)";

	const char *last_slash = strrchr(fpath, '/');
	const char *last_backslash = strrchr(fpath, '\\');

	/* Pick whichever separator appears later in the path. */
	const char *sep = last_slash;
	if ((NULL == sep) ||
	    ((NULL != last_backslash) && (last_backslash > sep)))
	{
		sep = last_backslash;
	}

	return (NULL == sep) ? fpath : sep + 1;
}
549
/**
 * Print the usage message and the list of special commands, then exit.
 *
 * @param out         Stream for the message.  If it is not stdout, stdout
 *                    is diverted to it so that the variable list printed
 *                    by issue_special_command() lands there too.
 * @param argv0       Program path; only its basename is shown.
 * @param copts       Options handed to issue_special_command("var").
 * @param exit_value  Process exit status.  This function does not return.
 */
static void print_usage(FILE *out, char *argv0, Command_Options *copts, int exit_value)
{
	const bool to_stdout = (stdout == out);
	const char *progname = fbasename(argv0);

	fprintf(out, "Usage: %s [language|dictionary location]\n"
	             " [-<special \"!\" command>]\n"
	             " [--version]\n", progname);

	fprintf(out, "\nSpecial commands are:\n");

	if (!to_stdout) divert_stdio(stdout, out);
	issue_special_command("var", copts, NULL);
	if (to_stdout) print_url_info(); /* don't print it for errors */

	exit(exit_value);
}
563
564 /**
565 * On Unix, this checks for the current window size,
566 * and sets the output screen width accordingly.
567 */
/*
 * Update copts->screen_width from the current size of the terminal that
 * stdout is attached to.  Nothing is done when stdout is not a terminal.
 * On Windows the width falls back to 80 if the console cannot be queried;
 * elsewhere the width is left untouched on failure or on an implausible
 * value.
 */
static void check_winsize(Command_Options* copts)
{
	if (!isatty_stdout) return;
	int fd = fileno(stdout);
#ifdef _WIN32
	CONSOLE_SCREEN_BUFFER_INFO info;

	/* Create a handle to the console screen. */
	HANDLE console = (HANDLE)_get_osfhandle(fd);

	/* Calculate the size of the console window. */
	if (console && (console != INVALID_HANDLE_VALUE) &&
	    (GetConsoleScreenBufferInfo(console, &info) != 0))
	{
		copts->screen_width =
			(size_t)(info.srWindow.Right - info.srWindow.Left + 1);
	}
	else
	{
		copts->screen_width = 80;
	}
#else
	struct winsize ws;

	/* If there is no controlling terminal, the fileno will fail. This
	 * seems to happen while building docker images, I don't know why.
	 */
	if (fd < 0) return;

	if (0 != ioctl(fd, TIOCGWINSZ, &ws))
	{
		perror("stdout: ioctl TIOCGWINSZ");
		return;
	}

	/* Set the screen width only if the returned value seems
	 * rational: it's positive and not insanely tiny.
	 */
	const bool plausible = (ws.ws_col > 10) && (ws.ws_col < 16123);
	if (plausible) copts->screen_width = ws.ws_col;
#endif /* _WIN32 */
}
615
#ifdef INTERRUPT_EXIT
/**
 * Signal handler that main() installs for SIGINT and SIGTERM: terminate
 * the program with exit status 128 + signal number.
 */
static void interrupt_exit(int n)
{
	const int status = 128 + n;

	exit(status);
}
#endif
622
main(int argc,char * argv[])623 int main(int argc, char * argv[])
624 {
625 FILE *input_fh = stdin;
626 Dictionary dict;
627 const char *language = NULL;
628 int num_linkages;
629 Label label = NO_LABEL;
630 Command_Options *copts;
631 Parse_Options opts;
632 bool batch_in_progress = false;
633
634 isatty_stdin = isatty(fileno(stdin));
635 isatty_stdout = isatty(fileno(stdout));
636
637 #ifdef _WIN32
638 /* If compiled with MSVC/MinGW, we still support running under Cygwin.
639 * This is done by checking running_under_cygwin to resolve
640 * incompatibilities. */
641 const char *ostype = getenv("OSTYPE");
642 if ((NULL != ostype) && (0 == strcmp(ostype, "cygwin")))
643 running_under_cygwin = true;
644
645 /* argv encoding is in the current locale. */
646 argv = argv2utf8(argc);
647 if (NULL == argv)
648 {
649 prt_error("Fatal error: Unable to parse command line\n");
650 exit(-1);
651 }
652
653 #ifdef _MSC_VER
654 _set_printf_count_output(1); /* enable %n support for display_1line_help()*/
655 #endif /* _MSC_VER */
656
657 win32_set_utf8_output();
658 #endif /* _WIN32 */
659
660 #if LATER
661 /* Try to catch the SIGWINCH ... except this is not working. */
662 struct sigaction winch_act;
663 winch_act.sa_handler = winch_handler;
664 winch_act.sa_sigaction = NULL;
665 sigemptyset (&winch_act.sa_mask);
666 winch_act.sa_flags = 0;
667 sigaction (SIGWINCH, &winch_act, NULL);
668 #endif
669
670 #ifdef INTERRUPT_EXIT
671 (void)signal(SIGINT, interrupt_exit);
672 (void)signal(SIGTERM, interrupt_exit);
673 #endif
674
675 copts = command_options_create();
676 if (copts == NULL || copts->panic_opts == NULL)
677 {
678 prt_error("Fatal error: unable to create parse options\n");
679 exit(-1);
680 }
681 opts = copts->popts;
682
683 setup_panic_parse_options(copts->panic_opts);
684 copts->panic_mode = true;
685
686 parse_options_set_max_parse_time(opts, 30);
687 parse_options_set_linkage_limit(opts, 1000);
688 parse_options_set_min_null_count(opts, 0);
689 parse_options_set_max_null_count(opts, 0);
690 parse_options_set_short_length(opts, 16);
691 parse_options_set_islands_ok(opts, false);
692 parse_options_set_display_morphology(opts, false);
693
694 save_default_opts(copts); /* Options so far are the defaults */
695
696 if ((argc > 1) && (argv[1][0] != '-')) {
697 /* The dictionary is the first argument if it doesn't begin with "-" */
698 language = argv[1];
699 }
700
701 /* Process options used by GNU programs. */
702 int quiet_start = 0; /* Iff > 0, inhibit the initial messages */
703 for (int i = 1; i < argc; i++)
704 {
705 if (strcmp("--help", argv[i]) == 0)
706 {
707 print_usage(stdout, argv[0], copts, 0);
708 }
709
710 if (strcmp("--version", argv[i]) == 0)
711 {
712 printf("Version: %s\n", linkgrammar_get_version());
713 printf("%s\n", linkgrammar_get_configuration());
714 exit(0);
715 }
716
717 if ((strcmp("--quiet", argv[i]) == 0) ||
718 (strcmp("--silent", argv[i]) == 0))
719 {
720 quiet_start = i;
721 }
722 }
723
724 /* Process command line variable-setting commands (only). */
725 for (int i = 1; i < argc; i++)
726 {
727 if (i == quiet_start) continue;
728
729 if (argv[i][0] == '-')
730 {
731 const char *var = argv[i] + ((argv[i][1] != '-') ? 1 : 2);
732 if ((var[0] != '!') && (0 > issue_special_command(var, copts, NULL)))
733 print_usage(stderr, argv[0], copts, -1);
734 }
735 else if (i != 1)
736 {
737 prt_error("Fatal error: Unknown argument '%s'.\n", argv[i]);
738 print_usage(stderr, argv[0], copts, -1);
739 }
740 }
741
742 if (language && *language)
743 {
744 dict = dictionary_create_lang(language);
745 if (dict == NULL)
746 {
747 prt_error("Fatal error: Unable to open dictionary.\n");
748 exit(-1);
749 }
750 }
751 else
752 {
753 dict = dictionary_create_default_lang();
754 if (dict == NULL)
755 {
756 prt_error("Fatal error: Unable to open default dictionary.\n");
757 exit(-1);
758 }
759 }
760
761 /* Process the command line '!' commands */
762 for (int i = 1; i < argc; i++)
763 {
764 if ((argv[i][0] == '-') && (argv[i][1] == '!'))
765 {
766 if (0 > issue_special_command(argv[i]+1, copts, dict))
767 print_usage(stderr, argv[0], copts, -1);
768 }
769 }
770
771 check_winsize(copts);
772
773 if ((parse_options_get_verbosity(opts)) > 0 && (quiet_start == 0))
774 {
775 prt_error("Info: Dictionary version %s, locale %s\n",
776 linkgrammar_get_dict_version(dict),
777 linkgrammar_get_dict_locale(dict));
778 prt_error("Info: Library version %s. Enter \"!help\" for help.\n",
779 linkgrammar_get_version());
780 }
781
782 /* Main input loop */
783 while (true)
784 {
785 char *input_string;
786 Sentence sent = NULL;
787
788 /* Make sure stderr is shown even when MSVC binary runs under
789 * Cygwin/MSYS pty (in that case it is fully buffered(!)). */
790 fflush(stderr);
791
792 verbosity = parse_options_get_verbosity(opts);
793 debug = parse_options_get_debug(opts);
794 test = parse_options_get_test(opts);
795
796 input_string = fget_input_string(input_fh, stdout, /*check_return*/false);
797 check_winsize(copts);
798
799 if (NULL == input_string)
800 {
801 if (ferror(input_fh))
802 prt_error("Error: Read: %s\n", strerror(errno));
803
804 if (input_fh == stdin) break;
805 fclose (input_fh);
806 input_fh = stdin;
807 continue;
808 }
809
810 /* Discard whitespace characters from end of string. */
811 for (char *p = &input_string[strlen(input_string)-1];
812 (p > input_string) && strchr(WHITESPACE, *p); p--)
813 {
814 *p = '\0';
815 }
816
817 /* If the input string is just whitespace, then ignore it. */
818 if (strspn(input_string, WHITESPACE) == strlen(input_string))
819 continue;
820
821 int command = special_command(input_string, copts, dict);
822 if ('e' == command) break; /* It was an exit command */
823 if ('c' == command) continue; /* It was another command */
824 if (-1 == command) continue; /* It was a bad command */
825
826 /* We have to handle the !file command inline; it's too hairy
827 * otherwise ... */
828 if ('f' == command)
829 {
830 char *command_end = &input_string[strcspn(input_string, WHITESPACE)];
831 char *filename = &command_end[strspn(command_end, WHITESPACE)];
832 if (filename[0] == '\0')
833 {
834 prt_error("Error: Missing file name argument\n");
835 continue;
836 }
837
838 char *eh_filename = expand_homedir(filename);
839
840 struct stat statbuf;
841 if ((0 == stat(eh_filename, &statbuf)) && statbuf.st_mode & S_IFDIR)
842 {
843 errno = EISDIR;
844 goto open_error;
845 }
846
847 input_fh = fopen(eh_filename, "r");
848
849 if (NULL == input_fh)
850 {
851 input_fh = stdin;
852 goto open_error;
853 }
854
855 free(eh_filename);
856 continue;
857
858 open_error:
859 prt_error("Error: Cannot open %s: %s\n", eh_filename, strerror(errno));
860 free(eh_filename);
861 continue;
862 }
863
864 if (!copts->batch_mode) batch_in_progress = false;
865 if ('\0' != test[0] && !test_enabled(test, "@"))
866 {
867 /* In batch mode warn only once.
868 * In auto-next-linkage mode don't warn at all. */
869 if (!batch_in_progress && !auto_next_linkage_test(test))
870 {
871 fflush(stdout);
872 /* Remind the developer this is a test mode. */
873 prt_error("Warning: Tests enabled: %s\n", test);
874 if (copts->batch_mode) batch_in_progress = true;
875 }
876 }
877
878 if (copts->echo_on)
879 {
880 printf("%s\n", input_string);
881 }
882
883 if (copts->batch_mode || auto_next_linkage_test(test))
884 {
885 label = strip_off_label(input_string);
886 }
887
888 // Post-processing-based pruning will clip away connectors
889 // that we might otherwise want to examine. So disable PP
890 // pruning in this situation.
891 if (copts->display_bad)
892 parse_options_set_perform_pp_prune(opts, false);
893 else
894 parse_options_set_perform_pp_prune(opts, true);
895
896 sent = sentence_create(input_string, dict);
897
898 if (sentence_split(sent, opts) < 0)
899 {
900 sentence_delete(sent);
901 sent = NULL;
902 continue;
903 }
904
905 if (0 != copts->display_wordgraph)
906 {
907 const char *wg_display_flags = ""; /* default flags */
908 switch (copts->display_wordgraph)
909 {
910 case 1: /* default flags */
911 break;
912 case 2: /* subgraphs with a legend */
913 wg_display_flags = "sl";
914 break;
915 case 3:
916 {
917 /* Use esoteric flags from the test user variable. */
918 const char *s = test_enabled(test, "wg");
919 if ((NULL != s) && (':' == s[0])) wg_display_flags = s;
920 }
921 break;
922 default:
923 prt_error("Warning: wordgraph=%d: Unknown value, using 1\n",
924 copts->display_wordgraph);
925 copts->display_wordgraph = 1;
926 }
927 sentence_display_wordgraph(sent, wg_display_flags);
928 }
929
930 /* First parse with the default disjunct_cost as set by the library
931 * (currently 2.7). Usually parse here with no null links.
932 * However, if "-test=one-step-parse" is used and we are said to
933 * parse with null links, allow parsing here with null links too. */
934 bool one_step_parse = !copts->batch_mode && copts->allow_null &&
935 test_enabled(test, "one-step-parse");
936 int max_null_count = one_step_parse ? sentence_length(sent) : 0;
937
938 parse_options_set_min_null_count(opts, 0);
939 parse_options_set_max_null_count(opts, max_null_count);
940 parse_options_reset_resources(opts);
941
942 num_linkages = sentence_parse(sent, opts);
943
944 /* num_linkages is negative only on a hard-error;
945 * typically, due to a zero-length sentence. */
946 if (num_linkages < 0)
947 {
948 sentence_delete(sent);
949 sent = NULL;
950 continue;
951 }
952
953 #if 0
954 /* Try again, this time omitting the requirement for
955 * definite articles, etc. This should allow for the parsing
956 * of newspaper headlines and other clipped speech.
957 *
958 * XXX Unfortunately, this also allows for the parsing of
959 * all sorts of ungrammatical sentences which should not
960 * parse, and leads to bad parses of many other unparsable
961 * but otherwise grammatical sentences. Thus, this trick
962 * pretty much fails; we leave it here to document the
963 * experiment.
964 */
965 if (num_linkages == 0)
966 {
967 parse_options_set_disjunct_cost(opts, 4.5);
968 num_linkages = sentence_parse(sent, opts);
969 if (num_linkages < 0) continue;
970 }
971 #endif /* 0 */
972
973 /* If asked to show bad linkages, then show them. */
974 if ((num_linkages == 0) && (!copts->batch_mode))
975 {
976 if (copts->display_bad)
977 {
978 num_linkages = sentence_num_linkages_found(sent);
979 }
980 }
981
982 /* Now parse with null links */
983 if (!one_step_parse && num_linkages == 0 && !copts->batch_mode)
984 {
985 if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n");
986
987 if (copts->allow_null)
988 {
989 /* XXX should use expanded disjunct list here too */
990 parse_options_set_min_null_count(opts, 1);
991 parse_options_set_max_null_count(opts, sentence_length(sent));
992 num_linkages = sentence_parse(sent, opts);
993 }
994 }
995
996 if (verbosity > 0)
997 {
998 if (parse_options_timer_expired(opts))
999 fprintf(stdout, "Timer is expired!\n");
1000
1001 if (parse_options_memory_exhausted(opts))
1002 fprintf(stdout, "Memory is exhausted!\n");
1003 }
1004
1005 if ((num_linkages == 0) &&
1006 copts->panic_mode &&
1007 parse_options_resources_exhausted(opts))
1008 {
1009 /* print_total_time(opts); */
1010 batch_errors++;
1011 if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n");
1012 /* If the parser used was the SAT solver, set the panic parser to
1013 * it too.
1014 * FIXME? Currently, the SAT solver code is not too useful in
1015 * panic mode since it doesn't handle parsing with null words, so
1016 * using the regular parser in that case could be beneficial.
1017 * However, this currently causes a crash due to a memory
1018 * management mess. */
1019 parse_options_set_use_sat_parser(copts->panic_opts,
1020 parse_options_get_use_sat_parser(opts));
1021 parse_options_reset_resources(copts->panic_opts);
1022 parse_options_set_verbosity(copts->panic_opts, verbosity);
1023 (void)sentence_parse(sent, copts->panic_opts);
1024 if (verbosity > 0)
1025 {
1026 if (parse_options_timer_expired(copts->panic_opts))
1027 fprintf(stdout, "Panic timer is expired!\n");
1028 }
1029 }
1030
1031 if (verbosity > 1) parse_options_print_total_time(opts);
1032
1033 const char *rc = "";
1034 if (copts->batch_mode)
1035 {
1036 batch_process_some_linkages(label, sent, copts);
1037 }
1038 else
1039 {
1040 rc = process_some_linkages(input_fh, sent, copts);
1041 }
1042
1043 fflush(stdout);
1044 sentence_delete(sent);
1045 sent = NULL;
1046
1047 if ((NULL == rc) && (input_fh == stdin)) break;
1048 }
1049
1050 if (copts->batch_mode)
1051 {
1052 /* print_time(opts, "Total"); */
1053 fprintf(stderr,
1054 "%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");
1055 }
1056
1057 /* Free stuff, so that mem-leak detectors don't complain. */
1058 command_options_delete(copts);
1059 dictionary_delete(dict);
1060
1061 printf ("Bye.\n");
1062 return 0;
1063 }
1064