1 /***************************************************************************/
2 /* Copyright (c) 2004                                                      */
3 /* Daniel Sleator, David Temperley, and John Lafferty                      */
4 /* Copyright (c) 2008, 2014 Linas Vepstas                                  */
5 /* All rights reserved                                                     */
6 /*                                                                         */
7 /* Use of the link grammar parsing system is subject to the terms of the   */
8 /* license set forth in the LICENSE file included with this software.      */
9 /* This license allows free redistribution and use in source and binary    */
10 /* forms, with or without modification, subject to certain conditions.     */
11 /*                                                                         */
12 /***************************************************************************/
13 
14  /****************************************************************************
15  *
16  *   This is a simple example of the link parser API.  It simulates most of
17  *   the functionality of the original link grammar parser, allowing sentences
18  *   to be typed in either interactively or in "batch" mode (if -batch is
19  *   specified on the command line, and stdin is redirected to a file).
20  *   The program:
21  *     Opens up a dictionary
22  *     Iterates:
23  *        1. Reads from stdin to get an input string to parse
24  *        2. Tokenizes the string to form a Sentence
25  *        3. Tries to parse it with cost 0
26  *        4. Tries to parse with increasing cost
27  *     When a parse is found:
28  *        1. Extracts each Linkage
29  *        2. Passes it to process_some_linkages()
30  *        3. Deletes linkage
31  *     After parsing each Sentence is deleted by making a call to
32  *     sentence_delete.
33  *
34  ****************************************************************************/
35 
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
41 
42 /* Used for terminal resizing */
43 #ifndef _WIN32
44 #include <termios.h>
45 #include <sys/ioctl.h>
46 #include <fcntl.h>
47 #include <signal.h>
48 #include <unistd.h>
49 #else
50 #include <windows.h>
51 #include <wchar.h>
52 #include <io.h>
53 #endif /* _WIN32 */
54 
55 #ifdef _MSC_VER
56 #define LINK_GRAMMAR_DLL_EXPORT 0
57 #endif /* _MSC_VER */
58 
59 #ifndef _WIN32
60 #define LAST_RESORT_LOCALE "en_US.UTF-8" /* Supposing POSIX systems */
61 #else
62 #define LAST_RESORT_LOCALE ""            /* Use user default locale */
63 #endif /* _WIN32 */
64 
65 #include "parser-utilities.h"
66 #include "command-line.h"
67 #include "lg_readline.h"
68 
/* Default upper bound on the number of linkages displayed per sentence. */
#define DISPLAY_MAX 1024

/* Count of batch errors; its total is printed at the end of main() when
 * in batch mode. */
static int batch_errors = 0;
/* Copies of the verbosity, debug and test parse options. They are
 * refreshed from the parse options on each iteration of the main loop. */
static int verbosity = 0;
static char * debug = (char *)"";
static char * test = (char *)"";
/* Whether stdin/stdout are terminals; set at the start of main(). */
static bool isatty_stdin, isatty_stdout;
#ifdef _WIN32
/* Set in main() when the OSTYPE environment variable is "cygwin". */
static bool running_under_cygwin = false;
#endif /* _WIN32 */
79 
/**
 * Sentence labels. A label is the first character of an input line; it is
 * extracted (and blanked out) by strip_off_label() and compared with the
 * actual parse result by there_was_an_error().
 */
typedef enum
{
	UNGRAMMATICAL = '*',    /* It is an error if such a sentence parses */
	PARSE_WITH_DISJUNCT_COST_GT_0 = ':',  /* Not used anywhere, currently ... */
	NO_LABEL = ' '
} Label;
86 
get_terminal_line(char * input_string,FILE * in,FILE * out)87 static char * get_terminal_line(char *input_string, FILE *in, FILE *out)
88 {
89 	static char *pline;
90 	const char *prompt = (0 == verbosity)? "" : "linkparser> ";
91 
92 #ifdef HAVE_EDITLINE
93 	pline = lg_readline(prompt);
94 #else
95 	fprintf(out, "%s", prompt);
96 	fflush(out);
97 #ifdef _WIN32
98 	if (!running_under_cygwin)
99 		pline = get_console_line();
100 	else
101 		pline = fgets(input_string, MAX_INPUT, in);
102 #else
103 	pline = fgets(input_string, MAX_INPUT, in);
104 #endif /* _WIN32 */
105 #endif /* HAVE_EDITLINE */
106 
107 	return pline;
108 }
109 
fget_input_string(FILE * in,FILE * out,bool check_return)110 static char * fget_input_string(FILE *in, FILE *out, bool check_return)
111 {
112 	static char *pline;
113 	static char input_string[MAX_INPUT];
114 	static bool input_pending = false;
115 
116 	if (input_pending)
117 	{
118 		input_pending = false;
119 		return pline;
120 	}
121 
122 	input_string[MAX_INPUT-2] = '\0';
123 
124 	if (((in != stdin) && !check_return) || !isatty_stdin)
125 	{
126 		/* Get input from a file. */
127 		pline = fgets(input_string, MAX_INPUT, in);
128 	}
129 	else
130 	{
131 		/* If we are here, the input is from a terminal. */
132 		pline = get_terminal_line(input_string, in, out);
133 	}
134 
135 	if (NULL == pline) return NULL;      /* EOF or error */
136 
137 	if (('\0' != input_string[MAX_INPUT-2]) &&
138 	    ('\n' != input_string[MAX_INPUT-2]))
139 	{
140 		prt_error("Warning: Input line too long (>%d)\n", MAX_INPUT-1);
141 		/* TODO: Ignore it and its continuation part(s). */
142 	}
143 
144 	if (check_return)
145 	{
146 		if (('\0' == pline[0]) || ('\r' == pline[0]) || ('\n' == pline[0]))
147 			return (char *)"\n";           /* Continue linkage display */
148 		if ((in == stdin) || ('!' == pline[0]))
149 			input_pending = true;          /* In !file mode allow commands */
150 		return (char *)"x";               /* Stop linkage display */
151 	}
152 
153 	return pline;
154 }
155 
156 /**************************************************************************
157 *
158 *  This procedure displays a linkage graphically.  Since the diagrams
159 *  are passed as character strings, they need to be deleted with a
160 *  call to free.
161 *
162 **************************************************************************/
163 
process_linkage(Linkage linkage,Command_Options * copts)164 static void process_linkage(Linkage linkage, Command_Options* copts)
165 {
166 	char * string;
167 	ConstituentDisplayStyle mode;
168 
169 	if (!linkage) return;  /* Can happen in timeout mode */
170 
171 	if (copts->display_bad)
172 	{
173 		string = linkage_print_pp_msgs(linkage);
174 		fprintf(stdout, "%s\n", string);
175 		linkage_free_pp_msgs(string);
176 	}
177 	if (copts->display_on)
178 	{
179 		string = linkage_print_diagram(linkage, copts->display_walls, copts->screen_width);
180 		fprintf(stdout, "%s", string);
181 		linkage_free_diagram(string);
182 	}
183 	if ((mode = copts->display_constituents))
184 	{
185 		string = linkage_print_constituent_tree(linkage, mode);
186 		if (string != NULL)
187 		{
188 			fprintf(stdout, "%s\n", string);
189 			linkage_free_constituent_tree_str(string);
190 		}
191 		else
192 		{
193 			copts->display_constituents = 0;
194 			prt_error("Error: Can't generate constituents.\n"
195 			          "Constituent processing has been turned off.\n");
196 		}
197 	}
198 	if (copts->display_links)
199 	{
200 		string = linkage_print_links_and_domains(linkage);
201 		fprintf(stdout, "%s", string);
202 		linkage_free_links_and_domains(string);
203 	}
204 	if (copts->display_disjuncts)
205 	{
206 		string = linkage_print_disjuncts(linkage);
207 		fprintf(stdout, "%s\n", string);
208 		linkage_free_disjuncts(string);
209 	}
210 	if (copts->display_postscript)
211 	{
212 		string = linkage_print_postscript(linkage,
213 		          copts->display_walls, copts->display_ps_header);
214 		fprintf(stdout, "%s\n", string);
215 		linkage_free_postscript(string);
216 	}
217 }
218 
/**
 * Print to stdout a one-line summary of the parse results: how many
 * linkages were found, how many of them had no post-processing
 * violations, and the null count if it is greater than 0.
 * Nothing is printed if no linkages were found.
 */
static void print_parse_statistics(Sentence sent, Parse_Options opts,
                                   Command_Options* copts)
{
	const int num_found = sentence_num_linkages_found(sent);

	if (num_found <= 0) return;

	if (num_found > parse_options_get_linkage_limit(opts))
	{
		/* More linkages than the limit: only a random sample of them
		 * was post-processed. */
		fprintf(stdout, "Found %d linkage%s (%d of %d random " \
				"linkages had no P.P. violations)",
				num_found,
				(1 == num_found) ? "" : "s",
				sentence_num_valid_linkages(sent),
				sentence_num_linkages_post_processed(sent));
	}
	else if ((sentence_num_valid_linkages(sent) > 0) || copts->display_bad)
	{
		const int num_pp = sentence_num_linkages_post_processed(sent);

		fprintf(stdout, "Found %d linkage%s (%d had no P.P. violations)",
		        num_pp,
		        (1 == num_pp) ? "" : "s",
		        sentence_num_valid_linkages(sent));
	}

	const int null_count = sentence_null_count(sent);
	if (null_count > 0)
	{
		fprintf(stdout, " at null count %d", null_count);
	}
	fprintf(stdout, "\n");
}
251 
/**
 * Check whether the given feature is enabled. It is considered
 * enabled if it is found in the comma delimited list of features.
 * This list, if not empty, has a leading and a trailing comma.
 *
 * @param feature   The comma delimited list, e.g. ",a,b:param,c,".
 * @param test_name The name to look for (without the commas).
 * @return NULL if not enabled. If the name appears as ",test_name,",
 *         return ",". Otherwise, if it appears as ",test_name:param",
 *         return a pointer to the ":" of its first such appearance
 *         (a pointer into \p feature).
 *
 * This function is similar to feature_enabled() of the library (which
 * is not exported) besides not including filename matching.
 *
 * The list is scanned in place; no memory is allocated, so the function
 * cannot fail due to memory exhaustion.
 */
static const char *test_enabled(const char *feature, const char *test_name)
{
	if ('\0' == feature[0]) return NULL;

	const size_t len = strlen(test_name);
	const char *param = NULL;   /* First "feature:param" match, if any */

	/* Every match must start just after a comma. */
	for (const char *comma = strchr(feature, ',');
	     NULL != comma; comma = strchr(comma + 1, ','))
	{
		if (0 != strncmp(comma + 1, test_name, len)) continue;

		/* All of the len characters matched, so "end" is still inside
		 * the list (at worst it points to its terminating NUL). */
		const char *end = comma + 1 + len;

		/* A plain "feature" has precedence over "feature:param",
		 * regardless of their order in the list. */
		if (',' == *end) return ",";
		if ((':' == *end) && (NULL == param)) param = end;
	}

	return param;
}
288 
289 /**
290  * Check for the auto-next-linkage test request (for LG code development).
291  * It is given using the special command: test=auto-next-linkage[:display_max]
292  * when :display_max is an optional indication of the maximum number of
293  * linkages to auto-display (the default is DISPLAY_MAX).
294  * For example, to issue up to 20000 linkages for each batch sentence,
295  * the following can be used:
296  * link-parser -limit=30000 -test=auto-next-linkage:20000 < file.batch
297  */
auto_next_linkage_test(const char * test_opt)298 static int auto_next_linkage_test(const char *test_opt)
299 {
300 	const char *auto_next_linkage_pos =
301 		test_enabled(test_opt, "auto-next-linkage");
302 	int max_display = 0;
303 
304 	if (auto_next_linkage_pos == NULL) return 0;
305 	if (':' == auto_next_linkage_pos[0])
306 		max_display = atoi(auto_next_linkage_pos + 1);
307 	if (max_display != 0) return max_display;
308 	return DISPLAY_MAX;
309 }
310 
/**
 * Display the linkages of a parsed sentence, one after the other.
 *
 * Linkages with post-processing violations are skipped unless
 * copts->display_bad is set. At most DISPLAY_MAX linkages are shown
 * (or the limit given by the auto-next-linkage test, if enabled).
 * Unless auto-next-linkage is enabled, the user is queried (on stdin)
 * between linkages: an empty line continues to the next linkage, and
 * anything else stops the display.
 *
 * @param in    Not used by this function.
 * @param sent  The parsed sentence.
 * @param copts The command options.
 * @return "x" if the display ended by itself. Otherwise the result of
 *         the query that stopped it: NULL on EOF/error, else "x".
 */
static const char *process_some_linkages(FILE *in, Sentence sent,
                                         Command_Options* copts)
{
	int i, num_to_query, num_to_display, num_displayed;
	Linkage linkage;
	Parse_Options opts = copts->popts;
	int display_max = DISPLAY_MAX;
	bool auto_next_linkage = false;

	i = auto_next_linkage_test(test);
	if (i != 0)
	{
		display_max = i;
		auto_next_linkage = true;
	}

	if (verbosity > 0) print_parse_statistics(sent, opts, copts);
	num_to_query = sentence_num_linkages_post_processed(sent);
	if (!copts->display_bad)
	{
		num_to_display = MIN(sentence_num_valid_linkages(sent),
		                     display_max);
	}
	else
	{
		num_to_display = MIN(num_to_query, display_max);
	}

	for (i=0, num_displayed=0; i<num_to_query; i++)
	{
		if ((sentence_num_violations(sent, i) > 0) &&
			!copts->display_bad)
		{
			continue;
		}

		linkage = linkage_create(i, sent, opts);

		/* Currently, sat solver sets the linkage violation indication
		 * only when it creates the linkage as a result of the above call. */
		if ((sentence_num_violations(sent, i) > 0) &&
			!copts->display_bad)
		{
			/* The linkage has already been created - don't leak it. */
			if (linkage) linkage_delete(linkage);
			continue;
		}

		/* Currently, sat solver returns NULL when there ain't no more */
		if (!linkage)
		{
			if (verbosity > 0)
			{
				if (0 == i)
					fprintf(stdout, "No linkages found.\n");
				else
					fprintf(stdout, "No more linkages.\n");
			}
			break;
		}

		if (verbosity > 0)
		{
			if ((sentence_num_valid_linkages(sent) == 1) &&
				!copts->display_bad)
			{
				fprintf(stdout, "\tUnique linkage, ");
			}
			else if (copts->display_bad &&
			         (sentence_num_violations(sent, i) > 0))
			{
				fprintf(stdout, "\tLinkage %d (bad), ", num_displayed+1);
			}
			else
			{
				fprintf(stdout, "\tLinkage %d, ", num_displayed+1);
			}

			fprintf(stdout, "cost vector = (UNUSED=%d DIS=%5.2f LEN=%d)\n",
			        linkage_unused_word_cost(linkage),
			        linkage_disjunct_cost(linkage),
			        linkage_link_cost(linkage));
		}

		process_linkage(linkage, copts);
		linkage_delete(linkage);

		if (++num_displayed < num_to_display)
		{
			if (!auto_next_linkage)
			{
				if ((verbosity > 0) && (!copts->batch_mode) && isatty_stdin && isatty_stdout)
				{
					fprintf(stdout, "Press RETURN for the next linkage.\n");
				}
				/* Anything but an empty line stops the display. */
				char *rc = fget_input_string(stdin, stdout, /*check_return*/true);
				if ((NULL == rc) || (*rc != '\n')) return rc;
			}
		}
		else
		{
			break;
		}
	}
	return "x";
}
415 
/**
 * Compare the sentence label with the actual parse result.
 *
 * It is an error if:
 *  - there are valid linkages but the label is UNGRAMMATICAL;
 *  - there are valid linkages, the label is PARSE_WITH_DISJUNCT_COST_GT_0
 *    and the disjunct cost of linkage 0 is 0;
 *  - there are no valid linkages and the label is not UNGRAMMATICAL.
 *
 * On error, batch_errors is incremented.
 *
 * @return 0 if there is no error, else UNGRAMMATICAL or
 *         PARSE_WITH_DISJUNCT_COST_GT_0 according to the error found.
 */
static int there_was_an_error(Label label, Sentence sent, Parse_Options opts)
{
	int mismatch = 0;

	if (sentence_num_valid_linkages(sent) <= 0)
	{
		if (UNGRAMMATICAL != label) mismatch = UNGRAMMATICAL;
	}
	else if (UNGRAMMATICAL == label)
	{
		mismatch = UNGRAMMATICAL;
	}
	else if ((sentence_disjunct_cost(sent, 0) == 0.0) &&
	         (PARSE_WITH_DISJUNCT_COST_GT_0 == label))
	{
		mismatch = PARSE_WITH_DISJUNCT_COST_GT_0;
	}

	if (0 != mismatch) batch_errors++;
	return mismatch;
}
436 
/**
 * Batch-mode result handling for one sentence.
 *
 * If the parse result doesn't agree with the sentence label, print the
 * first linkage that has no violations (if there are valid linkages),
 * followed by a "+++++ error" line that contains the error count so far.
 * If it does agree, print the parse statistics, but only if the
 * "batch-print-parse-statistics" test is enabled.
 */
static void batch_process_some_linkages(Label label,
                                        Sentence sent,
                                        Command_Options* copts)
{
	Parse_Options opts = copts->popts;

	if (!there_was_an_error(label, sent, opts))
	{
		if (test_enabled(test, "batch-print-parse-statistics"))
		{
			print_parse_statistics(sent, opts, copts);
		}
		return;
	}

	/* If we found at least one good linkage, print it. */
	if (sentence_num_valid_linkages(sent) > 0)
	{
		Linkage linkage = NULL;

		for (int n = 0; n < sentence_num_linkages_post_processed(sent); n++)
		{
			if (0 != sentence_num_violations(sent, n)) continue;

			linkage = linkage_create(n, sent, opts);
			break;
		}
		process_linkage(linkage, copts);
		linkage_delete(linkage);
	}
	fprintf(stdout, "+++++ error %d\n", batch_errors);
}
470 
471 /**
472  * If input_string is !command, try to issue it.
473  */
474 
special_command(char * input_string,Command_Options * copts,Dictionary dict)475 static int special_command(char *input_string, Command_Options* copts, Dictionary dict)
476 {
477 	if (input_string[0] == COMMENT_CHAR) return 'c';
478 	if (input_string[0] == '!')
479 		return issue_special_command(input_string+1, copts, dict);
480 	return 'n';
481 }
482 
strip_off_label(char * input_string)483 static Label strip_off_label(char * input_string)
484 {
485 	Label c;
486 
487 	c = (Label) input_string[0];
488 	switch(c) {
489 	case UNGRAMMATICAL:
490 	case PARSE_WITH_DISJUNCT_COST_GT_0:
491 		input_string[0] = ' ';
492 		return c;
493 	case NO_LABEL:
494 	default:
495 		return NO_LABEL;
496 	}
497 }
498 
/**
 * Set the values of the parse options used for "panic" parsing, i.e.
 * the additional parse attempt that main() makes (when panic mode is
 * on) if no linkages were found and the parse resources were exhausted.
 */
static void setup_panic_parse_options(Parse_Options opts)
{
	parse_options_set_disjunct_cost(opts, 4.0f);
	parse_options_set_min_null_count(opts, 1);
	parse_options_set_max_null_count(opts, 100);
	parse_options_set_max_parse_time(opts, 60);
	parse_options_set_islands_ok(opts, false);
	parse_options_set_short_length(opts, 12);
	parse_options_set_all_short_connectors(opts, true);
	parse_options_set_linkage_limit(opts, 100);
	parse_options_set_spell_guess(opts, 0);
}
511 
/**
 * Make the file descriptor of the stream \p from refer to the same
 * open file as the stream \p to.
 *
 * Both streams are flushed first: output that is already buffered in
 * \p from still goes to its original destination, and output that is
 * already buffered in \p to is written before anything that gets
 * diverted to it.
 *
 * @return A duplicate of the original file descriptor of \p from
 *         (as returned by dup(), so -1 if it failed).
 */
static int divert_stdio(FILE *from, FILE *to)
{
	fflush(from);
	fflush(to);

	const int origfd = dup(fileno(from));
	dup2(fileno(to), fileno(from));
	return origfd;
}
518 
#if 0 // Unused for now
/**
 * Undo divert_stdio(): make the file descriptor of the stream \p from
 * refer again to its original open file.
 *
 * @param from   The stream that was diverted.
 * @param origfd The value returned by divert_stdio() for \p from.
 */
static void restore_stdio(FILE *from, int origfd)
{
	fflush(from); /* Write the pending output to the diverted target */
	/* dup2(oldfd, newfd): the saved descriptor is the source. */
	dup2(origfd, fileno(from));
}
#endif
525 
/**
 * Find the basename of the given file name.
 * The component that follows the last '/' or '\\'
 * (whichever is last) is returned.
 * On POSIX systems it can be confused if the program name
 * contains '\\' characters, but we don't care.
 *
 * @param fpath A file path (may be NULL).
 * @return A pointer into \p fpath to its last component (the whole of
 *         \p fpath if it contains no separator), or "(null)" if
 *         \p fpath is NULL or empty.
 */
static const char *fbasename(const char *fpath)
{
	if ((NULL == fpath) || ('\0' == fpath[0])) return "(null)";

	const char *slash = strrchr(fpath, '/');
	const char *backslash = strrchr(fpath, '\\');

	/* Use the separator that appears last in the path. */
	const char *last_sep = slash;
	if ((NULL != backslash) && ((NULL == slash) || (backslash > slash)))
		last_sep = backslash;

	if (NULL == last_sep) return fpath;
	return last_sep + 1;
}
549 
/**
 * Print the usage message and the list of the special command variables
 * to \p out, and then exit with \p exit_value.
 *
 * The variable list is printed by issuing the "var" special command.
 * If \p out is not stdout, stdout is first diverted to \p out.
 * The URL info is printed only if \p out is stdout.
 *
 * This function doesn't return.
 */
static void print_usage(FILE *out, char *argv0, Command_Options *copts, int exit_value)
{
	const bool to_stdout = (out == stdout);

	fprintf(out, "Usage: %s [language|dictionary location]\n"
			 "                   [-<special \"!\" command>]\n"
			 "                   [--version]\n", fbasename(argv0));
	fprintf(out, "%s", "\nSpecial commands are:\n");

	if (!to_stdout) divert_stdio(stdout, out);
	issue_special_command("var", copts, NULL);

	if (to_stdout) print_url_info(); /* don't print it for errors */

	exit(exit_value);
}
563 
/**
 * Check the current width of the terminal window, and set the output
 * screen width (copts->screen_width) accordingly.
 *
 * Nothing is done if stdout is not a terminal.
 * On Windows, the width of the console window is used; if it cannot
 * be found, the screen width is set to 80.
 * On other systems, the width is queried using ioctl(TIOCGWINSZ); the
 * screen width is left unchanged if the query fails or if the returned
 * width doesn't seem rational.
 */
static void check_winsize(Command_Options* copts)
{
	if (!isatty_stdout) return;
	int fd = fileno(stdout);
#ifdef _WIN32
	HANDLE console;
	CONSOLE_SCREEN_BUFFER_INFO info;

	/* Create a handle to the console screen. */
	console = (HANDLE)_get_osfhandle(fd);
	if (!console || (console == INVALID_HANDLE_VALUE)) goto fail;

	/* Calculate the size of the console window. */
	if (GetConsoleScreenBufferInfo(console, &info) == 0) goto fail;

	copts->screen_width = (size_t)(info.srWindow.Right - info.srWindow.Left + 1);
	return;

fail:
	/* The console width is unknown - use 80 columns. */
	copts->screen_width = 80;
	return;
#else
	struct winsize ws;

	/* If there is no controlling terminal, the fileno will fail. This
	 * seems to happen while building docker images, I don't know why.
	 */
	if (fd < 0) return;

	if (0 != ioctl(fd, TIOCGWINSZ, &ws))
	{
		perror("stdout: ioctl TIOCGWINSZ");
		return;
	}

	/* printf("rows %i\n", ws.ws_row); */
	/* printf("cols %i\n", ws.ws_col); */

	/* Set the screen width only if the returned value seems
	 * rational: it's positive and not insanely tiny.
	 */
	if ((10 < ws.ws_col) && (16123 > ws.ws_col))
	{
		copts->screen_width = ws.ws_col;
	}
#endif /* _WIN32 */
}
615 
#ifdef INTERRUPT_EXIT
/**
 * Signal handler: exit with status 128+n, when n is the signal number.
 * When INTERRUPT_EXIT is defined, main() installs it for SIGINT and
 * SIGTERM.
 * NOTE(review): exit() is not async-signal-safe - confirm that calling
 * it from a signal handler is acceptable here.
 */
static void interrupt_exit(int n)
{
	exit(128+n);
}
#endif
622 
main(int argc,char * argv[])623 int main(int argc, char * argv[])
624 {
625 	FILE            *input_fh = stdin;
626 	Dictionary      dict;
627 	const char     *language = NULL;
628 	int             num_linkages;
629 	Label           label = NO_LABEL;
630 	Command_Options *copts;
631 	Parse_Options   opts;
632 	bool batch_in_progress = false;
633 
634 	isatty_stdin = isatty(fileno(stdin));
635 	isatty_stdout = isatty(fileno(stdout));
636 
637 #ifdef _WIN32
638 	/* If compiled with MSVC/MinGW, we still support running under Cygwin.
639 	 * This is done by checking running_under_cygwin to resolve
640 	 * incompatibilities. */
641 	const char *ostype = getenv("OSTYPE");
642 	if ((NULL != ostype) && (0 == strcmp(ostype, "cygwin")))
643 		running_under_cygwin = true;
644 
645 	/* argv encoding is in the current locale. */
646 	argv = argv2utf8(argc);
647 	if (NULL == argv)
648 	{
649 		prt_error("Fatal error: Unable to parse command line\n");
650 		exit(-1);
651 	}
652 
653 #ifdef _MSC_VER
654 	_set_printf_count_output(1); /* enable %n support for display_1line_help()*/
655 #endif /* _MSC_VER */
656 
657 	win32_set_utf8_output();
658 #endif /* _WIN32 */
659 
660 #if LATER
661 	/* Try to catch the SIGWINCH ... except this is not working. */
662 	struct sigaction winch_act;
663 	winch_act.sa_handler = winch_handler;
664 	winch_act.sa_sigaction = NULL;
665 	sigemptyset (&winch_act.sa_mask);
666 	winch_act.sa_flags = 0;
667 	sigaction (SIGWINCH, &winch_act, NULL);
668 #endif
669 
670 #ifdef INTERRUPT_EXIT
671 	(void)signal(SIGINT, interrupt_exit);
672 	(void)signal(SIGTERM, interrupt_exit);
673 #endif
674 
675 	copts = command_options_create();
676 	if (copts == NULL || copts->panic_opts == NULL)
677 	{
678 		prt_error("Fatal error: unable to create parse options\n");
679 		exit(-1);
680 	}
681 	opts = copts->popts;
682 
683 	setup_panic_parse_options(copts->panic_opts);
684 	copts->panic_mode = true;
685 
686 	parse_options_set_max_parse_time(opts, 30);
687 	parse_options_set_linkage_limit(opts, 1000);
688 	parse_options_set_min_null_count(opts, 0);
689 	parse_options_set_max_null_count(opts, 0);
690 	parse_options_set_short_length(opts, 16);
691 	parse_options_set_islands_ok(opts, false);
692 	parse_options_set_display_morphology(opts, false);
693 
694 	save_default_opts(copts); /* Options so far are the defaults */
695 
696 	if ((argc > 1) && (argv[1][0] != '-')) {
697 		/* The dictionary is the first argument if it doesn't begin with "-" */
698 		language = argv[1];
699 	}
700 
701 	/* Process options used by GNU programs. */
702 	int quiet_start = 0; /* Iff > 0, inhibit the initial messages */
703 	for (int i = 1; i < argc; i++)
704 	{
705 		if (strcmp("--help", argv[i]) == 0)
706 		{
707 			print_usage(stdout, argv[0], copts, 0);
708 		}
709 
710 		if (strcmp("--version", argv[i]) == 0)
711 		{
712 			printf("Version: %s\n", linkgrammar_get_version());
713 			printf("%s\n", linkgrammar_get_configuration());
714 			exit(0);
715 		}
716 
717 		if ((strcmp("--quiet", argv[i]) == 0) ||
718 		    (strcmp("--silent", argv[i]) == 0))
719 		{
720 			quiet_start = i;
721 		}
722 	}
723 
724 	/* Process command line variable-setting commands (only). */
725 	for (int i = 1; i < argc; i++)
726 	{
727 		if (i == quiet_start) continue;
728 
729 		if (argv[i][0] == '-')
730 		{
731 			const char *var = argv[i] + ((argv[i][1] != '-') ? 1 : 2);
732 			if ((var[0] != '!') && (0 > issue_special_command(var, copts, NULL)))
733 				print_usage(stderr, argv[0], copts, -1);
734 		}
735 		else if (i != 1)
736 		{
737 			prt_error("Fatal error: Unknown argument '%s'.\n", argv[i]);
738 			print_usage(stderr, argv[0], copts, -1);
739 		}
740 	}
741 
742 	if (language && *language)
743 	{
744 		dict = dictionary_create_lang(language);
745 		if (dict == NULL)
746 		{
747 			prt_error("Fatal error: Unable to open dictionary.\n");
748 			exit(-1);
749 		}
750 	}
751 	else
752 	{
753 		dict = dictionary_create_default_lang();
754 		if (dict == NULL)
755 		{
756 			prt_error("Fatal error: Unable to open default dictionary.\n");
757 			exit(-1);
758 		}
759 	}
760 
761 	/* Process the command line '!' commands */
762 	for (int i = 1; i < argc; i++)
763 	{
764 		if ((argv[i][0] == '-') && (argv[i][1] == '!'))
765 		{
766 			if (0 > issue_special_command(argv[i]+1, copts, dict))
767 				print_usage(stderr, argv[0], copts, -1);
768 		}
769 	}
770 
771 	check_winsize(copts);
772 
773 	if ((parse_options_get_verbosity(opts)) > 0 && (quiet_start == 0))
774 	{
775 		prt_error("Info: Dictionary version %s, locale %s\n",
776 			linkgrammar_get_dict_version(dict),
777 			linkgrammar_get_dict_locale(dict));
778 		prt_error("Info: Library version %s. Enter \"!help\" for help.\n",
779 			linkgrammar_get_version());
780 	}
781 
782 	/* Main input loop */
783 	while (true)
784 	{
785 		char *input_string;
786 		Sentence sent = NULL;
787 
788 		/* Make sure stderr is shown even when MSVC binary runs under
789 		 * Cygwin/MSYS pty (in that case it is fully buffered(!)). */
790 		fflush(stderr);
791 
792 		verbosity = parse_options_get_verbosity(opts);
793 		debug = parse_options_get_debug(opts);
794 		test = parse_options_get_test(opts);
795 
796 		input_string = fget_input_string(input_fh, stdout, /*check_return*/false);
797 		check_winsize(copts);
798 
799 		if (NULL == input_string)
800 		{
801 			if (ferror(input_fh))
802 				prt_error("Error: Read: %s\n", strerror(errno));
803 
804 			if (input_fh == stdin) break;
805 			fclose (input_fh);
806 			input_fh = stdin;
807 			continue;
808 		}
809 
810 		/* Discard whitespace characters from end of string. */
811 		for (char *p = &input_string[strlen(input_string)-1];
812 		     (p > input_string) && strchr(WHITESPACE, *p); p--)
813 		{
814 			*p = '\0';
815 		}
816 
817 		/* If the input string is just whitespace, then ignore it. */
818 		if (strspn(input_string, WHITESPACE) == strlen(input_string))
819 			continue;
820 
821 		int command = special_command(input_string, copts, dict);
822 		if ('e' == command) break;    /* It was an exit command */
823 		if ('c' == command) continue; /* It was another command */
824 		if (-1 == command) continue;  /* It was a bad command */
825 
826 		/* We have to handle the !file command inline; it's too hairy
827 		 * otherwise ... */
828 		if ('f' == command)
829 		{
830 			char *command_end = &input_string[strcspn(input_string, WHITESPACE)];
831 			char *filename = &command_end[strspn(command_end, WHITESPACE)];
832 			if (filename[0] == '\0')
833 			{
834 				prt_error("Error: Missing file name argument\n");
835 				continue;
836 			}
837 
838 			char *eh_filename = expand_homedir(filename);
839 
840 			struct stat statbuf;
841 			if ((0 == stat(eh_filename, &statbuf)) && statbuf.st_mode & S_IFDIR)
842 			{
843 				errno = EISDIR;
844 				goto open_error;
845 			}
846 
847 			input_fh = fopen(eh_filename, "r");
848 
849 			if (NULL == input_fh)
850 			{
851 				input_fh = stdin;
852 				goto open_error;
853 			}
854 
855 			free(eh_filename);
856 			continue;
857 
858 open_error:
859 			prt_error("Error: Cannot open %s: %s\n", eh_filename, strerror(errno));
860 			free(eh_filename);
861 			continue;
862 		}
863 
864 		if (!copts->batch_mode) batch_in_progress = false;
865 		if ('\0' != test[0] && !test_enabled(test, "@"))
866 		{
867 			/* In batch mode warn only once.
868 			 * In auto-next-linkage mode don't warn at all. */
869 			if (!batch_in_progress && !auto_next_linkage_test(test))
870 			{
871 				fflush(stdout);
872 				/* Remind the developer this is a test mode. */
873 				prt_error("Warning: Tests enabled: %s\n", test);
874 				if (copts->batch_mode) batch_in_progress = true;
875 			}
876 		}
877 
878 		if (copts->echo_on)
879 		{
880 			printf("%s\n", input_string);
881 		}
882 
883 		if (copts->batch_mode || auto_next_linkage_test(test))
884 		{
885 			label = strip_off_label(input_string);
886 		}
887 
888 		// Post-processing-based pruning will clip away connectors
889 		// that we might otherwise want to examine. So disable PP
890 		// pruning in this situation.
891 		if (copts->display_bad)
892 			parse_options_set_perform_pp_prune(opts, false);
893 		else
894 			parse_options_set_perform_pp_prune(opts, true);
895 
896 		sent = sentence_create(input_string, dict);
897 
898 		if (sentence_split(sent, opts) < 0)
899 		{
900 			sentence_delete(sent);
901 			sent = NULL;
902 			continue;
903 		}
904 
905 		if (0 != copts->display_wordgraph)
906 		{
907 			const char *wg_display_flags = ""; /* default flags */
908 			switch (copts->display_wordgraph)
909 			{
910 				case 1:     /* default flags */
911 					break;
912 				case 2:     /* subgraphs with a legend */
913 					wg_display_flags = "sl";
914 					break;
915 				case 3:
916 					{
917 						/* Use esoteric flags from the test user variable. */
918 						const char *s = test_enabled(test, "wg");
919 						if ((NULL != s) && (':' == s[0])) wg_display_flags = s;
920 					}
921 					break;
922 				default:
923 					prt_error("Warning: wordgraph=%d: Unknown value, using 1\n",
924 								 copts->display_wordgraph);
925 					copts->display_wordgraph = 1;
926 			}
927 			sentence_display_wordgraph(sent, wg_display_flags);
928 		}
929 
930 		/* First parse with the default disjunct_cost as set by the library
931 		 * (currently 2.7). Usually parse here with no null links.
932 		 * However, if "-test=one-step-parse" is used and we are said to
933 		 * parse with null links, allow parsing here with null links too. */
934 		bool one_step_parse = !copts->batch_mode && copts->allow_null &&
935 		                      test_enabled(test, "one-step-parse");
936 		int max_null_count = one_step_parse ? sentence_length(sent) : 0;
937 
938 		parse_options_set_min_null_count(opts, 0);
939 		parse_options_set_max_null_count(opts, max_null_count);
940 		parse_options_reset_resources(opts);
941 
942 		num_linkages = sentence_parse(sent, opts);
943 
944 		/* num_linkages is negative only on a hard-error;
945 		 * typically, due to a zero-length sentence.  */
946 		if (num_linkages < 0)
947 		{
948 			sentence_delete(sent);
949 			sent = NULL;
950 			continue;
951 		}
952 
953 #if 0
954 		/* Try again, this time omitting the requirement for
955 		 * definite articles, etc. This should allow for the parsing
956 		 * of newspaper headlines and other clipped speech.
957 		 *
958 		 * XXX Unfortunately, this also allows for the parsing of
959 		 * all sorts of ungrammatical sentences which should not
960 		 * parse, and leads to bad parses of many other unparsable
961 		 * but otherwise grammatical sentences.  Thus, this trick
962 		 * pretty much fails; we leave it here to document the
963 		 * experiment.
964 		 */
965 		if (num_linkages == 0)
966 		{
967 			parse_options_set_disjunct_cost(opts, 4.5);
968 			num_linkages = sentence_parse(sent, opts);
969 			if (num_linkages < 0) continue;
970 		}
971 #endif /* 0 */
972 
973 		/* If asked to show bad linkages, then show them. */
974 		if ((num_linkages == 0) && (!copts->batch_mode))
975 		{
976 			if (copts->display_bad)
977 			{
978 				num_linkages = sentence_num_linkages_found(sent);
979 			}
980 		}
981 
982 		/* Now parse with null links */
983 		if (!one_step_parse && num_linkages == 0 && !copts->batch_mode)
984 		{
985 			if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n");
986 
987 			if (copts->allow_null)
988 			{
989 				/* XXX should use expanded disjunct list here too */
990 				parse_options_set_min_null_count(opts, 1);
991 				parse_options_set_max_null_count(opts, sentence_length(sent));
992 				num_linkages = sentence_parse(sent, opts);
993 			}
994 		}
995 
996 		if (verbosity > 0)
997 		{
998 			if (parse_options_timer_expired(opts))
999 				fprintf(stdout, "Timer is expired!\n");
1000 
1001 			if (parse_options_memory_exhausted(opts))
1002 				fprintf(stdout, "Memory is exhausted!\n");
1003 		}
1004 
1005 		if ((num_linkages == 0) &&
1006 			copts->panic_mode &&
1007 			parse_options_resources_exhausted(opts))
1008 		{
1009 			/* print_total_time(opts); */
1010 			batch_errors++;
1011 			if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n");
1012 			/* If the parser used was the SAT solver, set the panic parser to
1013 			 * it too.
1014 			 * FIXME? Currently, the SAT solver code is not too useful in
1015 			 * panic mode since it doesn't handle parsing with null words, so
1016 			 * using the regular parser in that case could be beneficial.
1017 			 * However, this currently causes a crash due to a memory
1018 			 * management mess. */
1019 			parse_options_set_use_sat_parser(copts->panic_opts,
1020 				parse_options_get_use_sat_parser(opts));
1021 			parse_options_reset_resources(copts->panic_opts);
1022 			parse_options_set_verbosity(copts->panic_opts, verbosity);
1023 			(void)sentence_parse(sent, copts->panic_opts);
1024 			if (verbosity > 0)
1025 			{
1026 				if (parse_options_timer_expired(copts->panic_opts))
1027 					fprintf(stdout, "Panic timer is expired!\n");
1028 			}
1029 		}
1030 
1031 		if (verbosity > 1) parse_options_print_total_time(opts);
1032 
1033 		const char *rc = "";
1034 		if (copts->batch_mode)
1035 		{
1036 			batch_process_some_linkages(label, sent, copts);
1037 		}
1038 		else
1039 		{
1040 			rc = process_some_linkages(input_fh, sent, copts);
1041 		}
1042 
1043 		fflush(stdout);
1044 		sentence_delete(sent);
1045 		sent = NULL;
1046 
1047 		if ((NULL == rc) && (input_fh == stdin)) break;
1048 	}
1049 
1050 	if (copts->batch_mode)
1051 	{
1052 		/* print_time(opts, "Total"); */
1053 		fprintf(stderr,
1054 				"%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");
1055 	}
1056 
1057 	/* Free stuff, so that mem-leak detectors don't complain. */
1058 	command_options_delete(copts);
1059 	dictionary_delete(dict);
1060 
1061 	printf ("Bye.\n");
1062 	return 0;
1063 }
1064