1 /*************************************************************************/
2 /* Copyright (c) 2014 Amir Plivatsky */
3 /* All rights reserved */
4 /* */
5 /* Use of the link grammar parsing system is subject to the terms of the */
6 /* license set forth in the LICENSE file included with this software. */
7 /* This license allows free redistribution and use in source and binary */
8 /* forms, with or without modification, subject to certain conditions. */
9 /* */
10 /*************************************************************************/
11
12 #include <stdlib.h>
13
14 #ifdef USE_WORDGRAPH_DISPLAY
15 #include <stdio.h>
16 #include <errno.h>
17 #include <stdint.h>
18 #ifdef HAVE_FORK
19 #include <unistd.h> /* fork() and execl() */
20 #include <sys/wait.h> /* waitpid() */
21 #endif
22 #ifdef HAVE_PRCTL
23 #include <sys/prctl.h> /* prctl() */
24 #endif
25 #include <signal.h> /* SIG* */
26
27 #include "print/print-util.h" /* for append_string */
28 #include "utilities.h" /* for dyn_str functions and UNREACHABLE */
29 #endif /* USE_WORDGRAPH_DISPLAY */
30
31 #include "api-structures.h"
32 #include "error.h"
33 #include "string-set.h"
34 #include "tok-structures.h"
35 #include "wordgraph.h"
36
37 #ifdef __APPLE__
38 #define POPEN_DOT
39 #endif /* __APPLE__ */
40
41 #if USE_WORDGRAPH_DISPLAY || defined(DEBUG)
gword_morpheme(Sentence sent,const Gword * w)42 GNUC_UNUSED const char *gword_morpheme(Sentence sent, const Gword *w)
43 {
44 const char *mt;
45 char buff[64];
46
47 switch (w->morpheme_type)
48 {
49 case MT_INVALID:
50 mt = "MT_INVALID";
51 break;
52 case MT_WORD:
53 mt = "MT_WORD";
54 break;
55 case MT_FEATURE:
56 mt = "MT_FEATURE";
57 break;
58 case MT_INFRASTRUCTURE:
59 mt = "MT_I-S";
60 break;
61 case MT_WALL:
62 mt = "MT_WALL";
63 break;
64 case MT_EMPTY:
65 mt = "MT_EMPTY";
66 break;
67 case MT_UNKNOWN:
68 mt = "MT_UNKNOWN";
69 break;
70 case MT_TEMPLATE:
71 mt = "MT_TEMPLATE";
72 break;
73 case MT_ROOT:
74 mt = "MT_ROOT";
75 break;
76 case MT_CONTR:
77 mt = "MT_CONTR";
78 break;
79 case MT_PUNC:
80 mt = "MT_PUNC";
81 break;
82 case MT_STEM:
83 mt = "MT_STEM";
84 break;
85 case MT_PREFIX:
86 mt = "MT_PREFIX";
87 break;
88 case MT_MIDDLE:
89 mt = "MT_MIDDLE";
90 break;
91 case MT_SUFFIX:
92 mt = "MT_SUFFIX";
93 break;
94 default:
95 /* No truncation is expected. */
96 snprintf(buff, sizeof(buff), "MT_%d", (int)w->morpheme_type);
97 mt = string_set_add(buff, sent->string_set);
98 }
99
100 return mt;
101 }
102 #endif /* USE_WORDGRAPH_DISPLAY || defined(DEBUG) */
103
104 #if USE_WORDGRAPH_DISPLAY
105 /* === Wordgraph graphical representation === */
106
/**
 * Append to wgd the dot(1) description of the legend cluster.
 *
 * wgd:  the dot text accumulated so far; the legend is appended to it.
 * mode: display mode bits; only WGR_SUB is examined here. When it is set,
 *       the sample legend node is wrapped in its own sample subgraph
 *       cluster.
 */
static void wordgraph_legend(dyn_str *wgd, unsigned int mode)
{
	/* Status-flag abbreviations and their explanations, one table row each. */
	static const struct
	{
		const char *abbrev;
		const char *meaning;
	} status_rows[] = {
		{ "RE",  "Matched a regex" },
		{ "SP",  "Result of spell guess" },
		{ "RU",  "Separated run-on word" },
		{ "HA",  "Has an alternative" },
		{ "UNS", "Also unsplit_word" },
		{ "IN",  "In the dict file" },
		{ "FI",  "First char is uppercase" },
	};
	const size_t num_rows = sizeof(status_rows) / sizeof(status_rows[0]);
	const bool with_subgraph = (0 != (mode & WGR_SUB));

	const char *cluster_open = with_subgraph ?
		"subgraph cluster_unsplit_word {\n"
		"label=\"ordinal-number unsplit-word\";\n" : "";
	const char *cluster_close = with_subgraph ? "}\n" : "";

	/* Legend header, the sample node, and the start of the HTML table. */
	append_string(wgd,
		"subgraph cluster_legend {\n"
		"label=Legend;\n"
		"%s"
		"legend [label=\"subword\\n(status-flags)\\nmorpheme-type\"];\n"
		"legend [xlabel=\"ordinal-number\\ndebug-label\"];\n"
		"%s"
		"legend_width [width=4.5 height=0 shape=none label=<\n"
		"<table border='0' cellborder='1' cellspacing='0'>\n"
		"<tr><td colspan='2'>status-flags</td></tr>\n",
		cluster_open, cluster_close);

	for (size_t row = 0; row < num_rows; row++)
	{
		append_string(wgd,
			"<tr><td align='left'>%s</td><td align='left'>%s</td></tr>\n",
			status_rows[row].abbrev, status_rows[row].meaning);
	}

	/* Close the table and the legend cluster, then add an invisible
	 * cluster holding an invisible dummy node. */
	append_string(wgd,
		"</table>>];"
		"}\n"
		"subgraph cluster_legend_top_space {\n"
		"style=invis legend_dummy [style=invis height=0 shape=box]\n"
		"};\n"
	);
}
150
151 /**
152 * Graph node name: Add "Sentence:" for the main node.
153 * Also escape " and \ with a \.
154 */
wlabel(Sentence sent,const Gword * w)155 static const char *wlabel(Sentence sent, const Gword *w)
156 {
157 const char *s;
158 const char sentence_label[] = "Sentence:\\n";
159 dyn_str *l = dyn_str_new();
160 char c0[] = "\0\0";
161
162 assert((NULL != w) && (NULL != w->subword), "Word must exist");
163 if ('\0' == *w->subword)
164 return string_set_add("(nothing)", sent->string_set);
165
166 if (w == sent->wordgraph) dyn_strcat(l, sentence_label);
167
168 for (s = w->subword; *s; s++)
169 {
170 switch (*s)
171 {
172 case '\"':
173 dyn_strcat(l, "\\\"");
174 break;
175 case '\\':
176 dyn_strcat(l, "\\");
177 break;
178 default:
179 *c0 = *s;
180 dyn_strcat(l, c0);
181 }
182 }
183
184 char *label_str = dyn_str_take(l);
185 s = string_set_add(label_str, sent->string_set);
186 free(label_str);
187 return s;
188 }
189
190 /**
191 * Generate the wordgraph in dot(1) format, for debug.
192 */
wordgraph2dot(Sentence sent,unsigned int mode,const char * modestr)193 static dyn_str *wordgraph2dot(Sentence sent, unsigned int mode, const char *modestr)
194 {
195 const Gword *w;
196 Gword **wp;
197 dyn_str *wgd = dyn_str_new(); /* the wordgraph in dot representation */
198 char nn[2*sizeof(char *) + 2 + 2 + 1]; /* \"%p\" node name: "0x..."+NUL*/
199
200 /* This function is called only if we have a wordgraph, in which case
201 * chain_next is non-NULL. So stop static analyzers to complain that
202 * it can be possibly NULL. */
203 UNREACHABLE(NULL == sent->wordgraph->chain_next);
204
205 append_string(wgd, "# Mode: %s\n", modestr);
206 dyn_strcat(wgd, "digraph G {\nsize =\"30,20\";\nrankdir=LR;\n");
207 if ((mode & (WGR_SUB)) && !(mode & WGR_COMPACT))
208 dyn_strcat(wgd, "newrank=true;\n");
209 if (mode & WGR_LEGEND) wordgraph_legend(wgd, mode);
210 append_string(wgd, "\"%p\" [shape=box,style=filled,color=\".7 .3 1.0\"];\n",
211 sent->wordgraph);
212
213 for (w = sent->wordgraph; w; w = w->chain_next)
214 {
215 bool show_node;
216
217 if (!(mode & WGR_UNSPLIT) && (MT_INFRASTRUCTURE != w->morpheme_type))
218 {
219 Gword *wu;
220
221 show_node = false;
222 /* In this mode nodes that are only unsplit_word are not shown. */
223 for (wu = sent->wordgraph; wu; wu = wu->chain_next)
224 {
225 if (NULL != wu->next)
226 {
227 for (wp = wu->next; *wp; wp++)
228 {
229 if (w == *wp)
230 {
231 show_node = true;
232 break;
233 }
234 }
235 }
236 }
237
238 if (!show_node) continue;
239 }
240
241 snprintf(nn, sizeof(nn), "\"%p\"", w);
242
243 /* Subword node format:
244 * +------------------+
245 * + +
246 * + w->subword +
247 * + (w->flags) +
248 * + w->morpheme_type +
249 * + +
250 * +------------------+
251 * w->node_num } <- external node label
252 * w->label }
253 *
254 * The flags and morpheme type are printed symbolically.
255 * The node_num field is the ordinal number of word creation.
256 * The label shows the code positions that created the subword.
257 * The external node label may appear at other positions near the node.
258 *
259 * FIXME: Use HTML labels.
260 */
261
262 append_string(wgd, "%s [label=\"%s\\n(%s)\\n%s\"];\n", nn,
263 wlabel(sent, w), gword_status(sent, w), gword_morpheme(sent, w));
264
265 if (!(mode & WGR_DBGLABEL))
266 {
267 append_string(wgd, "%s [xlabel=\"%zu",
268 nn, w->node_num);
269 }
270 else
271 {
272 append_string(wgd, "%s [xlabel=\"%zu\\n%s",
273 nn, w->node_num, w->label);
274 }
275
276 /* For debugging this function: display also hex node names. */
277 if (mode & WGR_DOTDEBUG)
278 append_string(wgd, "\\n%p-%s", w, wlabel(sent, w));
279
280 dyn_strcat(wgd, "\"];\n");
281
282 if (NULL != w->next)
283 {
284 for (wp = w->next; *wp; wp++)
285 {
286 append_string(wgd, "%s->\"%p\" [label=next color=red];\n",
287 nn, *wp);
288 }
289 }
290 if (mode & WGR_PREV)
291 {
292 if (NULL != w->prev)
293 {
294 for (wp = w->prev; *wp; wp++)
295 {
296 append_string(wgd, "%s->\"%p\" [label=prev color=blue];\n",
297 nn, *wp);
298 }
299 }
300 }
301 if (mode & WGR_UNSPLIT)
302 {
303 if (!(mode & WGR_SUB) && (NULL != w->unsplit_word))
304 {
305 append_string(wgd, "%s->\"%p\" [label=unsplit];\n",
306 nn, w->unsplit_word);
307 }
308 }
309 }
310
311 if (mode & WGR_SUB)
312 {
313 const Gword *old_unsplit = NULL;
314
315 for (w = sent->wordgraph; w; w = w->chain_next)
316 {
317 if (NULL != w->unsplit_word)
318 {
319 if (w->unsplit_word != old_unsplit)
320 {
321 if (NULL != old_unsplit) dyn_strcat(wgd, "}\n");
322 append_string(wgd, "subgraph \"cluster-%p\" {", w->unsplit_word);
323 append_string(wgd, "label=\"%zu %s\"; \n",
324 w->unsplit_word->node_num, wlabel(sent, w->unsplit_word));
325
326 old_unsplit = w->unsplit_word;
327 }
328 snprintf(nn, sizeof(nn), "\"%p\"", w);
329 if (strstr(dyn_str_value(wgd), nn))
330 append_string(wgd, "\"%p\"; ", w);
331 }
332 }
333 dyn_strcat(wgd, "}\n");
334 }
335 else
336 {
337 #ifdef WGR_SHOW_TERMINATOR_AT_LHS /* not defined - not useful */
338 const Gword *terminating_node = NULL;
339 #endif
340
341 dyn_strcat(wgd, "{rank=same; ");
342 for (w = sent->wordgraph->chain_next; w; w = w->chain_next)
343 {
344 snprintf(nn, sizeof(nn), "\"%p\"", w);
345 if (IS_SENTENCE_WORD(sent, w) &&
346 ((mode & WGR_UNSPLIT) || strstr(dyn_str_value(wgd), nn)))
347 {
348 append_string(wgd, "%s; ", nn);
349 }
350
351 #ifdef WGR_SHOW_TERMINATOR_AT_LHS
352 if (NULL == w->next) terminating_node = w;
353 #endif
354 }
355 dyn_strcat(wgd, "}\n");
356
357 #ifdef WGR_SHOW_TERMINATOR_AT_LHS
358 if (terminating_node)
359 append_string(wgd, "{rank=sink; \"%p\"}\n", terminating_node);
360 #endif
361 }
362
363 dyn_strcat(wgd, "\n}\n");
364
365 return wgd;
366 }
367
#if defined(HAVE_FORK) && !defined(POPEN_DOT)
static pid_t pid; /* PID of the forked viewer. XXX not reentrant */

#ifndef HAVE_PRCTL
/**
 * Cancel the wordgraph viewers, to be used if there is fork() but no prctl().
 *
 * The viewer is signaled only if pid holds a real process ID. The value
 * can also be 0 (initial value, or reset after a waitpid() error) or -1
 * (failed fork()); kill() would take these as "the whole process group"
 * and "every process we may signal", respectively.
 */
static void wordgraph_show_cancel(void)
{
	if (pid > 0) kill(pid, SIGTERM);
}
#endif /* !HAVE_PRCTL */
#endif /* HAVE_FORK && !POPEN_DOT */
381
/* The command that runs the "dot" program. It can be overridden at build
 * time by defining DOT_COMMAND. */
#ifndef DOT_COMMAND
#define DOT_COMMAND "dot"
#endif
385
386 #ifndef DOT_DRIVER
387 #define DOT_DRIVER "-Txlib"
388 #endif
389
/* In case files are used, their names are fixed. So more than one thread
 * (or program) cannot use the word-graph display at the same time. This
 * can be corrected, even though there is not much point in doing that
 * (displaying the word-graph is for debug). */
394 #define DOT_FILENAME "lg-wg.vg"
395
396 #define POPEN_DOT_CMD DOT_COMMAND" "DOT_DRIVER
397 #ifndef POPEN_DOT_CMD_NATIVE
398 # ifdef _WIN32
399 # ifndef IMAGE_VIEWER
400 # define IMAGE_VIEWER "rundll32 PhotoViewer,ImageView_Fullscreen"
401 # endif
402 # define WGJPG "%TEMP%\\lg-wg.jpg"
403 # define POPEN_DOT_CMD_NATIVE \
404 DOT_COMMAND" -Tjpg>"WGJPG"&"IMAGE_VIEWER" "WGJPG"&del "WGJPG
405 # elif __APPLE__
406 # ifndef IMAGE_VIEWER
407 # define IMAGE_VIEWER "open -W"
408 # endif
409 # define WGJPG "$TMPDIR/lg-wg.jpg"
410 # define POPEN_DOT_CMD_NATIVE \
411 DOT_COMMAND" -Tjpg>"WGJPG";"IMAGE_VIEWER" "WGJPG";rm "WGJPG
412 # else
413 # define POPEN_DOT_CMD_NATIVE POPEN_DOT_CMD
414 # endif
415 #endif
416
417 #if !defined HAVE_FORK || defined POPEN_DOT
418 #ifdef _MSC_VER
419 #define popen _popen
420 #define pclose _pclose
421 #endif
422 /**
423 * popen a command with the given input.
424 * If the system doesn't have fork(), popen() is used to launch "dot".
425 * This is an inferior implementation than the one below that uses
426 * fork(), in which the window remains open and is updated automatically
427 * when new sentences are entered. With popen(), the program blocks at
428 * pclose() and the user needs to close the window after each sentence.
429 */
static bool x_popen(const char *cmd, const char *wgds)
{
	lgdebug(+3, "Invoking: %s\n", cmd);

	/* The command reads the graph description from its standard input. */
	FILE *const to_cmd = popen(cmd, "w");
	if (NULL == to_cmd)
	{
		prt_error("Error: popen of '%s' failed: %s\n", cmd, strerror(errno));
		return false;
	}

	bool success = true;

	if (EOF == fputs(wgds, to_cmd)) /* see default_error_handler() */
	{
		prt_error("Error: x_popen(): fputs() error: %s\n", strerror(errno));
		success = false;
	}

	/* The stream is closed even after a write failure. */
	if (-1 == pclose(to_cmd))
	{
		prt_error("Error: x_popen(): pclose() error: %s\n", strerror(errno));
		success = false;
	}

	return success;
}
457 #else
x_forkexec(const char * const argv[],pid_t * vpid,const char err[])458 static bool x_forkexec(const char *const argv[], pid_t *vpid, const char err[])
459 {
460 /* Fork/exec a graph viewer, and leave it in the background until we exit.
461 * On exit, send SIGHUP. If prctl() is not available and the program
462 * crashes, then it is left to the user to exit the viewer. */
463 if (0 < *vpid)
464 {
465 pid_t rpid = waitpid(*vpid, NULL, WNOHANG);
466
467 if (0 == rpid) return true; /* viewer still active */
468 if (-1 == rpid)
469 {
470 prt_error("Error: waitpid(%d): %s\n", *vpid, strerror(errno));
471 *vpid = 0;
472 return false;
473 }
474 }
475
476 *vpid = fork();
477 switch (*vpid)
478 {
479 case -1:
480 prt_error("Error: fork(): %s\n", strerror(errno));
481 return false;
482 case 0:
483 #ifdef HAVE_PRCTL
484 if (-1 == prctl(PR_SET_PDEATHSIG, SIGHUP))
485 {
486 prt_error("Error: prctl: %s\n", strerror(errno));
487 /* Non-fatal error - continue. */
488 }
489 #endif
490 /* Not closing fd 0/1/2, to allow interaction with the program */
491 execvp(argv[0], (char **)argv);
492 prt_error("Error: execlp of %s: %s%s\n",
493 argv[0], strerror(errno), (ENOENT == errno) ? err : "");
494 _exit(1);
495 default:
496 #ifndef HAVE_PRCTL
497 if (0 != atexit(wordgraph_show_cancel))
498 {
499 prt_error("Warning: atexit(wordgraph_show_cancel) failed.\n");
500 /* Non-fatal error - continue. */
501 }
502 #endif
503 break;
504 }
505
506 return true;
507 }
508 #endif /* !defined HAVE_FORK || defined POPEN_DOT */
509
510 #ifdef _WIN32
511 #define TMPDIR (getenv("TEMP") ? getenv("TEMP") : ".")
512 #else
513 #define TMPDIR (getenv("TMPDIR") ? getenv("TMPDIR") : "/tmp")
514 #endif
515
/* Set fn to the path "<fn1>/<fn2>", built in memory obtained from alloca()
 * (so this must remain a macro, and the path is not to be freed).
 * The size is that of both parts, plus the '/' and the terminating NUL.
 * The value of the whole expression is fn. Note that fn1 and fn2 are
 * each evaluated twice. */
#define concatfn(fn, fn1, fn2) \
	((fn) = alloca(strlen(fn1) + strlen(fn2) + 2), \
	 strcat(strcat(strcpy((fn), (fn1)), "/"), (fn2)))
519
wordgraph_unlink_xtmpfile(void)520 static void wordgraph_unlink_xtmpfile(void)
521 {
522 char *fn;
523
524 if (!test_enabled("gvfile"))
525 {
526 concatfn(fn, TMPDIR, DOT_FILENAME);
527 if (unlink(fn) == -1)
528 prt_error("Warning: Cannot unlink %s: %s\n", fn, strerror(errno));
529 }
530 }
531
532 /**
533 * Display the word-graph in the indicated mode.
534 * This is for debug and inspiration. It is not reentrant due to the
535 * static pid and the possibly created fixed filenames.
536 * When Using X11, a "dot -Txlib" program is launched on the graph
537 * description file. The xlib driver refreshes the graph when the file is
538 * changed, displaying additional sentences in the same window. The viewer
539 * program exits on program end (see the comments in the code). When
540 * compiled with MSVC or MINGW, the system PhotoViewer is used by default,
541 * unless !wg:x is used (for using X11 when available).
542 *
543 * The "dot" and the "PhotoViewer" programs must be in the PATH.
544 *
545 * FIXME? "dot" may get a SEGV due to memory corruptions in it (a known
546 * problem - exists even in 2.38). This can be worked-around by trying it
547 * again until it succeeds (but the window size, if changed by the user,
548 * will not be preserved).
549 *
550 * modestr: a graph display mode as defined in wordgraph.h (default "ldu").
551 */
bool sentence_display_wordgraph(Sentence sent, const char *modestr)
{
	const bool keep_gvfile = test_enabled("gvfile"); /* keep it for debug */
	char *gvf_name = NULL;
	bool rc;

	/* With fork()/exec() the viewer reads the graph from a file, so the
	 * file is then mandatory. With popen() the graph is sent on a pipe. */
#if defined(HAVE_FORK) && !defined(POPEN_DOT)
	const bool gvfile = true;
#else
	const bool gvfile = false;
#endif

	/* Each lowercase letter of modestr, up to its end or to the first
	 * comma, turns on the mode bit that corresponds to it. */
	uint32_t mode = 0;
	for (const char *mp = modestr; ('\0' != *mp) && (',' != *mp); mp++)
	{
		if ((*mp < 'a') || (*mp > 'z')) continue;
		mode |= 1<<(*mp-'a');
	}
	/* Nothing but (possibly) the X11 selection was given: use the default. */
	if ((0 == mode) || (WGR_X11 == mode))
		mode |= WGR_LEGEND|WGR_DBGLABEL|WGR_UNSPLIT;

	/* The graph description text; it is freed before returning. */
	char *wgds = dyn_str_take(wordgraph2dot(sent, mode, modestr));

	if (gvfile || keep_gvfile)
	{
		static bool wordgraph_unlink_xtmpfile_needed = true;
		bool gvf_error = false;

		concatfn(gvf_name, TMPDIR, DOT_FILENAME);
		FILE *gvf = fopen(gvf_name, "w");
		if (NULL != gvf)
		{
			if (EOF == fputs(wgds, gvf))
			{
				gvf_error = true;
				prt_error("Error: %s(): fputs() to %s failed: %s\n",
				          __func__, gvf_name, strerror(errno));
			}
			if (EOF == fclose(gvf))
			{
				gvf_error = true;
				prt_error("Error: %s(): fclose() of %s failed: %s\n",
				          __func__, gvf_name, strerror(errno));
			}
		}
		else
		{
			prt_error("Error: %s(): fopen() of %s failed: %s\n",
			          __func__, gvf_name, strerror(errno));
			gvf_error = true;
		}

		if (gvf_error && gvfile) /* we need it - cannot continue */
		{
			free(wgds);
			return false;
		}

		if (wordgraph_unlink_xtmpfile_needed)
		{
			/* The filename is fixed - removal needed only once. */
			wordgraph_unlink_xtmpfile_needed = false;
			atexit(wordgraph_unlink_xtmpfile);
		}
	}

#ifdef _WIN32
#define EXITKEY "ALT-F4"
#elif __APPLE__
#define EXITKEY "⌘-Q"
#endif

#ifdef EXITKEY
	prt_error("Press "EXITKEY" in the graphical display window to continue\n");
#endif

#if !defined HAVE_FORK || defined POPEN_DOT
	rc = x_popen((mode & WGR_X11)? POPEN_DOT_CMD : POPEN_DOT_CMD_NATIVE, wgds);
#else
	{
		assert(NULL != gvf_name, "DOT filename not initialized (#define mess?)");
		const char *const args[] = { DOT_COMMAND, DOT_DRIVER, gvf_name, NULL };
		const char notfound[] =
			" (command not in PATH; \"graphviz\" package not installed?).";
		rc = x_forkexec(args, &pid, notfound);
	}
#endif

	free(wgds);
	return rc;
}
646 #else
bool sentence_display_wordgraph(Sentence sent, const char *modestr)
{
	/* Stub for a build without USE_WORDGRAPH_DISPLAY: the arguments are
	 * not used; an error is reported and the call always fails. */
	(void)sent;
	(void)modestr;

	prt_error("Error: Library not configured with wordgraph-display\n");
	return false;
}
652 #endif /* USE_WORDGRAPH_DISPLAY */
653