1 /*
2 * Copyright (C) 2011 - 2012 Vivien Malerba <malerba@gnome-db.org>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 #include "rt-parser.h"
21 #include <glib/gi18n-lib.h>
22 #include <gio/gio.h>
23
24 #ifdef HAVE_GDKPIXBUF
25 #include <gdk-pixbuf/gdk-pixbuf.h>
26 #include <gdk-pixbuf/gdk-pixdata.h>
27 #endif
28
29 #include <libgda/gda-debug-macros.h>
30
31 /* RTE markup analysis */
/*
 * Tokens recognized by the tokenizer. Tags without a _S/_E suffix use the same
 * character sequence to open and to close a section.
 */
typedef enum {
	MARKUP_NONE = 0,	/* no markup: plain text */
	MARKUP_BOLD,		/* two asterisks */
	MARKUP_TT,		/* two backquotes */
	MARKUP_VERBATIM,	/* three double quotes */
	MARKUP_ITALIC,		/* two slashes, unless preceded by a colon */
	MARKUP_STRIKE,		/* two dashes (5) */
	MARKUP_UNDERLINE,	/* two underscores */

	MARKUP_TITLE1_S,	/* "= " at the very start of a line */
	MARKUP_TITLE1_E,	/* " =", the rest of the line is ignored */
	MARKUP_TITLE2_S,	/* "== " at the very start of a line */
	MARKUP_TITLE2_E,	/* " ==", the rest of the line is ignored (10) */

	MARKUP_PICTURE_S,	/* "[[[" */
	MARKUP_PICTURE_E,	/* "]]]" */

	MARKUP_LIST_S,		/* "- " preceded only by spaces on its line */
	MARKUP_LIST_E,		/* end of the line of a list item */

	MARKUP_EOF		/* end of the text */
} MarkupTag;
54
55 /* for the RtMarkup enum */
56 static gchar *markup_tag_text[] = {
57 "NONE", "PARA", "BOLD", "TT", "VERBATIM", "ITALIC", "STRIKE", "UNDERLINE",
58 "TITLE", "PICTURE", "LIST"
OggInfoSetWin(int xpos,int wid,int ypos,int hgt)59 };
60
61 static
62 RtMarkup
63 internal_markup_to_external (MarkupTag markup, gint *out_offset)
64 {
65 switch (markup) {
66 case MARKUP_NONE:
OggInfoGetWin(struct cpitextmodequerystruct * q)67 return RT_MARKUP_NONE;
68 case MARKUP_BOLD:
69 return RT_MARKUP_BOLD;
70 case MARKUP_TT:
71 return RT_MARKUP_TT;
72 case MARKUP_VERBATIM:
73 return RT_MARKUP_VERBATIM;
74 case MARKUP_ITALIC:
75 return RT_MARKUP_ITALIC;
76 case MARKUP_STRIKE:
77 return RT_MARKUP_STRIKE;
78 case MARKUP_UNDERLINE:
79 return RT_MARKUP_UNDERLINE;
80 case MARKUP_TITLE1_S:
81 *out_offset = 0;
82 return RT_MARKUP_TITLE;
83 case MARKUP_TITLE2_S:
84 *out_offset = 1;
85 return RT_MARKUP_TITLE;
86 case MARKUP_PICTURE_S:
87 return RT_MARKUP_PICTURE;
88 case MARKUP_LIST_S:
89 return RT_MARKUP_LIST;
90 default:
91 g_assert_not_reached ();
92 }
93 return MARKUP_NONE;
94 }
95
96 static MarkupTag get_markup_token (const gchar *alltext, const gchar *start, gint *out_nb_spaces_before,
97 const gchar **out_end, MarkupTag start_tag);
98 static MarkupTag get_token (const gchar *alltext, const gchar *start, gint *out_nb_spaces_before, const gchar **out_end,
99 MarkupTag start_tag);
100
101 /*
102 * get_token
103 *
104 * returns the token type starting from @iter, and positions @out_end to the last used position
105 * position.
106 */
107 static MarkupTag
108 get_token (const gchar *alltext, const gchar *start, gint *out_nb_spaces_before, const gchar **out_end,
109 MarkupTag start_tag)
110 {
111 MarkupTag retval;
OggInfoDraw(int focus)112 const gchar *ptr;
113
114 retval = get_markup_token (alltext, start, out_nb_spaces_before, &ptr, start_tag);
115 if ((retval != MARKUP_NONE) || (retval == MARKUP_EOF)) {
116 *out_end = ptr;
117 return retval;
118 }
119
120 for (; *ptr ; ptr = g_utf8_next_char (ptr)) {
121 retval = get_markup_token (alltext, ptr, NULL, NULL, start_tag);
122 if ((retval != MARKUP_NONE) || (retval == MARKUP_EOF))
123 break;
124 }
125 *out_end = ptr;
126 return MARKUP_NONE;
127 }
128
129 /*
130 * get_markup_token
131 * @alltext: the complete text
132 * @start: starting point
133 * @out_nb_spaces_before: a place to set the number of spaces since the start of line
134 * @out_end: place to put the last used position, or %NULL
135 * @start_tag: the starting tag, if any (to detect the closing tag)
136 *
137 * Parses marking tokens, nothing else
138 *
139 * Returns: a markup token, or MARKUP_NONE or MARKUP_EOF otherwise
140 */
141 static MarkupTag
142 get_markup_token (const gchar *alltext, const gchar *start, gint *out_nb_spaces_before, const gchar **out_end,
143 MarkupTag start_tag)
144 {
145 gchar c;
146 gint ssol = -1; /* spaces since start of line */
147 MarkupTag start_markup = start_tag;
148 const gchar *ptr;
149
150 #define SET_OUT \
151 if (out_end) { \
152 ptr++; \
153 *out_end = ptr; \
154 } \
155 if (out_nb_spaces_before) \
156 *out_nb_spaces_before = ssol
157
158 if (start_tag)
159 start_markup = start_markup;
160
161 ptr = start;
162 if (out_end)
163 *out_end = ptr;
164 if (out_nb_spaces_before)
OggInfoIProcessKey(uint16_t key)165 *out_nb_spaces_before = -1;
166 c = *ptr;
167
168 /* tests involving starting markup before anything else */
169 if (start_markup == MARKUP_PICTURE_S) {
170 if (c == ']') {
171 ptr++;
172 c = *ptr;
173 if (c == ']') {
174 ptr++;
175 c = *ptr;
176 if (c == ']') {
177 SET_OUT;
178 return MARKUP_PICTURE_E;
179 }
180 }
181 }
182 if (!c)
183 return MARKUP_EOF;
184 else
185 return MARKUP_NONE;
186 }
187 else if (start_markup == MARKUP_VERBATIM) {
188 if (c == '"') {
189 ptr++;
OggInfoAProcessKey(uint16_t key)190 c = *ptr;
191 if (c == '"') {
192 ptr++;
193 c = *ptr;
194 if (c == '"') {
195 SET_OUT;
196 return MARKUP_VERBATIM;
197 }
198 }
199 }
200 if (!c)
201 return MARKUP_EOF;
202 else
203 return MARKUP_NONE;
204 }
205
206 if ((*ptr == '\n') && (start_markup == MARKUP_LIST_S)) {
207 SET_OUT;
208 return MARKUP_LIST_E;
209 }
210
211 if (!c)
212 return MARKUP_EOF;
213
214 /* other tests */
215 const gchar *ptr1 = ptr;
216 if (ptr == alltext) {
217 for (; *ptr1 == ' '; ptr1++);
218 ssol = ptr1 - ptr;
219 }
220 else if (ptr[-1] == '\n') {
221 for (; *ptr1 == ' '; ptr1++);
222 ssol = ptr1 - ptr;
223 }
224 if (ssol >= 0) {
225 /* we are on a line with only spaces since its start */
226 if (ssol == 0) {
227 if (c == '=') {
228 ptr++;
229 c = *ptr;
230 if (c == ' ') {
231 SET_OUT;
232 return MARKUP_TITLE1_S;
OggInfoEvent(int ev)233 }
234 else if (c == '=') {
235 ptr++;
236 c = *ptr;
237 if (c == ' ') {
238 SET_OUT;
239 return MARKUP_TITLE2_S;
240 }
241 }
242 }
243 }
244
245 c = *ptr1;
246 if (c == '-') {
247 ptr1++;
248 c = *ptr1;
249 if (c == ' ') {
250 ptr = ptr1;
251 SET_OUT;
252 return MARKUP_LIST_S;
OggInfoInit(void)253 }
254 }
255 }
256
257 if (c == '*') {
258 ptr++;
259 c = *ptr;
260 if (c == '*') {
261 SET_OUT;
262 return MARKUP_BOLD;
263 }
264 }
265 else if (c == '/') {
266 ptr++;
267 c = *ptr;
268 if (c == '/') {
269 const gchar *bptr;
270 bptr = ptr-2;
271 if ((bptr > alltext) && (*bptr == ':')) {}
272 else {
273 SET_OUT;
274 return MARKUP_ITALIC;
275 }
276 }
277 }
278 else if (c == '_') {
279 ptr++;
280 c = *ptr;
281 if (c == '_') {
282 SET_OUT;
283 return MARKUP_UNDERLINE;
284 }
285 }
286 else if (c == '-') {
287 ptr++;
288 c = *ptr;
289 if (c == '-') {
290 SET_OUT;
291 return MARKUP_STRIKE;
292 }
293 }
294 else if (c == '`') {
295 ptr++;
296 c = *ptr;
297 if (c == '`') {
298 SET_OUT;
299 return MARKUP_TT;
300 }
301 }
302 else if (c == '"') {
303 ptr++;
304 c = *ptr;
305 if (c == '"') {
306 ptr++;
307 c = *ptr;
308 if (c == '"') {
309 SET_OUT;
310 return MARKUP_VERBATIM;
311 }
312 }
313 }
314 else if (c == ' ') {
315 ptr++;
316 c = *ptr;
317 if (c == '=') {
318 if (start_markup == MARKUP_TITLE1_S) {
319 /* ignore anything up to the EOL */
320 for (; *ptr && (*ptr != '\n'); ptr++);
321
322 SET_OUT;
323 return MARKUP_TITLE1_E;
324 }
325 else {
326 ptr++;
327 c = *ptr;
328 if (c == '=') {
329 /* ignore anything up to the EOL */
330 for (; *ptr && (*ptr != '\n'); ptr++);
331
332 SET_OUT;
333 return MARKUP_TITLE2_E;
334 }
335 }
336 }
337 }
338 else if (c == '[') {
339 ptr++;
340 c = *ptr;
341 if (c == '[') {
342 ptr++;
343 c = *ptr;
344 if (c == '[') {
345 SET_OUT;
346 return MARKUP_PICTURE_S;
347 }
348 }
349 }
350 return MARKUP_NONE;
351 }
352
353 /*
354 * steals @base64
355 */
356 static gchar *
357 remove_newlines_from_base64 (gchar *base64)
358 {
359 GString *string;
360 gchar *ptr;
361 string = g_string_new ("");
362 for (ptr = base64; *ptr; ptr++) {
363 if (*ptr != '\n')
364 g_string_append_c (string, *ptr);
365 }
366 g_free (base64);
367 return g_string_free (string, FALSE);
368 }
369
370 static gchar *
371 get_node_path (RtNode *node)
372 {
373 gint i;
374 RtNode *tmp;
375 for (i = 0, tmp = node->prev; tmp; tmp = tmp->prev)
376 i++;
377 if (node->parent) {
378 gchar *str, *ret;
379 str = get_node_path (node->parent);
380 ret = g_strdup_printf ("%s:%d", str, i);
381 g_free (str);
382 return ret;
383 }
384 else
385 return g_strdup_printf ("%d", i);
386 }
387
388 static void
389 rt_dump_tree_offset (RtNode *tree, gint offset)
390 {
391 gchar *str = "";
392
393 if (offset) {
394 str = g_new (gchar, offset + 1);
395 memset (str, ' ', offset);
396 str [offset] = 0;
397 }
398
399 g_print ("%p-%s%s ", tree, str, markup_tag_text[tree->markup]);
400
401 gchar *path;
402 path = get_node_path (tree);
403 g_print ("[path=%s] ", path);
404 g_free (path);
405
406 if (tree->offset >= 0)
407 g_print ("[offset=%d] ", tree->offset);
408
409 if (tree->text) {
410 #define MAXSIZE 100
411 g_print ("TEXT [");
412 gchar *copy, *ptr;
413 copy = g_strdup (tree->text);
414 if (strlen (copy) > MAXSIZE) {
415 copy [MAXSIZE] = 0;
416 copy [MAXSIZE - 1] = '.';
417 copy [MAXSIZE - 2] = '.';
418 copy [MAXSIZE - 3] = '.';
419 }
420 for (ptr = copy; *ptr; ptr++) {
421 if (*ptr == '\n') {
422 g_print ("\n %s", str);
423 }
424 else
425 g_print ("%c", *ptr);
426 }
427 g_free (copy);
428 g_print ("]\n");
429 }
430 else if (tree->binary.data) {
431 g_print ("BINARY\n");
432 }
433 else
434 g_print ("\n");
435
436 if (tree->child)
437 rt_dump_tree_offset (tree->child, offset + 8);
438 if (tree->next)
439 rt_dump_tree_offset (tree->next, offset);
440 if (offset)
441 g_free (str);
442 }
443
/*
 * rt_dump_tree
 * @tree: the node to start from
 *
 * Prints a description of @tree, of its children and of the siblings following it,
 * starting with no indentation.
 */
void
rt_dump_tree (RtNode *tree)
{
	rt_dump_tree_offset (tree, 0);
}
449
450 static void
451 rt_dump_tree_to_string (RtNode *tree, GString *string)
452 {
453 gchar *path;
454
455 path = get_node_path (tree);
456
457 g_string_append_printf (string, "%s-%s ", path, markup_tag_text[tree->markup]);
458 g_free (path);
459
460 if (tree->offset >= 0)
461 g_string_append_printf (string, "[offset=%d] ", tree->offset);
462
463 if (tree->text) {
464 g_string_append (string, "TEXT [");
465 gchar *ptr;
466 for (ptr = tree->text; *ptr; ptr++) {
467 if (*ptr == '\n') {
468 g_string_append_c (string, '$');
469 }
470 else
471 g_string_append_printf (string, "%c", *ptr);
472 }
473 g_string_append (string, "]|");
474 }
475 else if (tree->binary.data)
476 g_string_append (string, "BINARY|");
477 else
478 g_string_append_c (string, '|');
479
480 if (tree->child)
481 rt_dump_tree_to_string (tree->child, string);
482 if (tree->next)
483 rt_dump_tree_to_string (tree->next, string);
484 }
485
486 gchar *
487 rt_dump_to_string (RtNode *tree)
488 {
489 GString *string;
490 string = g_string_new ("");
491 rt_dump_tree_to_string (tree, string);
492 return g_string_free (string, FALSE);
493 }
494
495 void
496 rt_free_node (RtNode *node)
497 {
498 if (node->child)
499 rt_free_node (node->child);
500 if (node->next)
501 rt_free_node (node->next);
502 g_free (node->text);
503 if (node->binary.data)
504 g_free (node->binary.data);
505 g_free (node);
506 }
507
508 static gboolean merge_single_child_text (RtNode *tree);
509 static gboolean merge_text_node_child (RtNode *tree);
510 static gboolean merge_text_node_siblings (RtNode *tree);
511 static gboolean reorganize_children (RtNode *tree, gboolean *out_tree_destroyed);
512
513 /* if @tree is a RT_MARKUP_NONE with no binary data, and has a unique RT_MARKUP_NONE with no binary data child
514 * then merge the child into it */
515 static gboolean
516 merge_text_node_child (RtNode *tree)
517 {
518 RtNode *child;
519 child = tree->child;
520 if (! child)
521 return FALSE;
522
523 if ((tree->markup == RT_MARKUP_NONE) && !tree->binary.data &&
524 (child->markup == RT_MARKUP_NONE) && ! child->child && child->text && !child->binary.data) {
525 if (tree->text) {
526 gchar *tmp;
527 tmp = tree->text;
528 tree->text = g_strconcat (tmp, child->text, NULL);
529 g_free (tmp);
530 g_free (child->text);
531 }
532 else
533 tree->text = child->text;
534 child->text = NULL;
535 RtNode *tnode = child->next;
536 child->next = NULL;
537 rt_free_node (child);
538 tree->child = tnode;
539 return TRUE;
540 }
541 return FALSE;
542 }
543
544 /* if @tree is a RT_MARKUP_NONE with no binary data, then merge all the siblings which are also
545 * RT_MARKUP_NONE into it */
546 static gboolean
547 merge_text_node_siblings (RtNode *tree)
548 {
549 gboolean retval = FALSE;
550 while (1) {
551 if (! tree->next)
552 break;
553 RtNode *next = tree->next;
554 if ((tree->markup == RT_MARKUP_NONE) && !tree->binary.data &&
555 (next->markup == RT_MARKUP_NONE) && !next->binary.data &&
556 ! next->child && next->text) {
557 if (tree->text) {
558 gchar *tmp;
559 tmp = tree->text;
560 tree->text = g_strconcat (tmp, next->text, NULL);
561 g_free (tmp);
562 g_free (next->text);
563 }
564 else
565 tree->text = next->text;
566 next->text = NULL;
567 RtNode *tnode = next->next;
568 next->next = NULL;
569 rt_free_node (next);
570 tree->next = tnode;
571
572 retval = TRUE;
573 }
574 else
575 break;
576 }
577
578 return retval;
579 }
580
581 static gboolean
582 merge_single_child_text (RtNode *tree)
583 {
584 if (! (tree->text || tree->binary.data) &&
585 tree->child && !tree->child->next &&
586 ! tree->child->child &&
587 (tree->child->text || tree->child->binary.data) &&
588 (tree->child->markup == RT_MARKUP_NONE)) {
589 tree->text = tree->child->text;
590 tree->child->text = NULL;
591 tree->binary.data = tree->child->binary.data;
592 tree->child->binary.data = NULL;
593 tree->binary.binary_length = tree->child->binary.binary_length;
594 tree->child->binary.binary_length = 0;
595 rt_free_node (tree->child);
596 tree->child = NULL;
597 return TRUE;
598 }
599
600 return FALSE;
601 }
602
603 static gboolean
604 reorganize_children (RtNode *tree, gboolean *out_tree_destroyed)
605 {
606 gboolean retval = FALSE;
607 *out_tree_destroyed = FALSE;
608 if ((tree->markup == RT_MARKUP_PARA) && (tree->text && !*(tree->text)) &&
609 !tree->child &&
610 tree->next &&
611 ((tree->next->markup == RT_MARKUP_LIST) || tree->next->markup == RT_MARKUP_TITLE)) {
612 /* simply get rid of useless node */
613 RtNode *n;
614 n = tree->next;
615 n->prev = tree->prev;
616 if (tree->prev)
617 tree->prev->next = n;
618 if (tree->parent && (tree->parent->child == tree))
619 tree->parent->child = n;
620 tree->prev = NULL;
621 tree->next = NULL;
622 rt_free_node (tree);
623 *out_tree_destroyed = TRUE;
624 return TRUE;
625 }
626 else if ((tree->markup == RT_MARKUP_PARA) && (tree->text && !*(tree->text)) &&
627 !tree->child && !tree->next) {
628 /* simply get rid of useless node */
629 if (tree->prev)
630 tree->prev->next = NULL;
631 if (tree->parent && (tree->parent->child == tree))
632 tree->parent->child = NULL;
633 tree->prev = NULL;
634 tree->next = NULL;
635 rt_free_node (tree);
636 *out_tree_destroyed = TRUE;
637 return TRUE;
638 }
639 if (tree->markup == RT_MARKUP_LIST) {
640 RtNode *node;
641 for (node = tree->next; node;) {
642 if ((node->markup != RT_MARKUP_LIST) ||
643 (node->offset <= tree->offset))
644 break;
645 RtNode *prev, *next;
646 prev = node->prev;
647 next = node->next;
648 if (tree->child) {
649 RtNode *n;
650 for (n = tree->child; n->next; n = n->next);
651 n->next = node;
652 node->prev = n;
653 node->next = NULL;
654 }
655 else {
656 tree->child = node;
657 node->prev = NULL;
658 node->next = NULL;
659 }
660 if (prev)
661 prev->next = next;
662 if (next)
663 next->prev = prev;
664 node->parent = tree;
665 node = next;
666 retval = TRUE;
667 }
668 }
669 else if (tree->markup == RT_MARKUP_TITLE) {
670 RtNode *node;
671 for (node = tree->next; node;) {
672 if ((node->markup == RT_MARKUP_TITLE) &&
673 (node->offset <= tree->offset))
674 break;
675
676 RtNode *prev, *next;
677 prev = node->prev;
678 next = node->next;
679 if (tree->child) {
680 RtNode *n;
681 for (n = tree->child; n->next; n = n->next);
682 n->next = node;
683 node->prev = n;
684 node->next = NULL;
685 }
686 else {
687 tree->child = node;
688 node->prev = NULL;
689 node->next = NULL;
690 }
691 if (prev)
692 prev->next = next;
693 if (next)
694 next->prev = prev;
695 node->parent = tree;
696 node = next;
697 retval = TRUE;
698 }
699 }
700 else if (tree->markup == RT_MARKUP_PARA) {
701 RtNode *node;
702 for (node = tree->next; node;) {
703 if ((node->markup == RT_MARKUP_TITLE) ||
704 (node->markup == RT_MARKUP_PARA))
705 break;
706
707 RtNode *prev, *next;
708 prev = node->prev;
709 next = node->next;
710 if (tree->child) {
711 RtNode *n;
712 for (n = tree->child; n->next; n = n->next);
713 n->next = node;
714 node->prev = n;
715 node->next = NULL;
716 }
717 else {
718 tree->child = node;
719 node->prev = NULL;
720 node->next = NULL;
721 }
722 if (prev)
723 prev->next = next;
724 if (next)
725 next->prev = prev;
726 node->parent = tree;
727 node = next;
728 retval = TRUE;
729 }
730 }
731 return retval;
732 }
733
734 /*
735 * Simplifies and reorganizes the tree
736 */
737 static gboolean
738 simplify_tree (RtNode *tree)
739 {
740 gboolean mod, tree_del, retval = FALSE;
741
742 for (mod = TRUE, tree_del = FALSE; mod && !tree_del;) {
743 mod = FALSE;
744 if (tree->child)
745 mod = mod || simplify_tree (tree->child);
746 if (tree->next)
747 mod = mod || simplify_tree (tree->next);
748 mod = mod || merge_single_child_text (tree);
749 mod = mod || merge_text_node_child (tree);
750 mod = mod || merge_text_node_siblings (tree);
751 mod = mod || reorganize_children (tree, &tree_del);
752 if (mod)
753 retval = TRUE;
754 }
755 return retval;
756 }
757
758 static const gchar *
759 serialize_tag (MarkupTag tag)
760 {
761 switch (tag) {
762 case MARKUP_BOLD:
763 return "**";
764 case MARKUP_TT:
765 return "``";
766 case MARKUP_VERBATIM:
767 return "\"\"";
768 case MARKUP_ITALIC:
769 return "//";
770 case MARKUP_STRIKE:
771 return "--";
772 case MARKUP_UNDERLINE:
773 return "__";
774 case MARKUP_TITLE1_S:
775 return "= ";
776 case MARKUP_TITLE1_E:
777 return " =";
778 case MARKUP_TITLE2_S:
779 return "== ";
780 case MARKUP_TITLE2_E:
781 return "= ";
782 case MARKUP_PICTURE_S:
783 return "[[[";
784 case MARKUP_PICTURE_E:
785 return "]]]";
786 case MARKUP_LIST_S:
787 return "- ";
788 default:
789 g_assert_not_reached ();
790 }
791 }
792
/* an opened tag which has not yet been closed, kept while parsing */
typedef struct {
	const gchar *m_start; /* where the opening tag starts in the parsed text */
	const gchar *m_end; /* position right after the opening tag */
	MarkupTag markup; /* the opening tag itself */
	RtNode *rtnode; /* the node created for the tag */
} TextTag;
799
800 static gboolean
801 markup_tag_match (TextTag *current, MarkupTag tag2, const gchar *last_position)
802 {
803 const gchar *tmp;
804 gboolean sameline = TRUE;
805 for (tmp = current->m_start; *tmp && (tmp < last_position); tmp++) {
806 if (*tmp == '\n') {
807 sameline = FALSE;
808 break;
809 }
810 }
811
812 gboolean retval;
813 switch (current->markup) {
814 case MARKUP_BOLD:
815 case MARKUP_TT:
816 case MARKUP_VERBATIM:
817 case MARKUP_ITALIC:
818 case MARKUP_STRIKE:
819 case MARKUP_UNDERLINE:
820 retval = (current->markup == tag2) ? TRUE : FALSE;
821 break;
822 case MARKUP_TITLE1_S:
823 retval = (tag2 == MARKUP_TITLE1_E) ? TRUE : FALSE;
824 break;
825 case MARKUP_TITLE2_S:
826 retval = (tag2 == MARKUP_TITLE2_E) ? TRUE : FALSE;
827 break;
828 case MARKUP_PICTURE_S:
829 retval = (tag2 == MARKUP_PICTURE_E) ? TRUE : FALSE;
830 break;
831 case MARKUP_LIST_S:
832 retval = (tag2 == MARKUP_LIST_E) ? TRUE : FALSE;
833 break;
834 default:
835 retval = FALSE;
836 break;
837 }
838
839 if (retval) {
840 if ((current->markup != MARKUP_PICTURE_S) && (current->markup != MARKUP_LIST_S) &&
841 (current->markup != MARKUP_VERBATIM))
842 retval = sameline ? TRUE : FALSE;
843 }
844 return retval;
845 }
846
847 RtNode *
848 rt_parse_text (const gchar *text)
849 {
850 RtNode *retnode, *contextnode;
851 GList *queue = NULL; /* list of TextTag pointers */
852 MarkupTag mt;
853 const gchar *ptr, *prev;
854 gint ssol;
855 TextTag *current = NULL;
856
857 retnode = g_new0 (RtNode, 1);
858 contextnode = retnode;
859
860 ptr = text;
861 prev = text;
862
863 for (mt = get_token (text, ptr, &ssol, &ptr, current ? current->markup : MARKUP_NONE);
864 mt != MARKUP_EOF;
865 mt = get_token (text, ptr, &ssol, &ptr, current ? current->markup : MARKUP_NONE)) {
866
867
868 #ifdef GDA_DEBUG_NO
869 gchar *debug;
870 debug = g_strndup (prev, ptr - prev);
871 if (strlen (debug) > 10)
872 debug [10] = 0;
873 g_print ("Token %d [%s] with SSOL %d\n", mt, debug, ssol);
874 g_free (debug);
875 #endif
876
877 if (mt == MARKUP_NONE) {
878 gchar *part;
879 RtNode *node;
880 node = g_new0 (RtNode, 1);
881 node->parent = contextnode;
882 node->markup = RT_MARKUP_NONE;
883 if (prev == text)
884 node->markup = RT_MARKUP_PARA;
885 else if (prev[-1] == '\n')
886 node->markup = RT_MARKUP_PARA;
887 part = g_strndup (prev, ptr - prev);
888
889 if (contextnode->child) {
890 RtNode *n;
891 for (n = contextnode->child; n->next; n = n->next);
892 n->next = node;
893 node->prev = n;
894 }
895 else
896 contextnode->child = node;
897
898 if (contextnode->markup != RT_MARKUP_PICTURE) {
899 /* split the node in multiple parts, one for each paragraph */
900 gchar **array;
901 gint i;
902 array = g_strsplit (part, "\n", -1);
903 for (i = 0; array [i]; i++) {
904 if (! node->text)
905 node->text = array [i];
906 else {
907 RtNode *n;
908 n = g_new0 (RtNode, 1);
909 n->parent = contextnode;
910 n->markup = RT_MARKUP_PARA;
911 n->text = array [i];
912 node->next = n;
913 n->prev = node;
914 node = n;
915 }
916 }
917 g_free (part);
918 }
919 else {
920 gchar *tmp;
921 tmp = remove_newlines_from_base64 (part);
922 node->binary.data = g_base64_decode_inplace (tmp, (gsize*) & node->binary.binary_length);
923 }
924 }
925 else {
926 gboolean tag_matched = FALSE;
927 if (current) {
928 retry:
929 if (markup_tag_match (current, mt, ptr-1)) {
930 /*g_print ("Tags matched for %d,%d\n",
931 current->markup, mt);*/
932 g_free (current);
933 queue = g_list_remove (queue, current);
934 current = NULL;
935
936 if (queue)
937 current = (TextTag*) queue->data;
938 tag_matched = TRUE;
939
940 if (contextnode->parent)
941 contextnode = contextnode->parent;
942 }
943 else {
944 /* detect misplaced tags */
945 GList *list;
946 for (list = queue; list; list = list->next) {
947 TextTag *tt = (TextTag*) list->data;
948
949 if (markup_tag_match (tt, mt, ptr-1)) {
950 /* remove all TextTag before @list */
951 while (queue != list) {
952 RtNode *lnode;
953 current = (TextTag*) queue->data;
954 lnode = current->rtnode;
955
956 lnode->markup = RT_MARKUP_NONE;
957 if (lnode->text) {
958 gchar *tmp;
959 tmp = lnode->text;
960 lnode->text = g_strconcat (serialize_tag (current->markup),
961 tmp, NULL);
962 g_free (tmp);
963 }
964 else if (lnode->binary.data) {
965 TO_IMPLEMENT;
966 }
967 else
968 lnode->text = g_strdup (serialize_tag (current->markup));
969
970 g_free (current);
971 queue = g_list_delete_link (queue, queue);
972
973 }
974 g_assert (queue);
975 current = (TextTag*) queue->data;
976 contextnode = current->rtnode;
977
978 goto retry;
979 }
980 }
981 }
982 }
983
984 /*g_print ("Token %d with SSOL %d\n", mt, ssol);*/
985 if (! tag_matched) {
986 RtNode *node;
987 node = g_new0 (RtNode, 1);
988 node->parent = contextnode;
989 node->offset = ssol;
990 node->markup = internal_markup_to_external (mt, &(node->offset));
991
992 if ((node->offset > 0) && (node->markup == RT_MARKUP_LIST)) {
993 /* add missing list nodes if offset > 0 */
994 if (contextnode->child) {
995 RtNode *n;
996 for (n = contextnode->child; n->next; n = n->next);
997
998 gint i = 0;
999 if (n->markup == RT_MARKUP_LIST)
1000 i = n->offset + 1;
1001 for (; i < node->offset; i++) {
1002 RtNode *tmpn;
1003 tmpn = g_new0 (RtNode, 1);
1004 tmpn->parent = contextnode;
1005 tmpn->markup = RT_MARKUP_LIST;
1006 tmpn->offset = i;
1007 tmpn->prev = n;
1008 n->next = tmpn;
1009 n = tmpn;
1010 }
1011
1012 n->next = node;
1013 node->prev = n;
1014 }
1015 else {
1016 gint i;
1017 RtNode *n = NULL;
1018 for (i = 0; i < node->offset; i++) {
1019 RtNode *tmpn;
1020 tmpn = g_new0 (RtNode, 1);
1021 tmpn->parent = contextnode;
1022 tmpn->markup = RT_MARKUP_LIST;
1023 tmpn->offset = i;
1024 if (n) {
1025 tmpn->prev = n;
1026 n->next = tmpn;
1027 }
1028 else
1029 contextnode->child = tmpn;
1030 n = tmpn;
1031 }
1032 g_assert (n);
1033 n->next = node;
1034 node->prev = n;
1035 }
1036 }
1037 else {
1038 if (contextnode->child) {
1039 RtNode *n;
1040 for (n = contextnode->child; n->next; n = n->next);
1041 n->next = node;
1042 node->prev = n;
1043 }
1044 else
1045 contextnode->child = node;
1046 }
1047 contextnode = node;
1048
1049 /* update @current */
1050 current = g_new0 (TextTag, 1);
1051 current->markup = mt;
1052 current->m_start = prev;
1053 current->m_end = ptr;
1054 current->rtnode = node;
1055
1056 queue = g_list_prepend (queue, current);
1057 }
1058 }
1059 prev = ptr;
1060 }
1061
1062 while (queue) {
1063 current = (TextTag*) queue->data;
1064 g_free (current);
1065 queue = g_list_delete_link (queue, queue);
1066 }
1067
1068 #ifdef GDA_DEBUG_NO
1069 g_print ("============= Before simplify_tree()\n");
1070 rt_dump_tree (retnode);
1071 simplify_tree (retnode);
1072 g_print ("============= After simplify_tree()\n");
1073 rt_dump_tree (retnode);
1074 #else
1075 simplify_tree (retnode);
1076 #endif
1077 return retnode;
1078 }
1079
1080
1081 /*
1082 *
1083 * Rendering
1084 *
1085 */
1086
/* number used to build the name of the next file written when rendering a picture,
 * incremented for each picture and shared by all the renderings */
static gint file_nb = 0;

/*
 * Data shared while rendering a tree
 */
typedef struct {
	GHashTable *hash; /* key = rtnode, value = corresponding xmlNodePtr */
	gchar *file_path; /* directory where files are written */
	gchar *file_prefix; /* prefix of the names of the files written */
} RenderingContext;
1096
1097
1098 /*
1099 * DocBook rendering
1100 */
1101 static void
1102 rich_text_node_to_docbook (RenderingContext *context, xmlNodePtr top_parent, RtNode *rtnode, xmlNodePtr parent)
1103 {
1104 xmlNodePtr pattach = NULL, cattach = NULL;
1105 gchar *realtext;
1106 g_assert (parent);
1107 g_assert (context);
1108
1109 if (rtnode->text) {
1110 gchar *optr, *nptr;
1111 gint len;
1112 len = strlen ((gchar*) rtnode->text);
1113 realtext = g_new (gchar, len + 1);
1114 for (optr = (gchar*) rtnode->text, nptr = realtext; *optr; optr++) {
1115 if (*optr != '\n') {
1116 *nptr = *optr;
1117 nptr++;
1118 }
1119 }
1120 *nptr = 0;
1121 }
1122 else
1123 realtext = (gchar *) rtnode->text;
1124
1125 switch (rtnode->markup) {
1126 case RT_MARKUP_NONE:
1127 if (parent) {
1128 xmlNodeAddContent (parent, BAD_CAST realtext);
1129 cattach = parent;
1130 }
1131 else {
1132 cattach = xmlNewNode (NULL, BAD_CAST "para");
1133 xmlNodeAddContent (cattach, BAD_CAST realtext);
1134 }
1135 break;
1136 case RT_MARKUP_BOLD:
1137 cattach = xmlNewChild (parent, NULL, BAD_CAST "emphasis", BAD_CAST realtext);
1138 xmlSetProp (cattach, BAD_CAST "role", BAD_CAST "bold");
1139 break;
1140 case RT_MARKUP_PARA:
1141 pattach = parent;
1142 if ((parent != top_parent) &&
1143 ! strcmp ((gchar*) parent->name, "para"))
1144 pattach = parent->parent;
1145 cattach = xmlNewChild (pattach, NULL, BAD_CAST "para", BAD_CAST realtext);
1146 parent = cattach;
1147 break;
1148 case RT_MARKUP_TT:
1149 case RT_MARKUP_VERBATIM:
1150 case RT_MARKUP_ITALIC:
1151 cattach = xmlNewChild (parent, NULL, BAD_CAST "emphasis", BAD_CAST realtext);
1152 break;
1153 case RT_MARKUP_STRIKE:
1154 cattach = xmlNewChild (parent, NULL, BAD_CAST "emphasis", BAD_CAST realtext);
1155 xmlSetProp (cattach, BAD_CAST "role", BAD_CAST "strikethrough");
1156 break;
1157 case RT_MARKUP_UNDERLINE:
1158 cattach = xmlNewChild (parent, NULL, BAD_CAST "emphasis", BAD_CAST realtext);
1159 xmlSetProp (cattach, BAD_CAST "role", BAD_CAST "underline");
1160 break;
1161 case RT_MARKUP_PICTURE: {
1162 gboolean saved = FALSE;
1163 gint type = 2; /* 0 for image, 1 for TXT and 2 for general binary */
1164 gchar *file, *tmp;
1165 tmp = g_strdup_printf ("%s_%04d.jpg", context->file_prefix,
1166 file_nb ++);
1167 file = g_build_filename (context->file_path, tmp, NULL);
1168 g_free (tmp);
1169
1170 #ifdef HAVE_GDKPIXBUF
1171 GdkPixdata pixdata;
1172 if (rtnode->binary.data &&
1173 gdk_pixdata_deserialize (&pixdata, rtnode->binary.binary_length,
1174 (guint8*) rtnode->binary.data, NULL)) {
1175 GdkPixbuf *pixbuf;
1176 pixbuf = gdk_pixbuf_from_pixdata (&pixdata, TRUE, NULL);
1177 if (pixbuf) {
1178 /* write to file */
1179 if (gdk_pixbuf_save (pixbuf, file, "jpeg", NULL,
1180 "quality", "100", NULL)) {
1181 g_print ("Writen JPG file '%s'\n", file);
1182 saved = TRUE;
1183 type = 0;
1184 }
1185
1186 g_object_unref (pixbuf);
1187 }
1188 }
1189 #endif
1190
1191 if (!saved) {
1192 if (rtnode->binary.data &&
1193 g_file_set_contents (file, (gchar*) rtnode->binary.data,
1194 rtnode->binary.binary_length, NULL)) {
1195 g_print ("Writen BIN file '%s'\n", file);
1196 saved = TRUE;
1197 type = 2;
1198 }
1199 else if (rtnode->text)
1200 type = 1;
1201 }
1202 if (! saved && (type != 1))
1203 TO_IMPLEMENT;
1204 else {
1205 switch (type) {
1206 case 0:
1207 pattach = xmlNewChild (parent, NULL, BAD_CAST "informalfigure",
1208 NULL);
1209 pattach = xmlNewChild (pattach, NULL, BAD_CAST "mediaobject",
1210 NULL);
1211 pattach = xmlNewChild (pattach, NULL, BAD_CAST "imageobject",
1212 NULL);
1213 cattach = xmlNewChild (pattach, NULL, BAD_CAST "imagedata",
1214 NULL);
1215 xmlSetProp (cattach, BAD_CAST "fileref", BAD_CAST file);
1216 break;
1217 case 1:
1218 xmlNodeAddContent (parent, BAD_CAST (rtnode->text));
1219 break;
1220 case 2:
1221 cattach = xmlNewChild (parent, NULL, BAD_CAST "ulink",
1222 BAD_CAST _("link"));
1223 xmlSetProp (cattach, BAD_CAST "url", BAD_CAST file);
1224 break;
1225 default:
1226 g_assert_not_reached ();
1227 }
1228 }
1229 g_free (file);
1230 break;
1231 }
1232 case RT_MARKUP_TITLE: {
1233 gchar *sect;
1234 pattach = parent;
1235 if (!strcmp ((gchar*) parent->name, "para"))
1236 pattach = parent->parent;
1237 sect = g_strdup_printf ("sect%d", rtnode->offset + 1);
1238 cattach = xmlNewChild (pattach, NULL, BAD_CAST sect, NULL);
1239 g_free (sect);
1240 pattach = xmlNewChild (cattach, NULL, BAD_CAST "title", BAD_CAST realtext);
1241 break;
1242 }
1243 case RT_MARKUP_LIST: {
1244 xmlNodePtr tmp = NULL;
1245
1246 if (rtnode->prev &&
1247 (rtnode->prev->markup == RT_MARKUP_LIST)) {
1248 tmp = g_hash_table_lookup (context->hash, rtnode->prev);
1249 g_assert (tmp);
1250 /* use the same <itemizedlist> */
1251 g_assert (!strcmp ((gchar*) tmp->name, "itemizedlist"));
1252 g_assert (rtnode->prev->offset == rtnode->offset);
1253 g_hash_table_insert (context->hash, rtnode, tmp);
1254 tmp = xmlNewChild (tmp, NULL, BAD_CAST "listitem", NULL);
1255 cattach = xmlNewChild (tmp, NULL, BAD_CAST "para", BAD_CAST realtext);
1256 }
1257 else {
1258 pattach = xmlNewChild (parent, NULL, BAD_CAST "itemizedlist", NULL);
1259 g_hash_table_insert (context->hash, rtnode, pattach);
1260 pattach = xmlNewChild (pattach, NULL, BAD_CAST "listitem", NULL);
1261 cattach = xmlNewChild (pattach, NULL, BAD_CAST "para", BAD_CAST realtext);
1262 }
1263 break;
1264 }
1265 default:
1266 if (rtnode->parent)
1267 g_assert_not_reached ();
1268 else
1269 cattach = parent;
1270 break;
1271 }
1272
1273 if (rtnode->text)
1274 g_free (realtext);
1275
1276 if (rtnode->child)
1277 rich_text_node_to_docbook (context, top_parent, rtnode->child, cattach);
1278 if (rtnode->next)
1279 rich_text_node_to_docbook (context, top_parent, rtnode->next, parent);
1280 }
1281
1282 void
1283 parse_rich_text_to_docbook (GdaReportEngine *eng, xmlNodePtr top, const gchar *text)
1284 {
1285 RtNode *rtnode;
1286 RenderingContext context;
1287 g_return_if_fail (!eng || GDA_IS_REPORT_ENGINE (eng));
1288
1289 context.hash = g_hash_table_new (NULL, NULL);
1290 context.file_path = ".";
1291 if (eng)
1292 g_object_get (eng, "output-directory", &context.file_path, NULL);
1293 context.file_prefix = "IMG";
1294 rtnode = rt_parse_text (text);
1295 /*rt_dump_tree (rtnode);*/
1296 rich_text_node_to_docbook (&context, top, rtnode, top);
1297 g_hash_table_destroy (context.hash);
1298 rt_free_node (rtnode);
1299
1300 if (eng)
1301 g_free (context.file_path);
1302 }
1303
1304 static xmlNodePtr
1305 new_html_child (xmlNodePtr parent, const gchar *ns, const gchar *name, const gchar *contents)
1306 {
1307 if (!parent || (parent->name && (*parent->name != 'h')))
1308 return xmlNewChild (parent, NULL, BAD_CAST name, BAD_CAST contents);
1309 else
1310 return new_html_child (parent->parent, ns, name, contents);
1311 }
1312
1313 /*
1314 * HTML rendering
1315 */
1316 static void
1317 rich_text_node_to_html (RenderingContext *context, xmlNodePtr top_parent, RtNode *rtnode, xmlNodePtr parent)
1318 {
1319 xmlNodePtr pattach = NULL, cattach = NULL;
1320 gchar *realtext;
1321 g_assert (parent);
1322 g_assert (context);
1323
1324 if (rtnode->text) {
1325 gchar *optr, *nptr;
1326 gint len;
1327 len = strlen ((gchar*) rtnode->text);
1328 realtext = g_new (gchar, len + 1);
1329 for (optr = (gchar*) rtnode->text, nptr = realtext; *optr; optr++) {
1330 if (*optr != '\n') {
1331 *nptr = *optr;
1332 nptr++;
1333 }
1334 }
1335 *nptr = 0;
1336 }
1337 else
1338 realtext = (gchar *) rtnode->text;
1339
1340 switch (rtnode->markup) {
1341 case RT_MARKUP_NONE:
1342 if (parent) {
1343 xmlNodeAddContent (parent, BAD_CAST realtext);
1344 cattach = parent;
1345 }
1346 else {
1347 cattach = xmlNewNode (NULL, BAD_CAST "para");
1348 xmlNodeAddContent (cattach, BAD_CAST realtext);
1349 }
1350 break;
1351 case RT_MARKUP_BOLD:
1352 cattach = new_html_child (parent, NULL, "b", realtext);
1353 break;
1354 case RT_MARKUP_PARA:
1355 pattach = parent;
1356 if ((parent != top_parent) &&
1357 ! strcmp ((gchar*) parent->name, "p"))
1358 pattach = parent->parent;
1359 cattach = new_html_child (pattach, NULL, "p", realtext);
1360 parent = cattach;
1361 break;
1362 case RT_MARKUP_TT:
1363 case RT_MARKUP_VERBATIM:
1364 case RT_MARKUP_ITALIC:
1365 cattach = new_html_child (parent, NULL, "i", realtext);
1366 break;
1367 case RT_MARKUP_STRIKE:
1368 cattach = new_html_child (parent, NULL, "del", realtext);
1369 break;
1370 case RT_MARKUP_UNDERLINE:
1371 cattach = new_html_child (parent, NULL, "ins", realtext);
1372 break;
1373 case RT_MARKUP_PICTURE: {
1374 gboolean saved = FALSE;
1375 gint type = 2; /* 0 for image, 1 for TXT and 2 for general binary */
1376 gchar *file, *tmp;
1377 tmp = g_strdup_printf ("%s_%04d.jpg", context->file_prefix,
1378 file_nb ++);
1379 file = g_build_filename (context->file_path, tmp, NULL);
1380 g_free (tmp);
1381
1382 #ifdef HAVE_GDKPIXBUF
1383 GdkPixdata pixdata;
1384 if (rtnode->binary.data &&
1385 gdk_pixdata_deserialize (&pixdata, rtnode->binary.binary_length,
1386 (guint8*) rtnode->binary.data, NULL)) {
1387 GdkPixbuf *pixbuf;
1388 pixbuf = gdk_pixbuf_from_pixdata (&pixdata, TRUE, NULL);
1389 if (pixbuf) {
1390 /* write to file */
1391 if (gdk_pixbuf_save (pixbuf, file, "jpeg", NULL,
1392 "quality", "100", NULL)) {
1393 g_print ("Writen JPG file '%s'\n", file);
1394 saved = TRUE;
1395 type = 0;
1396 }
1397
1398 g_object_unref (pixbuf);
1399 }
1400 }
1401 #endif
1402
1403 if (!saved) {
1404 if (rtnode->binary.data &&
1405 g_file_set_contents (file, (gchar*) rtnode->binary.data,
1406 rtnode->binary.binary_length, NULL)) {
1407 g_print ("Writen BIN file '%s'\n", file);
1408 saved = TRUE;
1409 type = 2;
1410 }
1411 else if (rtnode->text)
1412 type = 1;
1413 }
1414 if (! saved && (type != 1))
1415 TO_IMPLEMENT;
1416 else {
1417 switch (type) {
1418 case 0:
1419 pattach = new_html_child (parent, NULL, "img",
1420 NULL);
1421 xmlSetProp (pattach, BAD_CAST "src", BAD_CAST file);
1422 break;
1423 case 1:
1424 xmlNodeAddContent (parent, BAD_CAST (rtnode->text));
1425 break;
1426 case 2:
1427 cattach = new_html_child (parent, NULL, "ulink",
1428 _("link"));
1429 xmlSetProp (cattach, BAD_CAST "url", BAD_CAST file);
1430 break;
1431 default:
1432 g_assert_not_reached ();
1433 }
1434 }
1435 g_free (file);
1436 break;
1437 }
1438 case RT_MARKUP_TITLE: {
1439 gchar *sect;
1440 pattach = parent;
1441 if (!strcmp ((gchar*) parent->name, "para"))
1442 pattach = parent->parent;
1443 sect = g_strdup_printf ("h%d", rtnode->offset + 1);
1444 cattach = new_html_child (pattach, NULL, sect, realtext);
1445 g_free (sect);
1446 break;
1447 }
1448 case RT_MARKUP_LIST: {
1449 xmlNodePtr tmp = NULL;
1450
1451 if (rtnode->prev &&
1452 (rtnode->prev->markup == RT_MARKUP_LIST)) {
1453 tmp = g_hash_table_lookup (context->hash, rtnode->prev);
1454 g_assert (tmp);
1455 /* use the same <itemizedlist> */
1456 g_assert (!strcmp ((gchar*) tmp->name, "ul"));
1457 g_assert (rtnode->prev->offset == rtnode->offset);
1458 g_hash_table_insert (context->hash, rtnode, tmp);
1459 tmp = new_html_child (tmp, NULL, "li", NULL);
1460 cattach = new_html_child (tmp, NULL, "p", realtext);
1461 }
1462 else {
1463 pattach = new_html_child (parent, NULL, "ul", NULL);
1464 g_hash_table_insert (context->hash, rtnode, pattach);
1465 pattach = new_html_child (pattach, NULL, "li", NULL);
1466 cattach = new_html_child (pattach, NULL, "p", realtext);
1467 }
1468 break;
1469 }
1470 default:
1471 if (rtnode->parent)
1472 g_assert_not_reached ();
1473 else
1474 cattach = parent;
1475 break;
1476 }
1477
1478 if (rtnode->text)
1479 g_free (realtext);
1480
1481 if (rtnode->child)
1482 rich_text_node_to_html (context, top_parent, rtnode->child, cattach);
1483 if (rtnode->next)
1484 rich_text_node_to_html (context, top_parent, rtnode->next, parent);
1485 }
1486
1487 void
1488 parse_rich_text_to_html (GdaReportEngine *eng, xmlNodePtr top, const gchar *text)
1489 {
1490 RtNode *rtnode;
1491 RenderingContext context;
1492 g_return_if_fail (!eng || GDA_IS_REPORT_ENGINE (eng));
1493
1494 context.hash = g_hash_table_new (NULL, NULL);
1495 context.file_path = ".";
1496 if (eng)
1497 g_object_get (eng, "output-directory", &context.file_path, NULL);
1498 context.file_prefix = "IMG";
1499 rtnode = rt_parse_text (text);
1500 /*rt_dump_tree (rtnode);*/
1501 rich_text_node_to_html (&context, top, rtnode, top);
1502 g_hash_table_destroy (context.hash);
1503 rt_free_node (rtnode);
1504
1505 if (eng)
1506 g_free (context.file_path);
1507 }
1508