1 /* $Id: reader.cc,v 1.27 1997/04/17 20:24:41 dps Exp $ */
2 /* Reads the word document */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif /* HAVE_CONFIG_H */
6
7 #ifdef __GNUC__
8 #define alloca __builtin_alloca
9 #else
10 #if HAVE_ALLOCA_H
11 #include <alloca.h>
12 #else /* Do not have alloca.h */
13 #ifdef _AIX
14 #pragma alloca
15 #else /* not _AIX */
16 extern "C" char *alloca(int);
17 #endif /* _AIX */
18 #endif /* HAVE_ALLOCA_H */
19 #endif /* __GNUC__ */
20
21 #include <iostream>
22 using namespace std;
23 #include <stdio.h>
24 #ifdef HAVE_STRING_H
25 #include <string.h>
26 #endif /* HAVE_STRING_H */
27 #ifdef HAVE_STRINGS_H
28 #include <strings.h>
29 #endif /* HAVE_STRINGS_H */
30 #ifdef HAVE_CTYPE_H
31 #include <ctype.h>
32 #endif /* HAVE_CTYPE_H */
33 #include "word6.h"
34 #include "interface.h"
35 #include "reader.h"
36
/* This code is basically a layered filtration process. At the bottom layer
   is the next function, which reads a character from a word document.
   Pointers to objects are used extensively to avoid implicit copies. */
40
41 /* Please be aware that the junk should be stripped from in */
read_character(istream * in)42 static int read_character(istream *in)
43 {
44 char c,d;
45 static int s_ch=-1;
46
47 if (s_ch==-1)
48 {
49 if (in->eof())
50 return EOF;
51
52 in->get(c);
53 }
54 else
55 {
56 c=(unsigned char) s_ch;
57 s_ch=-1;
58 }
59
60 if (c=='\n')
61 c='\r';
62
63 switch(c)
64 {
65 case PAR_END:
66 return (CH_PAR | CONTROL_FLAG);
67
68 case TABLE_SEP:
69 if (!in->eof())
70 in->get(d);
71 else
72 d=c+1; // Not equal to c
73 if (d!=c)
74 {
75 s_ch=d; /* Push back character */
76 return (CH_FIELD | CONTROL_FLAG);
77 }
78 return (CH_ROW | CONTROL_FLAG);
79
80 case START_CTL:
81 return (CH_SPEC | CONTROL_FLAG);
82
83 case END_CTL:
84 return (CH_ENDSPEC | CONTROL_FLAG);
85
86 case HARD_RETURN:
87 return (CH_HDRTN | CONTROL_FLAG);
88
89 case NEW_PAGE:
90 return (CH_PAGE | CONTROL_FLAG);
91
92 case FOOTNOTE:
93 return (CH_FOOTNOTE | CONTROL_FLAG);
94
95 default:
96 if (c<' ')
97 return (CH_OTHER | CONTROL_FLAG);
98 else
99 return c;
100 }
101 /* NOT REACHED */
102 }
103
104
105 /* This function reads a paragraph, field of a table or whatever. It copies
106 everything in any embed tags unprocessed and leaves it in the element */
read_chunk_raw(void)107 void chunk_reader::read_chunk_raw(void)
108 {
109 int c, is_ctl=0;
110
111 text.zero(); // Zero text buffer
112 while ((c=read_character(in))!=(EOF | CONTROL_FLAG))
113 {
114 if (c & CONTROL_FLAG)
115 {
116 c &= ~CONTROL_FLAG;
117 /* If in embedded item then ignore all but end embed */
118 if (is_ctl)
119 {
120 if (c==CH_ENDSPEC)
121 {
122 is_ctl=0;
123 text.add(c);
124 }
125 continue;
126 }
127
128 switch(c)
129 {
130 case CH_PAR:
131 case CH_FIELD:
132 case CH_ROW:
133 break;
134
135 case CH_HDRTN:
136 text.add('\n'); // Add newline
137 continue; // Continue processing
138
139 case CH_OTHER:
140 continue; // Just ignore character
141
142 case CH_SPEC:
143 text.add(c);
144 if (!is_ctl)
145 is_ctl=1;
146 continue;
147
148 case CH_ENDSPEC:
149 cerr<<"Suprious ^U ignored\n";
150 continue;
151
152 case CH_PAGE:
153 case CH_FOOTNOTE:
154 text.add(c);
155 continue;
156
157 default:
158 cerr<<"Unexpected value "<<(c & (~CONTROL_FLAG))\
159 <<" switch\n";
160 continue;
161 }
162 type=c;
163 tptr=text;
164 return;
165 }
166 /* Not control or end of inclusion */
167 text.add(c);
168 }
169 type=CH_EOF;
170 tptr=text;
171 return;
172 }
173
174
175 /* This function reads chunks from using read_chunk_raw and hands them
176 out in contigous peices of the same type. Emebedded stuff gets
177 seperated from the rest here. The partial flag is set if only some
178 of a field is returned (usually because of an embedded item).
179 */
read_chunk(void)180 struct chunk_rtn chunk_reader::read_chunk(void)
181 {
182 const char *s; // Save stupid compilers
183 struct chunk_rtn res;
184
185 if (tptr==NULL)
186 this->read_chunk_raw();
187
188 s=tptr;
189
190 /* Embed */
191 if (*s==CH_SPEC)
192 {
193 while (*(++s))
194 {
195 if (*s==CH_ENDSPEC)
196 break;
197 res.txt.add(*s);
198 }
199 tptr=s+1;
200 res.type=CH_SPEC;
201 return res;
202 }
203
204 /* New page */
205 if (*s==CH_PAGE)
206 {
207 res.type=CH_PAGE;
208 tptr=s+1;
209 return res;
210 }
211
212 if (*s==CH_FOOTNOTE)
213 {
214 res.type=CH_FOOTNOTE;
215 tptr=s+1;
216 return res;
217 }
218
219 /* Normal */
220 while (*s)
221 {
222 if (*s==CH_SPEC || *s==CH_PAGE || *s==CH_FOOTNOTE)
223 {
224 tptr=s;
225 res.type=(PART_FLAG | type);
226 return res;
227 }
228
229 res.txt.add(*s);
230 s++;
231 }
232 res.type=type;
233 tptr=NULL;
234 text.zero(); // Save memory
235 return res;
236 }
237
238
239
240 /*----------------------------------------------------------------------*/
241 /* Tables and basic stuff */
242
243 /*
244 * Refill the token queue.
245 */
rd_token(void)246 int tok_seq::rd_token(void)
247 {
248 struct chunk_rtn r;
249 tok *t;
250 char other[2];
251
252 r=read_chunk();
253 if (r.type==CH_EOF)
254 return 0;
255
256 switch(r.type & ~PART_FLAG)
257 {
258 case CH_ROW:
259 if (table==NULL)
260 table=new(table_info);
261 /* Handle 1 field rows properly */
262 if (table->col==0)
263 {
264 t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
265 table->enqueue(t);
266 }
267 table->col++;
268 if (table->col>table->cols)
269 table->cols=table->col;
270 table->rows++;
271 table->tok_push(T_FIELD, &(r.txt));
272 t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
273 table->enqueue(t);
274 table->col=0;
275 break;
276
277 case CH_FIELD:
278 if (table==NULL)
279 {
280 table=new(table_info);
281 }
282 if (table->col==0)
283 {
284 t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
285 table->enqueue(t);
286 }
287 table->col++;
288 table->tok_push(T_FIELD, &(r.txt));
289 break;
290
291
292 case CH_PAR:
293 if (table!=NULL)
294 {
295 /* Table handling */
296 if (table->col!=0)
297 {
298 #if 0
299 table->tok_push(T_FIELD, &(r.txt));
300 t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
301 table->enqueue(t);
302 t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
303 table->enqueue(t);
304 for (i=0; i<table->col; i++)
305 {
306 t=new(tok)(T_FIELD, "\0", tok::TOK_START);
307 table->enqueue(t);
308 t=new(tok)(T_FIELD, (void *) NULL, tok::TOK_END);
309 table->enqueue(t);
310 }
311 table->rows++;
312 break;
313 #else
314 t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
315 table->enqueue(t);
316 if (table->col>table->cols)
317 table->cols=table->col;
318 table->rows++;
319 #endif
320 }
321 table->finish(&output);
322 delete(table);
323 table=NULL;
324 }
325
326 if (r.type & PART_FLAG)
327 {
328 tok *td;
329 td=new(tok)(T_PARAGRAPH, (const char *) (r.txt), tok::TOK_START);
330 output.enqueue(td);
331 }
332 else
333 tok_push(T_PARAGRAPH, &(r.txt));
334 break;
335
336 case CH_SPEC:
337 tok_push(T_SPEC, &(r.txt));
338 break;
339
340 case CH_PAGE:
341 case CH_FOOTNOTE:
342 other[0]=r.type;
343 other[1]='\0';
344 t=new(tok)(T_CODE, other, tok::TOK_START);
345 output.enqueue(t);
346 break;
347
348 default:
349 break;
350 }
351
352 return 1;
353 }
354
355
356
357 /*----------------------------------------------------------------------*/
358 /* Equations.... */
359
/*
 * Code that scans forward to the end of stuff that looks like an extension
 * of some maths that was the last thing.
 *
 * Starting at s, repeatedly accepts "<spaces> <operator> <spaces> <word>",
 * where the operator is one of + - * / = and a word runs up to the next
 * white space that is not inside parentheses (or to the end of the
 * string).
 *
 * Returns a pointer just past the last word accepted, or s itself when the
 * text does not start with an operator.
 */
const char *math_forward_scan(const char *s)
{
    const char *scan=s;
    const char *end=s;

    /* The ctype functions are only defined for unsigned char values and
       EOF, hence the casts: document text may contain bytes >= 0x80. */
    for (;;)
    {
        /* Skip spaces */
        while (isspace((unsigned char) *scan))
            scan++;

        /* Look for binary operator; without one assume no more maths */
        if (*scan!='+' && *scan!='-' && *scan!='*' && *scan!='/'
            && *scan!='=')
            break;

        /* Skip the operator and the spaces after it */
        scan++;
        while (isspace((unsigned char) *scan))
            scan++;

        /* Grab next word, never running past the end of the string */
        int blvl=0;
        while (*scan!='\0' && (!isspace((unsigned char) *scan) || blvl>0))
        {
            if (*scan=='(')
                blvl++;
            else if (*scan==')')
                blvl--;
            scan++;
        }

        end=scan;               // Update end
    }
    return end;
}
416
/*
 * Code that scans backwards to the start of stuff that looks like it should
 * have been prepended to the current maths.
 *
 * Working from the end of s towards its start, repeatedly accepts
 * "<word> <spaces> <operator> <spaces>", where the operator is one of
 * + - * / = and a word runs back to the previous white space that is not
 * inside parentheses.
 *
 * Returns a pointer to the white space character in front of the first
 * word accepted. When nothing is accepted the result points at the last
 * character of s. When the scan reaches the start of the string, and for
 * an empty string, the result is s.
 */
const char *math_reverse_scan(const char *s)
{
    /* Positions are kept as signed offsets so that "ran off the front"
       is simply a negative value and no pointer below s is ever formed. */
    long scan, start;
    int blvl;

    if (*s=='\0')
        return s;

    start=scan=(long) strlen(s)-1;

    /* Check whether the last part looks like more of the equation. The
       casts are needed because the ctype functions are only defined for
       unsigned char values and EOF. */
    while (scan>=0)
    {
        /* Skip spaces */
        while (scan>=0 && isspace((unsigned char) s[scan]))
            scan--;
        if (scan<0)
            return s;

        /* Look for binary operator; without one assume no more maths */
        if (s[scan]!='+' && s[scan]!='-' && s[scan]!='*' && s[scan]!='/'
            && s[scan]!='=')
            break;

        /* Skip the operator and the spaces in front of it */
        scan--;
        while (scan>=0 && isspace((unsigned char) s[scan]))
            scan--;
        if (scan<0)
            return s;

        /* Grab previous word */
        blvl=0;
        while (!isspace((unsigned char) s[scan]) || blvl>0)
        {
            if (s[scan]==')')
                blvl++;
            else if (s[scan]=='(')
                blvl--;
            if (scan==0)
                return s;       // Robustness fix
            scan--;
        }
        start=scan;             // Update start
    }
    return s+start;
}
476
477 /*
478 * Code to feed a token one at a time. (private, need prostproccessing
479 * to compensate for equation abuse by word users)
480 */
feed_token(void)481 const tok_seq::tok *tok_seq::feed_token(void)
482 {
483 while (output.is_empty())
484 {
485 if (!rd_token())
486 return NULL;
487 }
488 return output.dequeue();
489 }
490
491 /* Private token reader, compensates for equation abuse */
math_collect(void)492 const tok_seq::tok *tok_seq::math_collect(void)
493 {
494 const tok *rdt, *ntok, *nntok;
495 const char *mptr, *endptr;
496 char *s, *t;
497
498 math_aggregate: ;
499 if ((rdt=this->saved_tok)==NULL)
500 {
501 if ((rdt=this->feed_token())==NULL)
502 return NULL;
503 }
504 else
505 saved_tok=NULL;
506
507 switch (rdt->tokval & (~PART_FLAG))
508 {
509 case T_PARAGRAPH:
510 if (rdt->end!=tok::TOK_START || (rdt->tokval & PART_FLAG==0)
511 || rdt->data.d==NULL)
512 break;
513 if ((ntok=this->feed_token())==NULL)
514 break;
515 /* Passed all the easy rejection cases, invoke math_reverse_scan */
516 saved_tok=ntok;
517 if (ntok->tokval==T_SPEC && ntok->end==tok::TOK_START &&
518 ntok->data.d!=NULL && strncmp(ntok->data.d, "eq ", 3)==0)
519 {
520 mptr=math_reverse_scan(rdt->data.d);
521 endptr=rdt->data.d+strlen(rdt->data.d)-1;
522 if (mptr>=endptr)
523 break;
524 /* Allocate memory */
525 if ((s=(char *) malloc(mptr-rdt->data.d+1))==NULL)
526 {
527 cerr<<"Malloc read_token::malloc failure (fatal)\n";
528 exit(1);
529 }
530 if ((t=(char *) malloc(strlen(ntok->data.d)+endptr-mptr+1))==NULL)
531 {
532 free((void *) s);
533 cerr<<"Malloc read_token::malloc failure (fatal)\n";
534 exit(1);
535 }
536 /* Compute result strings */
537 memcpy(s, rdt->data.d, mptr-rdt->data.d);
538 *(s+(mptr-rdt->data.d))='\0';
539 memcpy(t, ntok->data.d, 3);
540 memcpy(t+3, mptr, endptr-mptr+1);
541 strcpy(t+3+(endptr-mptr)+1, ntok->data.d+3);
542 /* Replace original data */
543 free((void *) rdt->data.d);
544 ((tok *) rdt)->data.d=s;
545 free((void *) ntok->data.d);
546 ((tok *) ntok)->data.d=t;
547 }
548 break;
549
550
551 case T_SPEC:
552 if (rdt->end!=tok::TOK_START || rdt->data.d==NULL ||
553 strncmp(rdt->data.d, "eq ", 3)!=0)
554 break;
555 if ((nntok=this->feed_token())==NULL)
556 break; // this is the end of the SPEC.
557 if (nntok->tokval!=T_SPEC || nntok->end!=tok::TOK_END)
558 {
559 cerr<<"Unexpected value of nntok: type "
560 <<nntok->tokval<<" end "<<nntok->end<<"\n";
561 }
562 if ((ntok=this->feed_token())==NULL)
563 {
564 output.insert(nntok);
565 break;
566 }
567 /* Passed all the easy rejection cases, invoke math_forward_scan */
568 saved_tok=ntok;
569 if (ntok->tokval==T_PARAGRAPH && ntok->end!=tok::TOK_END &&
570 ntok->data.d!=NULL)
571 {
572 mptr=math_forward_scan(ntok->data.d);
573 endptr=ntok->data.d+strlen(ntok->data.d);
574 if (mptr==ntok->data.d)
575 {
576 output.insert(ntok); // This comes out second
577 output.insert(nntok);
578 saved_tok=NULL;
579 break;
580 }
581 /* Allocate memory */
582 if (*mptr!='\0')
583 {
584 if ((s=(char *) malloc(endptr-mptr))==NULL)
585 {
586 cerr<<"Malloc read_token::malloc failure (fatal)\n";
587 exit(1);
588 }
589 memcpy(s, mptr, endptr-mptr);
590 *(s+(endptr-mptr))='\0';
591 }
592 else
593 s=NULL;
594
595 if ((t=(char *)
596 malloc(strlen(rdt->data.d)+mptr-ntok->data.d+1))==NULL)
597 {
598 if (s!=NULL)
599 free((void *) s);
600 cerr<<"Malloc read_token::malloc failure (fatal)\n";
601 exit(1);
602 }
603 endptr=rdt->data.d+strlen(rdt->data.d);
604 memcpy(t, rdt->data.d, endptr-rdt->data.d);
605 memcpy(t+(endptr-rdt->data.d), ntok->data.d, mptr-ntok->data.d);
606 *(t+(endptr-rdt->data.d)+(mptr-ntok->data.d))='\0';
607 /* Afjust result */
608 free((void *) rdt->data.d);
609 ((tok *) rdt)->data.d=t;
610 if (*mptr=='\0')
611 {
612 /* If we consumed 100% continue seeking */
613 delete(ntok);
614 saved_tok=rdt;
615 output.insert(nntok); // Re-insert end of spec.
616 goto math_aggregate;
617 }
618 free((void *) ntok->data.d);
619 ((tok *) ntok)->data.d=s;
620 /* Not all consumed, return result */
621 }
622 else if (ntok->tokval==T_SPEC && ntok->end==tok::TOK_START &&
623 ntok->data.d!=NULL && strncmp(ntok->data.d, "eq ", 3)==0)
624 {
625 /* Combine consecutive eq's */
626 endptr=rdt->data.d+strlen(rdt->data.d);
627 if ((t=(char *)
628 malloc((endptr-rdt->data.d)+strlen(ntok->data.d)-2))==NULL)
629 {
630 cerr<<"Malloc read_token::malloc failure (fatal)\n";
631 exit(1);
632 }
633 memcpy(t, rdt->data.d, endptr-rdt->data.d);
634 strcpy(t+(endptr-rdt->data.d), ntok->data.d+3);
635 delete(nntok); // Reply on end of spec following this eq
636 delete(ntok); // Junk this eq
637 free((void *) rdt->data.d);
638 ((tok *) rdt)->data.d=t;
639 saved_tok=rdt;
640 goto math_aggregate;
641 }
642 output.insert(ntok); // This comes out second
643 output.insert(nntok);
644 saved_tok=NULL;
645 break;
646
647
648 default:
649 break;
650 }
651 return rdt;
652 }
653
654
655 /* Private choke point between equations and lists token reader */
eqn_rd_token(void)656 const tok_seq::tok *tok_seq::eqn_rd_token(void)
657 {
658 const tok *t, *n;
659 fifo<tok> *tf;
660 int tot, specs;
661
662 if ((t=this->math_collect())==NULL)
663 return NULL;
664
665 switch(t->tokval)
666 {
667 case T_PARAGRAPH:
668 if (t->end!=tok::TOK_START)
669 return t;
670 /* Check for spec only paragraph */
671
672 tf=new(fifo<tok>);
673 n=t;
674 tot=0;
675 specs=0;
676 /*
677 * This loop counts the number of characters in paragraphs and other
678 * items untilt the end of the paragraph. Each item is dumped on tf
679 * and this is inserted onto the beginning of the output queue.
680 */
681 while(1)
682 {
683 tf->enqueue(n);
684 if (n->tokval==T_PARAGRAPH)
685 {
686 if (n->end==tok::TOK_END)
687 break;
688 if (n->data.d!=NULL)
689 tot+=strlen(n->data.d);
690 if (tot>DISPL_TRESHOLD)
691 break;
692 }
693 else
694 specs++;
695
696 if (n->tokval!=T_SPEC && n->tokval!=T_OTHER && n->tokval!=T_PARAGRAPH)
697 {
698 tot+=DISPL_TRESHOLD;
699 break;
700 }
701 if ((n=this->math_collect())==NULL)
702 break;
703 }
704 /*
705 * If the total is small enough and there is one or more item that
706 * will make it through the filter. Since insert()ed things end up
707 * in reverse order we must first reverse the queue (this is the
708 * uncommon case, so it is OK if it costs a bit more).
709 */
710 if (tot<DISPL_TRESHOLD && specs>0)
711 {
712 tf->rev();
713 while ((n=tf->dequeue())!=NULL)
714 {
715 if (n->tokval!=T_PARAGRAPH)
716 output.insert(n);
717 else
718 delete(n);
719 }
720 }
721 else
722 {
723 output.ins_trans(tf);
724 }
725 delete(tf);
726 t=output.dequeue();
727 break;
728
729 default:
730 break;
731 }
732
733 return t;
734 }
735
736
737 /*----------------------------------------------------------------------*/
738 /* Now move on to lists.... */
739
740 /* Return NULL or a new list record */
list_type(const char * txt)741 struct tok_seq::list_info *tok_seq::list_type(const char *txt)
742 {
743 struct list_info *nl;
744 int i,n;
745
746 /* Determine initial number, if any */
747 if (!isdigit(txt[0]))
748 n=-1;
749 else
750 {
751 n=i=0;
752 for (n=0, i=0; isdigit(txt[i]); i++)
753 n=n*10+txt[i]-'0';
754 }
755
756 if (n==1)
757 {
758 nl=new(struct list_info);
759 nl->list_type=LIST_ENUMERATE;
760 nl->ldata.item_no=0;
761 nl->obj_cnt=0;
762 nl->text_cnt=0;
763 nl->last_item=new(fifo<tok_seq::tok>);
764 nl->items=0;
765 return nl;
766 }
767
768 /* a., b., c. */
769 if (txt[0]=='a')
770 {
771 i=(txt[1]=='.') ? 2 : 1;
772 if (isspace(txt[i]))
773 {
774 nl=new(struct list_info);
775 nl->list_type=LIST_ENUM_ALPHA;
776 nl->ldata.lbullet=txt[0]-1;
777 nl->obj_cnt=0;
778 nl->text_cnt=0;
779 nl->last_item=new(fifo<tok_seq::tok>);
780 nl->items=0;
781 return nl;
782 }
783 }
784
785 /* A., B., C. */
786 if (txt[0]=='A')
787 {
788 i=(txt[1]=='.') ? 2 : 1;
789 if (isspace(txt[i]))
790 {
791 nl=new(struct list_info);
792 nl->list_type=LIST_ENUM_ALPHA;
793 nl->ldata.lbullet=txt[0]-1;
794 nl->obj_cnt=0;
795 nl->text_cnt=0;
796 nl->last_item=new(fifo<tok_seq::tok>);
797 nl->items=0;
798 return nl;
799 }
800 }
801
802 /* At present we only know about one of bullet */
803 if (txt[0]==(char) BULLET_CODE)
804 {
805 nl=new(struct list_info);
806 nl->list_type=LIST_BULLET;
807 nl->ldata.lbullet=txt[0];
808 nl->obj_cnt=0;
809 nl->text_cnt=0;
810 nl->last_item=new(fifo<tok_seq::tok>);
811 nl->items=0;
812 return nl;
813 }
814
815 return NULL;
816 }
817
818
l_type_name(const struct list_info * lp)819 const char *tok_seq::l_type_name(const struct list_info *lp)
820 {
821 switch(lp->list_type)
822 {
823 case LIST_BULLET:
824 return "itemize";
825 /* Not reached */
826
827 case LIST_ENUMERATE:
828 return "enumerate";
829 /* Not reached */
830
831 case LIST_ENUM_ALPHA:
832 return "listalpha";
833 /* Not reached */
834
835 default:
836 return "programming error";
837 /* Not reached */
838 }
839 /* Not reached */
840 }
841
842
843 /* Dequeue a list and queue it is as paragraphs */
list_to_para(fifo<tok_seq::tok> * out,fifo<tok_seq::tok> * add)844 static void list_to_para(fifo<tok_seq::tok> *out, fifo<tok_seq::tok> *add)
845 {
846 tblock txt;
847 int was_item_st;
848 const tok_seq::tok *t;
849
850 was_item_st=0;
851 while(!add->is_empty())
852 {
853 t=add->dequeue();
854 switch(t->tokval)
855 {
856 case T_LIST:
857 delete(t);
858 continue;
859 /* Not reached */
860
861 case T_ITEM:
862 if (t->end==tok_seq::tok::TOK_START)
863 {
864 txt.add(t->data.d);
865 txt.add(' ');
866 was_item_st=1;
867 }
868 delete(t);
869 continue;
870 /* not reached */
871
872 case T_PARAGRAPH:
873 if (t->end!=tok_seq::tok::TOK_START)
874 break;
875 if (!was_item_st)
876 break;
877
878 txt.add(t->data.d);
879 delete(t);
880 t=new(tok_seq::tok)(T_PARAGRAPH, (const char *) txt,
881 tok_seq::tok::TOK_START);
882 txt.zero();
883 was_item_st=0;
884 break;
885
886 default:
887 break;
888 }
889 out->enqueue(t);
890 }
891 }
892
893 /*
894 * This handles cues for lists and the like. if ( ) else if ()
895 * ... gets messy fast
896 */
list_check(const char * txt,list_info ** lh)897 const char *tok_seq::list_check(const char *txt, list_info **lh)
898 {
899 struct list_info *lp, *nl;
900 char *s;
901 tok *t;
902 int i,n;
903
904 /* Determine initial number. This will not change */
905 if (!isdigit(txt[0]))
906 n=-1;
907 else
908 {
909 n=i=0;
910 for (n=0, i=0; isdigit(txt[i]); i++)
911 n=n*10+txt[i]-'0';
912 }
913
914 lp=*lh;
915 list_reconsider:
916 while (lp!=NULL)
917 {
918 *lh=lp; // Makes no change unless lp changed below
919 switch (lp->list_type)
920 {
921 case LIST_ENUMERATE:
922 if (n==lp->ldata.item_no+1)
923 {
924 if (txt[i]=='.')
925 i++;
926 while (isspace(txt[i]))
927 i++;
928 if ((s=(char *) alloca(i+1))==NULL)
929 {
930 fprintf(stderr,
931 "Warning: item label skipped due to lack"
932 " of memory\n");
933 }
934 else
935 {
936 memcpy(s, txt, i);
937 *(s+i)='\0';
938 }
939 if (lp->items!=0)
940 {
941 outqueue.transfer(lp->last_item);
942 t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
943 outqueue.enqueue(t);
944 }
945 t=new(tok)(T_ITEM, s, tok::TOK_START);
946 lp->last_item->enqueue(t);
947 t=new(tok)(T_PARAGRAPH, txt+i, tok::TOK_START);
948 lp->last_item->enqueue(t);
949
950 lp->ldata.item_no++;
951 lp->obj_cnt=0; // No not list objects after this one
952 lp->text_cnt=0;
953 lp->items++;
954 return NULL;
955 }
956 break;
957
958
959 case LIST_BULLET:
960 if (txt[0]==lp->ldata.lbullet)
961 {
962 for (i=0; (isspace(txt[i])); i++ ) ;
963 if ((s=(char *) alloca(2))==NULL)
964 {
965 fprintf(stderr,
966 "Warning: item label skipped due to lack"
967 " of memory\n");
968 }
969 else
970 {
971 *s=lp->ldata.lbullet;
972 *(s+1)='\0';
973 }
974 if (lp->items!=0)
975 {
976 outqueue.transfer(lp->last_item);
977 t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
978 outqueue.enqueue(t);
979 }
980 t=new(tok)(T_ITEM, s, tok::TOK_START);
981 lp->last_item->enqueue(t);
982
983 while (isspace(*(++txt)));
984 t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
985 lp->last_item->enqueue(t);
986
987 lp->obj_cnt=0; // No not list objects after this one
988 lp->text_cnt=0;
989 lp->items++;
990 return NULL;
991 }
992 break;
993
994 case LIST_ENUM_ALPHA:
995 if (txt[0]==lp->ldata.lbullet+1)
996 {
997 lp->ldata.lbullet++;
998 if ((s=(char *) alloca(3))==NULL)
999 {
1000 fprintf(stderr,
1001 "Warning: item label skipped due to lack"
1002 " of memory\n");
1003 }
1004 else
1005 {
1006 *s=lp->ldata.lbullet;
1007 if (txt[1]=='.')
1008 {
1009 *(s+1)='.';
1010 *(s+2)='\0';
1011 }
1012 else
1013 *(s+1)='\0';
1014 }
1015 if (lp->items!=0)
1016 {
1017 outqueue.transfer(lp->last_item);
1018 t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
1019 outqueue.enqueue(t);
1020 }
1021 t=new(tok)(T_ITEM, s, tok::TOK_START);
1022 lp->last_item->enqueue(t);
1023
1024 for (i=0; (!isspace(txt[i])); i++ ) ;
1025 for ( ;(isspace(txt[i])); i++) ;
1026 t=new(tok)(T_PARAGRAPH, txt+i, tok::TOK_START);
1027 lp->last_item->enqueue(t);
1028
1029 lp->obj_cnt=0; // No not list objects after this one
1030 lp->text_cnt=0;
1031 lp->items++;
1032 return NULL;
1033 }
1034 break;
1035
1036 default:
1037 fprintf(stderr, "Popping invalid list type %d\n",
1038 lp->ldata.item_no);
1039 nl=lp->next_list;
1040 free(lp);
1041 continue;
1042 }
1043
1044 /* Not the right thing */
1045 if ((nl=list_type(txt))!=NULL)
1046 {
1047 if (lp!=NULL && !(lp->last_item->is_empty()))
1048 outqueue.transfer(lp->last_item); // Output outstanding items
1049 t=new(tok)(T_LIST, l_type_name(nl), tok::TOK_START);
1050 nl->last_item->enqueue(t);
1051 nl->next_list=lp;
1052 lp=nl;
1053 continue;
1054 }
1055
1056 lp->obj_cnt++;
1057 lp->text_cnt +=strlen(txt);
1058 if (lp->obj_cnt>PAR_ITEM_SEP_LIMIT || lp->text_cnt>TEXT_ITEM_SEP_LIMIT)
1059 {
1060 /* If only one item, not a list */
1061 if (lp->items<2)
1062 {
1063 recycled=new(fifo<tok_seq::tok>);
1064 list_to_para(recycled, lp->last_item);
1065 delete(lp->last_item);
1066 nl=lp->next_list;
1067 free(lp);
1068 lp=nl;
1069 *lh=lp;
1070 if (lp!=NULL)
1071 lp->last_item->enqueue(recycled->dequeue());
1072 else
1073 outqueue.enqueue(recycled->dequeue());
1074 return NULL;
1075 }
1076
1077 /* Copy the list item */
1078 if (!(lp->last_item->is_empty()))
1079 {
1080 const tok *tf;
1081
1082 tf=lp->last_item->dequeue();
1083 while (tf->tokval!=T_PARAGRAPH || tf->end!=tok::TOK_END)
1084 {
1085 outqueue.enqueue(tf);
1086 if (lp->last_item->is_empty())
1087 goto lend_para_done;
1088 tf=lp->last_item->dequeue();
1089 }
1090 outqueue.enqueue(tf);
1091 lend_para_done: ;
1092 }
1093
1094 /* Finish off the list */
1095 t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
1096 outqueue.enqueue(t);
1097 t=new(tok)(T_LIST, l_type_name(lp), tok::TOK_END);
1098 outqueue.enqueue(t);
1099 nl=lp->next_list;
1100 recycled=lp->last_item; // Recycle elements queued
1101 t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
1102 recycled->enqueue(t);
1103 free(lp);
1104 lp=nl;
1105 *lh=lp;
1106 return NULL;
1107 }
1108
1109 t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
1110 lp->last_item->enqueue(t);
1111 return NULL;
1112 }
1113
1114 /* lp==NULL if we get here */
1115
1116 if ((nl=list_type(txt))!=NULL)
1117 {
1118 nl->next_list=lp;
1119 lp=nl;
1120 t=new(tok)(T_LIST, l_type_name(nl), tok::TOK_START);
1121 nl->last_item->enqueue(t);
1122 goto list_reconsider;
1123 }
1124
1125 return txt;
1126
1127 }
1128
read_token(void)1129 const tok_seq::tok *tok_seq::read_token(void)
1130 {
1131 const tok *tf;
1132 const char *tp;
1133 tok *t;
1134 struct list_info *nl;
1135
1136 while(outqueue.is_empty())
1137 {
1138 if (recycled!=NULL)
1139 {
1140 if (recycled->is_empty())
1141 {
1142 delete(recycled);
1143 recycled=NULL;
1144 continue; // outqueue still empty
1145 }
1146 tf=recycled->dequeue();
1147 }
1148 else
1149 tf=this->eqn_rd_token();
1150 if (tf==NULL)
1151 {
1152 if (!done_end)
1153 {
1154 tok *t;
1155 t=new(tok)(T_DOC, "End of word2x output", tok::TOK_END);
1156 output.enqueue(t);
1157 done_end=1;
1158 continue;
1159 }
1160 else
1161 return NULL;
1162 }
1163
1164 if (tf->tokval==T_DOC && tf->end==tok::TOK_END)
1165 {
1166 /* End all lists */
1167 while (lp!=NULL)
1168 {
1169 tp=l_type_name(lp);
1170 nl=lp->next_list;
1171
1172 if (!(lp->last_item->is_empty()))
1173 outqueue.transfer(lp->last_item);
1174 delete(lp->last_item);
1175 free(lp);
1176 t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
1177 outqueue.enqueue(t);
1178 t=new(tok)(T_LIST, tp, tok::TOK_END);
1179 outqueue.enqueue(t);
1180 lp=nl;
1181 }
1182 outqueue.enqueue(tf);
1183 }
1184 else if (tf->tokval==T_PARAGRAPH && tf->end==tok::TOK_START)
1185 {
1186 tp=list_check(tf->data.d, &lp);
1187 if (tp!=NULL)
1188 {
1189 t=new(tok)(T_PARAGRAPH, tp, tok::TOK_START);
1190 outqueue.enqueue(t);
1191 /* End paragraph will come from previous stage */
1192 }
1193 delete(tf);
1194 }
1195 else
1196 {
1197 if (lp==NULL)
1198 outqueue.enqueue(tf);
1199 else
1200 lp->last_item->enqueue(tf);
1201 }
1202 }
1203 tf=outqueue.dequeue();
1204 return tf;
1205 }
1206
1207
operator <<(ostream & os,const tok_seq::tok * d)1208 ostream &operator<<(ostream &os, const tok_seq::tok *d)
1209 {
1210 os<<'('<<d->tokval<<',';
1211 switch(d->dtype)
1212 {
1213 case 1:
1214 if (d->data.d!=NULL && strlen(d->data.d)>10)
1215 {
1216 char foo[11];
1217 int i;
1218
1219 for(i=0; i<7; i++)
1220 foo[i]=d->data.d[i];
1221 for ( ; i<10; i++)
1222 foo[i]='.';
1223 foo[10]='\0';
1224 os<<foo;
1225 }
1226 else
1227 os<<d->data.d;
1228 break;
1229 case 0:
1230 os<<d->data.table.rows<<'x'<<d->data.table.cols;
1231 break;
1232 }
1233 os<<','<<((d->end==tok_seq::tok::TOK_START) ? "start" : "end")<<')';
1234 return os;
1235 }
1236
/*
 * Assign one token to another.
 *
 * Text is duplicated with strdup so that both tokens own their own copy.
 * Any other payload (table size, or a text token without text) is copied
 * as it is; without that the target would keep whatever data it held
 * before.
 *
 * NOTE(review): text already held by the target is not released here.
 * Whether it can be freed safely depends on how tokens are constructed,
 * which is not visible in this file.
 */
tok_seq::tok &tok_seq::tok::operator=(const tok_seq::tok &d)
{
    /* Nothing to do, and nothing to duplicate, for x=x */
    if (this==&d)
        return (*this);

    tokval=d.tokval;
    end=d.end;
    dtype=d.dtype;
    if (d.dtype==TEXT && d.data.d!=NULL)
        data.d=strdup(d.data.d);
    else
        data=d.data;
    return (*this);
}
1248