1 /* $Id: reader.cc,v 1.27 1997/04/17 20:24:41 dps Exp $ */
2 /* Reads the word document */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif /* HAVE_CONFIG_H */
6 
7 #ifdef __GNUC__
8 #define alloca __builtin_alloca
9 #else
10 #if HAVE_ALLOCA_H
11 #include <alloca.h>
12 #else /* Do not have alloca.h */
13 #ifdef _AIX
14  #pragma alloca
15 #else /* not _AIX */
16 extern "C" char *alloca(int);
17 #endif /* _AIX */
18 #endif /* HAVE_ALLOCA_H */
19 #endif /* __GNUC__ */
20 
21 #include <iostream>
22 using namespace std;
23 #include <stdio.h>
24 #ifdef HAVE_STRING_H
25 #include <string.h>
26 #endif /* HAVE_STRING_H */
27 #ifdef HAVE_STRINGS_H
28 #include <strings.h>
29 #endif /* HAVE_STRINGS_H */
30 #ifdef HAVE_CTYPE_H
31 #include <ctype.h>
32 #endif /* HAVE_CTYPE_H */
33 #include "word6.h"
34 #include "interface.h"
35 #include "reader.h"
36 
37 /* This code is basically a layered filtration process. At the bottom layer
38    is this next function that reads a character from a word document. Pointers
39    to object are used extensively to avoid implicit copies. */
40 
41 /* Please be aware that the junk should be stripped from in */
read_character(istream * in)42 static int read_character(istream *in)
43 {
44     char c,d;
45     static int s_ch=-1;
46 
47     if (s_ch==-1)
48     {
49 	if (in->eof())
50 	    return EOF;
51 
52 	in->get(c);
53     }
54     else
55     {
56 	c=(unsigned char) s_ch;
57 	s_ch=-1;
58     }
59 
60     if (c=='\n')
61 	c='\r';
62 
63     switch(c)
64     {
65     case PAR_END:
66 	return (CH_PAR | CONTROL_FLAG);
67 
68     case TABLE_SEP:
69 	if (!in->eof())
70 	    in->get(d);
71 	else
72 	    d=c+1;		// Not equal to c
73 	if (d!=c)
74 	{
75 	    s_ch=d;		/* Push back character */
76 	    return (CH_FIELD | CONTROL_FLAG);
77 	}
78 	return (CH_ROW | CONTROL_FLAG);
79 
80     case START_CTL:
81 	return (CH_SPEC | CONTROL_FLAG);
82 
83     case END_CTL:
84 	return (CH_ENDSPEC | CONTROL_FLAG);
85 
86     case HARD_RETURN:
87 	return (CH_HDRTN | CONTROL_FLAG);
88 
89     case NEW_PAGE:
90 	return (CH_PAGE | CONTROL_FLAG);
91 
92     case FOOTNOTE:
93 	return (CH_FOOTNOTE | CONTROL_FLAG);
94 
95     default:
96 	if (c<' ')
97 	    return (CH_OTHER | CONTROL_FLAG);
98 	else
99 	    return c;
100     }
101     /* NOT REACHED */
102 }
103 
104 
105 /* This function reads a paragraph, field of a table or whatever. It copies
106    everything in any embed tags unprocessed and leaves it in the element */
read_chunk_raw(void)107 void chunk_reader::read_chunk_raw(void)
108 {
109     int c, is_ctl=0;
110 
111     text.zero();		// Zero text buffer
112     while ((c=read_character(in))!=(EOF | CONTROL_FLAG))
113     {
114 	if (c & CONTROL_FLAG)
115 	{
116 	    c &= ~CONTROL_FLAG;
117 	    /* If in embedded item then ignore all but end embed */
118 	    if (is_ctl)
119 	    {
120 		if (c==CH_ENDSPEC)
121 		{
122 		    is_ctl=0;
123 		    text.add(c);
124 		}
125 		continue;
126 	    }
127 
128 	    switch(c)
129 	    {
130 	    case CH_PAR:
131 	    case CH_FIELD:
132 	    case CH_ROW:
133 		break;
134 
135 	    case CH_HDRTN:
136 		text.add('\n');	// Add newline
137 		continue;	// Continue processing
138 
139 	    case CH_OTHER:
140 		continue;	// Just ignore character
141 
142 	    case CH_SPEC:
143 		text.add(c);
144 		if (!is_ctl)
145 		    is_ctl=1;
146 		continue;
147 
148 	    case CH_ENDSPEC:
149 		cerr<<"Suprious ^U ignored\n";
150 		continue;
151 
152 	    case CH_PAGE:
153 	    case CH_FOOTNOTE:
154 		text.add(c);
155 		continue;
156 
157 	    default:
158 		cerr<<"Unexpected value "<<(c & (~CONTROL_FLAG))\
159 		    <<" switch\n";
160 		continue;
161 	    }
162 	    type=c;
163 	    tptr=text;
164 	    return;
165 	}
166 	/* Not control or end of inclusion */
167 	text.add(c);
168     }
169     type=CH_EOF;
170     tptr=text;
171     return;
172 }
173 
174 
/* This function reads chunks using read_chunk_raw and hands them
   out in contiguous pieces of the same type. Embedded stuff gets
   separated from the rest here. The partial flag is set if only some
   of a field is returned (usually because of an embedded item).
*/
read_chunk(void)180 struct chunk_rtn chunk_reader::read_chunk(void)
181 {
182     const char *s;		// Save stupid compilers
183     struct chunk_rtn res;
184 
185     if (tptr==NULL)
186 	this->read_chunk_raw();
187 
188     s=tptr;
189 
190     /* Embed */
191     if (*s==CH_SPEC)
192     {
193 	while (*(++s))
194 	{
195 	    if (*s==CH_ENDSPEC)
196 		break;
197 	    res.txt.add(*s);
198 	}
199         tptr=s+1;
200 	res.type=CH_SPEC;
201 	return res;
202     }
203 
204     /* New page */
205     if (*s==CH_PAGE)
206     {
207 	res.type=CH_PAGE;
208 	tptr=s+1;
209 	return res;
210     }
211 
212     if (*s==CH_FOOTNOTE)
213     {
214 	res.type=CH_FOOTNOTE;
215 	tptr=s+1;
216 	return res;
217     }
218 
219     /* Normal */
220     while (*s)
221     {
222 	if (*s==CH_SPEC || *s==CH_PAGE || *s==CH_FOOTNOTE)
223 	{
224 	    tptr=s;
225 	    res.type=(PART_FLAG | type);
226 	    return res;
227 	}
228 
229 	res.txt.add(*s);
230 	s++;
231     }
232     res.type=type;
233     tptr=NULL;
234     text.zero();		// Save memory
235     return res;
236 }
237 
238 
239 
240 /*----------------------------------------------------------------------*/
241 /* Tables and basic stuff */
242 
243 /*
244  * Refill the token queue.
245  */
rd_token(void)246 int tok_seq::rd_token(void)
247 {
248     struct chunk_rtn r;
249     tok *t;
250     char other[2];
251 
252     r=read_chunk();
253     if (r.type==CH_EOF)
254 	return 0;
255 
256     switch(r.type & ~PART_FLAG)
257     {
258     case CH_ROW:
259 	if (table==NULL)
260 	    table=new(table_info);
261 	/* Handle 1 field rows properly */
262 	if (table->col==0)
263 	{
264 	    t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
265 	    table->enqueue(t);
266 	}
267 	table->col++;
268 	if (table->col>table->cols)
269 	    table->cols=table->col;
270 	table->rows++;
271 	table->tok_push(T_FIELD, &(r.txt));
272 	t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
273 	table->enqueue(t);
274 	table->col=0;
275 	break;
276 
277     case CH_FIELD:
278 	if (table==NULL)
279 	{
280 	    table=new(table_info);
281 	}
282 	if (table->col==0)
283 	{
284 	    t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
285 	    table->enqueue(t);
286 	}
287 	table->col++;
288 	table->tok_push(T_FIELD, &(r.txt));
289 	break;
290 
291 
292     case CH_PAR:
293 	if (table!=NULL)
294 	{
295 	    /* Table handling */
296 	    if (table->col!=0)
297 	    {
298 #if 0
299 		table->tok_push(T_FIELD, &(r.txt));
300 		t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
301 		table->enqueue(t);
302 		t=new(tok)(T_ROW, (void *) NULL, tok::TOK_START);
303 		table->enqueue(t);
304 		for (i=0; i<table->col; i++)
305 		{
306 		    t=new(tok)(T_FIELD, "\0", tok::TOK_START);
307 		    table->enqueue(t);
308 		    t=new(tok)(T_FIELD, (void *) NULL, tok::TOK_END);
309 		    table->enqueue(t);
310 		}
311 		table->rows++;
312 		break;
313 #else
314 		t=new(tok)(T_ROW, (void *) NULL, tok::TOK_END);
315 		table->enqueue(t);
316 		if (table->col>table->cols)
317 		    table->cols=table->col;
318 		table->rows++;
319 #endif
320 	    }
321 	    table->finish(&output);
322 	    delete(table);
323 	    table=NULL;
324 	}
325 
326 	if (r.type & PART_FLAG)
327 	{
328 	    tok *td;
329 	    td=new(tok)(T_PARAGRAPH, (const char *) (r.txt), tok::TOK_START);
330 	    output.enqueue(td);
331 	}
332 	else
333 	    tok_push(T_PARAGRAPH, &(r.txt));
334 	break;
335 
336     case CH_SPEC:
337 	tok_push(T_SPEC, &(r.txt));
338 	break;
339 
340     case CH_PAGE:
341     case CH_FOOTNOTE:
342 	other[0]=r.type;
343 	other[1]='\0';
344 	t=new(tok)(T_CODE, other, tok::TOK_START);
345 	output.enqueue(t);
346 	break;
347 
348     default:
349 	break;
350     }
351 
352     return 1;
353 }
354 
355 
356 
357 /*----------------------------------------------------------------------*/
358 /* Equations.... */
359 
360 /*
361  * Code that scans forward to the end of stuff that looks like an extension
362  * of some maths that was the last thing.
363  */
/*
 * Scan forward over text that looks like the continuation of an equation:
 * any number of "<operator> <word>" groups, where the operator is one of
 * + - * / = and a word runs to the next white space outside parentheses.
 *
 * s: NUL terminated text following the equation.
 *
 * Returns a pointer just past the last group accepted, which is s itself
 * if the text does not start (after optional white space) with an operator.
 */
const char *math_forward_scan(const char *s)
{
    const char *scan, *end;
    int blvl;			// Parenthesis nesting level

    end=scan=s;

    /* Check whether the first part looks like more of the equation */
    while (1)
    {
	/* Skip spaces. The casts matter: passing a negative char to the
	   ctype functions is undefined */
	while (isspace((unsigned char) *scan))
	    scan++;

	/* Look for binary operator */
	if (*scan=='+' || *scan=='-' || *scan=='*' || *scan=='/' ||
	    *scan=='=')
	{
	    /* skip spaces */
	    scan++;
	    while (isspace((unsigned char) *scan))
		scan++;

	    /* Grab next word */
	    blvl=0;
	    while (!isspace((unsigned char) *scan) || blvl>0)
	    {
		switch(*scan)
		{
		case '(':
		    blvl++;
		    break;

		case ')':
		    blvl--;
		    break;

		default:
		    break;
		}
		if (*scan=='\0')
		    break;	// Never run off the end of the text
		scan++;
	    }

	    end=scan;		// Update end
	}
	else
	    break;		// No binary operator, assume no text
    }
    return end;
}
416 
/*
 * Code that scans backwards to the start of stuff that looks like it should
 * have been prepended to the current maths.
 */
/*
 * Scan backwards from the end of s over "<word> <operator>" groups (the
 * operator being one of + - * / =) that look as if they belong to the
 * equation following the text.
 *
 * s: NUL terminated text preceding the equation.
 *
 * Returns a pointer to the white space in front of the first word
 * accepted, s if the scan reaches the start of the text, and the last
 * character of s if the text does not end with an operator. An empty
 * string yields s.
 */
const char *math_reverse_scan(const char *s)
{
    const char *scan, *start;
    int blvl;			// Parenthesis nesting level

    /* Empty text: there is no last character to start from */
    if (*s=='\0')
	return s;

    start=scan=s+strlen(s)-1;

    /* Check whether the last part looks like more of the equation */
    while (scan>=s)
    {
	/* Skip spaces. The casts matter: passing a negative char to the
	   ctype functions is undefined */
	while (scan>=s && isspace((unsigned char) *scan))
	    scan--;
	if (scan<s)
	    return s;

	/* Look for binary operator */
	if (*scan=='+' || *scan=='-' || *scan=='*' || *scan=='/' ||
	    *scan=='=')
	{
	    /* skip spaces */
	    scan--;
	    while (scan>=s && isspace((unsigned char) *scan))
		scan--;
	    if (scan<s)
		return s;

	    /* Grab previous word */
	    blvl=0;
	    while (!isspace((unsigned char) *scan) || blvl>0 )
	    {
		switch(*scan)
		{
		case ')':
		    blvl++;
		    break;

		case '(':
		    blvl--;
		    break;

		default:
		    break;
		}
		if (scan==s)
		    return s;	// Never run off the start of the text
		scan--;
	    }
	    start=scan;		// Update start
	}
	else
	    break;		// No binary operator, assume no text
    }
    return start;
}
476 
/*
 * Code to feed a token one at a time. (private, needs postprocessing
 * to compensate for equation abuse by word users)
 */
feed_token(void)481 const tok_seq::tok *tok_seq::feed_token(void)
482 {
483     while (output.is_empty())
484     {
485 	if (!rd_token())
486 	    return NULL;
487     }
488     return output.dequeue();
489 }
490 
491 /* Private token reader, compensates for equation abuse */
math_collect(void)492 const tok_seq::tok *tok_seq::math_collect(void)
493 {
494     const tok *rdt, *ntok, *nntok;
495     const char *mptr, *endptr;
496     char *s, *t;
497 
498  math_aggregate: ;
499     if ((rdt=this->saved_tok)==NULL)
500     {
501 	if ((rdt=this->feed_token())==NULL)
502 		return NULL;
503     }
504     else
505 	saved_tok=NULL;
506 
507     switch (rdt->tokval & (~PART_FLAG))
508     {
509     case T_PARAGRAPH:
510 	if (rdt->end!=tok::TOK_START || (rdt->tokval & PART_FLAG==0)
511 	    || rdt->data.d==NULL)
512 	    break;
513 	if ((ntok=this->feed_token())==NULL)
514 	    break;
515 	/* Passed all the easy rejection cases, invoke math_reverse_scan */
516 	saved_tok=ntok;
517 	if (ntok->tokval==T_SPEC && ntok->end==tok::TOK_START &&
518 	    ntok->data.d!=NULL && strncmp(ntok->data.d, "eq ", 3)==0)
519 	{
520 	    mptr=math_reverse_scan(rdt->data.d);
521 	    endptr=rdt->data.d+strlen(rdt->data.d)-1;
522 	    if (mptr>=endptr)
523 		break;
524 	    /* Allocate memory */
525 	    if ((s=(char *) malloc(mptr-rdt->data.d+1))==NULL)
526 	    {
527 		cerr<<"Malloc read_token::malloc failure (fatal)\n";
528 		exit(1);
529 	    }
530 	    if ((t=(char *) malloc(strlen(ntok->data.d)+endptr-mptr+1))==NULL)
531 	    {
532 		free((void *) s);
533 		cerr<<"Malloc read_token::malloc failure (fatal)\n";
534 		exit(1);
535 	    }
536 	    /* Compute result strings */
537 	    memcpy(s, rdt->data.d, mptr-rdt->data.d);
538 	    *(s+(mptr-rdt->data.d))='\0';
539 	    memcpy(t, ntok->data.d, 3);
540 	    memcpy(t+3, mptr, endptr-mptr+1);
541 	    strcpy(t+3+(endptr-mptr)+1, ntok->data.d+3);
542 	    /* Replace original data */
543 	    free((void *) rdt->data.d);
544 	    ((tok *) rdt)->data.d=s;
545 	    free((void *) ntok->data.d);
546 	    ((tok *) ntok)->data.d=t;
547 	}
548 	break;
549 
550 
551     case T_SPEC:
552 	if (rdt->end!=tok::TOK_START || rdt->data.d==NULL ||
553 	    strncmp(rdt->data.d, "eq ", 3)!=0)
554 	    break;
555 	if ((nntok=this->feed_token())==NULL)
556 	    break;		// this is the end of the SPEC.
557 	if (nntok->tokval!=T_SPEC || nntok->end!=tok::TOK_END)
558 	{
559 	    cerr<<"Unexpected value of nntok: type "
560 		<<nntok->tokval<<" end "<<nntok->end<<"\n";
561 	}
562 	if ((ntok=this->feed_token())==NULL)
563 	{
564 	    output.insert(nntok);
565 	    break;
566 	}
567 	/* Passed all the easy rejection cases, invoke math_forward_scan */
568 	saved_tok=ntok;
569 	if (ntok->tokval==T_PARAGRAPH && ntok->end!=tok::TOK_END &&
570 	    ntok->data.d!=NULL)
571 	{
572 	    mptr=math_forward_scan(ntok->data.d);
573 	    endptr=ntok->data.d+strlen(ntok->data.d);
574 	    if (mptr==ntok->data.d)
575 	    {
576 		output.insert(ntok); // This comes out second
577 		output.insert(nntok);
578 		saved_tok=NULL;
579 		break;
580 	    }
581 	    /* Allocate memory */
582 	    if (*mptr!='\0')
583 	    {
584 		if ((s=(char *) malloc(endptr-mptr))==NULL)
585 		{
586 		    cerr<<"Malloc read_token::malloc failure (fatal)\n";
587 		    exit(1);
588 		}
589 		memcpy(s, mptr, endptr-mptr);
590 		*(s+(endptr-mptr))='\0';
591 	    }
592 	    else
593 		s=NULL;
594 
595 	    if ((t=(char *)
596 		 malloc(strlen(rdt->data.d)+mptr-ntok->data.d+1))==NULL)
597 	    {
598 		if (s!=NULL)
599 		    free((void *) s);
600 		cerr<<"Malloc read_token::malloc failure (fatal)\n";
601 		exit(1);
602 	    }
603 	    endptr=rdt->data.d+strlen(rdt->data.d);
604 	    memcpy(t, rdt->data.d, endptr-rdt->data.d);
605 	    memcpy(t+(endptr-rdt->data.d), ntok->data.d, mptr-ntok->data.d);
606 	    *(t+(endptr-rdt->data.d)+(mptr-ntok->data.d))='\0';
607 	    /* Afjust result */
608 	    free((void *) rdt->data.d);
609 	    ((tok *) rdt)->data.d=t;
610 	    if (*mptr=='\0')
611 	    {
612 		/* If we consumed 100% continue seeking */
613 		delete(ntok);
614 		saved_tok=rdt;
615 		output.insert(nntok); // Re-insert end of spec.
616 		goto math_aggregate;
617 	    }
618 	    free((void *) ntok->data.d);
619 	    ((tok *) ntok)->data.d=s;
620 	    /* Not all consumed, return result */
621 	}
622 	else if (ntok->tokval==T_SPEC && ntok->end==tok::TOK_START &&
623 		 ntok->data.d!=NULL && strncmp(ntok->data.d, "eq ", 3)==0)
624 	{
625 	    /* Combine consecutive eq's */
626 	    endptr=rdt->data.d+strlen(rdt->data.d);
627 	    if ((t=(char *)
628 		 malloc((endptr-rdt->data.d)+strlen(ntok->data.d)-2))==NULL)
629 	    {
630 		cerr<<"Malloc read_token::malloc failure (fatal)\n";
631 		exit(1);
632 	    }
633 	    memcpy(t, rdt->data.d, endptr-rdt->data.d);
634 	    strcpy(t+(endptr-rdt->data.d), ntok->data.d+3);
635 	    delete(nntok);	// Reply on end of spec following this eq
636 	    delete(ntok);	// Junk this eq
637 	    free((void *) rdt->data.d);
638 	    ((tok *) rdt)->data.d=t;
639 	    saved_tok=rdt;
640 	    goto math_aggregate;
641 	}
642 	output.insert(ntok); // This comes out second
643     	output.insert(nntok);
644 	saved_tok=NULL;
645 	break;
646 
647 
648     default:
649 	break;
650     }
651     return rdt;
652 }
653 
654 
655 /* Private choke point between equations and lists token reader */
eqn_rd_token(void)656 const tok_seq::tok *tok_seq::eqn_rd_token(void)
657 {
658     const tok *t, *n;
659     fifo<tok> *tf;
660     int tot, specs;
661 
662     if ((t=this->math_collect())==NULL)
663 	return NULL;
664 
665     switch(t->tokval)
666     {
667     case T_PARAGRAPH:
668 	if (t->end!=tok::TOK_START)
669 	    return t;
670 	/* Check for spec only paragraph */
671 
672 	tf=new(fifo<tok>);
673 	n=t;
674 	tot=0;
675 	specs=0;
676 	/*
677 	 * This loop counts the number of characters in paragraphs and other
678 	 * items untilt the end of the paragraph. Each item is dumped on tf
679 	 * and this is inserted onto the beginning of the output queue.
680 	 */
681 	while(1)
682 	{
683 	    tf->enqueue(n);
684 	    if (n->tokval==T_PARAGRAPH)
685 	    {
686 		if (n->end==tok::TOK_END)
687 		    break;
688 		if (n->data.d!=NULL)
689 		    tot+=strlen(n->data.d);
690 		if (tot>DISPL_TRESHOLD)
691 		    break;
692 	    }
693 	    else
694 		specs++;
695 
696 	    if (n->tokval!=T_SPEC && n->tokval!=T_OTHER && n->tokval!=T_PARAGRAPH)
697 	    {
698 		tot+=DISPL_TRESHOLD;
699 		break;
700 	    }
701 	    if ((n=this->math_collect())==NULL)
702 		break;
703 	}
704 	/*
705 	 * If the total is small enough and there is one or more item that
706 	 * will make it through the filter. Since insert()ed things end up
707 	 * in reverse order we must first reverse the queue (this is the
708 	 * uncommon case, so it is OK if it costs a bit more).
709 	 */
710 	if (tot<DISPL_TRESHOLD && specs>0)
711 	{
712 	    tf->rev();
713 	    while ((n=tf->dequeue())!=NULL)
714 	    {
715 		if (n->tokval!=T_PARAGRAPH)
716 		    output.insert(n);
717 		else
718 		    delete(n);
719 	    }
720 	}
721 	else
722 	{
723 	    output.ins_trans(tf);
724 	}
725 	delete(tf);
726 	t=output.dequeue();
727 	break;
728 
729     default:
730 	break;
731     }
732 
733     return t;
734 }
735 
736 
737 /*----------------------------------------------------------------------*/
738 /* Now move on to lists.... */
739 
740 /* Return NULL or a new list record */
list_type(const char * txt)741 struct tok_seq::list_info *tok_seq::list_type(const char *txt)
742 {
743     struct list_info *nl;
744     int i,n;
745 
746     /* Determine initial number, if any */
747     if (!isdigit(txt[0]))
748 	n=-1;
749     else
750     {
751 	n=i=0;
752 	for (n=0, i=0; isdigit(txt[i]); i++)
753 	    n=n*10+txt[i]-'0';
754     }
755 
756     if (n==1)
757     {
758 	nl=new(struct list_info);
759 	nl->list_type=LIST_ENUMERATE;
760 	nl->ldata.item_no=0;
761 	nl->obj_cnt=0;
762 	nl->text_cnt=0;
763 	nl->last_item=new(fifo<tok_seq::tok>);
764 	nl->items=0;
765 	return nl;
766     }
767 
768     /* a., b., c. */
769     if (txt[0]=='a')
770     {
771 	i=(txt[1]=='.') ? 2 : 1;
772 	if (isspace(txt[i]))
773 	{
774 	    nl=new(struct list_info);
775 	    nl->list_type=LIST_ENUM_ALPHA;
776 	    nl->ldata.lbullet=txt[0]-1;
777 	    nl->obj_cnt=0;
778 	    nl->text_cnt=0;
779 	    nl->last_item=new(fifo<tok_seq::tok>);
780 	    nl->items=0;
781 	    return nl;
782 	}
783     }
784 
785     /* A., B., C. */
786     if (txt[0]=='A')
787     {
788 	i=(txt[1]=='.') ? 2 : 1;
789 	if (isspace(txt[i]))
790 	{
791 	    nl=new(struct list_info);
792 	    nl->list_type=LIST_ENUM_ALPHA;
793 	    nl->ldata.lbullet=txt[0]-1;
794 	    nl->obj_cnt=0;
795 	    nl->text_cnt=0;
796 	    nl->last_item=new(fifo<tok_seq::tok>);
797 	    nl->items=0;
798 	    return nl;
799 	}
800     }
801 
802     /* At present we only know about one of bullet */
803     if (txt[0]==(char) BULLET_CODE)
804     {
805 	nl=new(struct list_info);
806 	nl->list_type=LIST_BULLET;
807 	nl->ldata.lbullet=txt[0];
808 	nl->obj_cnt=0;
809 	nl->text_cnt=0;
810 	nl->last_item=new(fifo<tok_seq::tok>);
811 	nl->items=0;
812 	return nl;
813     }
814 
815     return NULL;
816 }
817 
818 
l_type_name(const struct list_info * lp)819 const char *tok_seq::l_type_name(const struct list_info *lp)
820 {
821     switch(lp->list_type)
822     {
823     case LIST_BULLET:
824 	return "itemize";
825 	/* Not reached */
826 
827     case LIST_ENUMERATE:
828 	return "enumerate";
829 	/* Not reached */
830 
831     case LIST_ENUM_ALPHA:
832 	return "listalpha";
833 	/* Not reached */
834 
835     default:
836 	return "programming error";
837 	/* Not reached */
838     }
839     /* Not reached */
840 }
841 
842 
/* Dequeue a list and queue it as paragraphs */
list_to_para(fifo<tok_seq::tok> * out,fifo<tok_seq::tok> * add)844 static void list_to_para(fifo<tok_seq::tok> *out, fifo<tok_seq::tok> *add)
845 {
846     tblock txt;
847     int was_item_st;
848     const tok_seq::tok *t;
849 
850     was_item_st=0;
851     while(!add->is_empty())
852     {
853 	t=add->dequeue();
854 	switch(t->tokval)
855 	{
856 	case T_LIST:
857 	    delete(t);
858 	    continue;
859 	    /* Not reached */
860 
861 	case T_ITEM:
862 	    if (t->end==tok_seq::tok::TOK_START)
863 	    {
864 		txt.add(t->data.d);
865 		txt.add(' ');
866 		was_item_st=1;
867 	    }
868 	    delete(t);
869 	    continue;
870 	    /* not reached */
871 
872 	case T_PARAGRAPH:
873 	    if (t->end!=tok_seq::tok::TOK_START)
874 		break;
875 	    if (!was_item_st)
876 		break;
877 
878 	    txt.add(t->data.d);
879 	    delete(t);
880 	    t=new(tok_seq::tok)(T_PARAGRAPH, (const char *) txt,
881 				tok_seq::tok::TOK_START);
882 	    txt.zero();
883 	    was_item_st=0;
884 	    break;
885 
886 	default:
887 	    break;
888 	}
889 	out->enqueue(t);
890     }
891 }
892 
893 /*
894  * This handles cues for lists and the like. if ( ) else if ()
895  * ... gets messy fast
896  */
list_check(const char * txt,list_info ** lh)897 const char *tok_seq::list_check(const char *txt, list_info **lh)
898 {
899     struct list_info *lp, *nl;
900     char *s;
901     tok *t;
902     int i,n;
903 
904     /* Determine initial number. This will not change */
905     if (!isdigit(txt[0]))
906 	n=-1;
907     else
908     {
909 	n=i=0;
910 	for (n=0, i=0; isdigit(txt[i]); i++)
911 	    n=n*10+txt[i]-'0';
912     }
913 
914     lp=*lh;
915  list_reconsider:
916     while (lp!=NULL)
917     {
918 	*lh=lp;			// Makes no change unless lp changed below
919 	switch (lp->list_type)
920 	{
921 	case LIST_ENUMERATE:
922 	    if (n==lp->ldata.item_no+1)
923 	    {
924 		if (txt[i]=='.')
925 		    i++;
926 		while (isspace(txt[i]))
927 		    i++;
928 		if ((s=(char *) alloca(i+1))==NULL)
929 		{
930 		    fprintf(stderr,
931 			    "Warning: item label skipped due to lack"
932 			    " of memory\n");
933 		}
934 		else
935 		{
936 		    memcpy(s, txt, i);
937 		    *(s+i)='\0';
938 		}
939 		if (lp->items!=0)
940 		{
941 		    outqueue.transfer(lp->last_item);
942 		    t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
943 		    outqueue.enqueue(t);
944 		}
945 		t=new(tok)(T_ITEM, s, tok::TOK_START);
946 		lp->last_item->enqueue(t);
947 		t=new(tok)(T_PARAGRAPH, txt+i, tok::TOK_START);
948 		lp->last_item->enqueue(t);
949 
950 		lp->ldata.item_no++;
951 		lp->obj_cnt=0;	// No not list objects after this one
952 		lp->text_cnt=0;
953 		lp->items++;
954 		return NULL;
955 	    }
956 	    break;
957 
958 
959 	case LIST_BULLET:
960 	    if (txt[0]==lp->ldata.lbullet)
961 	    {
962 		for (i=0; (isspace(txt[i])); i++ ) ;
963 		if ((s=(char *) alloca(2))==NULL)
964 		{
965 		    fprintf(stderr,
966 			    "Warning: item label skipped due to lack"
967 			    " of memory\n");
968 		}
969 		else
970 		{
971 		    *s=lp->ldata.lbullet;
972 		    *(s+1)='\0';
973 		}
974 		if (lp->items!=0)
975 		{
976 		    outqueue.transfer(lp->last_item);
977 		    t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
978 		    outqueue.enqueue(t);
979 		}
980 		t=new(tok)(T_ITEM, s, tok::TOK_START);
981 		lp->last_item->enqueue(t);
982 
983 		while (isspace(*(++txt)));
984 		t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
985 		lp->last_item->enqueue(t);
986 
987 		lp->obj_cnt=0;	// No not list objects after this one
988 		lp->text_cnt=0;
989 		lp->items++;
990 		return NULL;
991 	    }
992 	    break;
993 
994 	case LIST_ENUM_ALPHA:
995 	    if (txt[0]==lp->ldata.lbullet+1)
996 	    {
997 		lp->ldata.lbullet++;
998 		if ((s=(char *) alloca(3))==NULL)
999 		{
1000 		    fprintf(stderr,
1001 			    "Warning: item label skipped due to lack"
1002 			    " of memory\n");
1003 		}
1004 		else
1005 		{
1006 		    *s=lp->ldata.lbullet;
1007 		    if (txt[1]=='.')
1008 		    {
1009 			*(s+1)='.';
1010 			*(s+2)='\0';
1011 		    }
1012 		    else
1013 			*(s+1)='\0';
1014 		}
1015 		if (lp->items!=0)
1016 		{
1017 		    outqueue.transfer(lp->last_item);
1018 		    t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
1019 		    outqueue.enqueue(t);
1020 		}
1021 		t=new(tok)(T_ITEM, s, tok::TOK_START);
1022 		lp->last_item->enqueue(t);
1023 
1024 		for (i=0; (!isspace(txt[i])); i++ ) ;
1025 		for ( ;(isspace(txt[i])); i++) ;
1026 		t=new(tok)(T_PARAGRAPH, txt+i, tok::TOK_START);
1027 		lp->last_item->enqueue(t);
1028 
1029 		lp->obj_cnt=0;	// No not list objects after this one
1030 		lp->text_cnt=0;
1031 		lp->items++;
1032 		return NULL;
1033 	    }
1034 	    break;
1035 
1036 	default:
1037 	    fprintf(stderr, "Popping invalid list type %d\n",
1038 		    lp->ldata.item_no);
1039 	    nl=lp->next_list;
1040 	    free(lp);
1041 	    continue;
1042 	}
1043 
1044 	/* Not the right thing */
1045 	if ((nl=list_type(txt))!=NULL)
1046 	{
1047 	    if (lp!=NULL && !(lp->last_item->is_empty()))
1048 		outqueue.transfer(lp->last_item); // Output outstanding items
1049 	    t=new(tok)(T_LIST, l_type_name(nl), tok::TOK_START);
1050 	    nl->last_item->enqueue(t);
1051 	    nl->next_list=lp;
1052 	    lp=nl;
1053 	    continue;
1054 	}
1055 
1056 	lp->obj_cnt++;
1057 	lp->text_cnt +=strlen(txt);
1058 	if (lp->obj_cnt>PAR_ITEM_SEP_LIMIT || lp->text_cnt>TEXT_ITEM_SEP_LIMIT)
1059 	{
1060 	    /* If only one item, not a list */
1061 	    if (lp->items<2)
1062 	    {
1063 		recycled=new(fifo<tok_seq::tok>);
1064 		list_to_para(recycled, lp->last_item);
1065 		delete(lp->last_item);
1066 		nl=lp->next_list;
1067 		free(lp);
1068 		lp=nl;
1069 		*lh=lp;
1070 		if (lp!=NULL)
1071 		    lp->last_item->enqueue(recycled->dequeue());
1072 		else
1073 		    outqueue.enqueue(recycled->dequeue());
1074 		return NULL;
1075 	    }
1076 
1077 	    /* Copy the list item */
1078 	    if (!(lp->last_item->is_empty()))
1079 	    {
1080 		const tok *tf;
1081 
1082 		tf=lp->last_item->dequeue();
1083 		while (tf->tokval!=T_PARAGRAPH || tf->end!=tok::TOK_END)
1084 		{
1085 		    outqueue.enqueue(tf);
1086 		    if (lp->last_item->is_empty())
1087 			goto lend_para_done;
1088 		    tf=lp->last_item->dequeue();
1089 		}
1090 		outqueue.enqueue(tf);
1091 	    lend_para_done: ;
1092 	    }
1093 
1094 	    /* Finish off the list */
1095 	    t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
1096 	    outqueue.enqueue(t);
1097 	    t=new(tok)(T_LIST, l_type_name(lp), tok::TOK_END);
1098 	    outqueue.enqueue(t);
1099 	    nl=lp->next_list;
1100 	    recycled=lp->last_item;	// Recycle elements queued
1101 	    t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
1102 	    recycled->enqueue(t);
1103 	    free(lp);
1104 	    lp=nl;
1105 	    *lh=lp;
1106 	    return NULL;
1107 	}
1108 
1109 	t=new(tok)(T_PARAGRAPH, txt, tok::TOK_START);
1110 	lp->last_item->enqueue(t);
1111 	return NULL;
1112     }
1113 
1114     /* lp==NULL if we get here */
1115 
1116     if ((nl=list_type(txt))!=NULL)
1117     {
1118 	nl->next_list=lp;
1119 	lp=nl;
1120 	t=new(tok)(T_LIST, l_type_name(nl), tok::TOK_START);
1121 	nl->last_item->enqueue(t);
1122 	goto list_reconsider;
1123     }
1124 
1125     return txt;
1126 
1127 }
1128 
read_token(void)1129 const tok_seq::tok *tok_seq::read_token(void)
1130 {
1131     const tok *tf;
1132     const char *tp;
1133     tok *t;
1134     struct list_info *nl;
1135 
1136     while(outqueue.is_empty())
1137     {
1138 	if (recycled!=NULL)
1139 	{
1140 	    if (recycled->is_empty())
1141 	    {
1142 		delete(recycled);
1143 		recycled=NULL;
1144 		continue;	// outqueue still empty
1145 	    }
1146 	    tf=recycled->dequeue();
1147 	}
1148 	else
1149 	    tf=this->eqn_rd_token();
1150 	if (tf==NULL)
1151 	{
1152 	    if (!done_end)
1153 	    {
1154 		tok *t;
1155 		t=new(tok)(T_DOC, "End of word2x output", tok::TOK_END);
1156 		output.enqueue(t);
1157 		done_end=1;
1158 		continue;
1159 	    }
1160 	    else
1161 		return NULL;
1162 	}
1163 
1164 	if (tf->tokval==T_DOC && tf->end==tok::TOK_END)
1165 	{
1166 	    /* End all lists */
1167 	    while (lp!=NULL)
1168 	    {
1169 		tp=l_type_name(lp);
1170 		nl=lp->next_list;
1171 
1172 		if (!(lp->last_item->is_empty()))
1173 		    outqueue.transfer(lp->last_item);
1174 		delete(lp->last_item);
1175 		free(lp);
1176 		t=new(tok)(T_ITEM, (void *) NULL, tok::TOK_END);
1177 		outqueue.enqueue(t);
1178 		t=new(tok)(T_LIST, tp, tok::TOK_END);
1179 		outqueue.enqueue(t);
1180 		lp=nl;
1181 	    }
1182 	    outqueue.enqueue(tf);
1183 	}
1184 	else if (tf->tokval==T_PARAGRAPH && tf->end==tok::TOK_START)
1185 	{
1186 	    tp=list_check(tf->data.d, &lp);
1187 	    if (tp!=NULL)
1188 	    {
1189 		t=new(tok)(T_PARAGRAPH, tp, tok::TOK_START);
1190 		outqueue.enqueue(t);
1191 		/* End paragraph will come from previous stage */
1192 	    }
1193 	    delete(tf);
1194 	}
1195 	else
1196 	{
1197 	    if (lp==NULL)
1198 		outqueue.enqueue(tf);
1199 	    else
1200 		lp->last_item->enqueue(tf);
1201 	}
1202     }
1203     tf=outqueue.dequeue();
1204     return tf;
1205 }
1206 
1207 
operator <<(ostream & os,const tok_seq::tok * d)1208 ostream &operator<<(ostream &os, const tok_seq::tok *d)
1209 {
1210     os<<'('<<d->tokval<<',';
1211     switch(d->dtype)
1212     {
1213     case 1:
1214 	if (d->data.d!=NULL && strlen(d->data.d)>10)
1215 	{
1216 	    char foo[11];
1217 	    int i;
1218 
1219 	    for(i=0; i<7; i++)
1220 		foo[i]=d->data.d[i];
1221 	    for ( ; i<10; i++)
1222 		foo[i]='.';
1223 	    foo[10]='\0';
1224 	    os<<foo;
1225 	}
1226 	else
1227 	    os<<d->data.d;
1228 	break;
1229     case 0:
1230 	os<<d->data.table.rows<<'x'<<d->data.table.cols;
1231 	break;
1232     }
1233     os<<','<<((d->end==tok_seq::tok::TOK_START) ? "start" : "end")<<')';
1234     return os;
1235 }
1236 
/*
 * Assign one token to another. Text is duplicated with strdup() so that
 * the two tokens do not share it; any other payload (table size, or a
 * NULL text pointer) is copied as it is. Assigning a token to itself
 * does nothing.
 *
 * NOTE(review): text already held by the target is not released here,
 * because the ownership rules of data.d are declared outside this file -
 * confirm whether it should be freed before it is overwritten.
 */
tok_seq::tok &tok_seq::tok::operator=(const tok_seq::tok &d)
{
    if (this==&d)
	return (*this);

    tokval=d.tokval;
    end=d.end;
    dtype=d.dtype;
    if (d.dtype==TEXT && d.data.d!=NULL)
    {
	data.d=strdup(d.data.d);
    }
    else
    {
	/* This used to be left out, so the target kept its old payload
	   under the new dtype */
	data=d.data;
    }
    return (*this);
}
1248