1 /* wvWare
2  * Copyright (C) Caolan McNamara, Dom Lachowicz, and others
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17  * 02111-1307, USA.
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <ctype.h>
28 #include "wv.h"
29 #include "utf.h"
30 
31 #ifdef HAVE_LIBXML2
32 #include <libxml/parser.h>
33 #endif
34 
35 #include <gsf/gsf-input-stdio.h>
36 #include <gsf/gsf-utils.h>
37 
/*
 * One-time library start-up.
 *
 * Calls gsf_init() and, when the build defines HAVE_LIBXML2, also
 * xmlInitParser().  The result is unconditionally 1.
 */
int
wvInit (void)
{
  const int started = 1;

  gsf_init ();
#ifdef HAVE_LIBXML2
  xmlInitParser ();
#endif

  return started;
}
49 
/*
 * Library tear-down, the counterpart of wvInit().
 *
 * Calls gsf_shutdown() and, when the build defines HAVE_LIBXML2, also
 * xmlCleanupParser().
 */
void
wvShutdown (void)
{
  gsf_shutdown ();
#ifdef HAVE_LIBXML2
  xmlCleanupParser ();
#endif
}
59 
60 static int
wvOpenPreOLE(GsfInput * path,wvStream ** mainfd,wvStream ** tablefd0,wvStream ** tablefd1,wvStream ** data,wvStream ** summary)61 wvOpenPreOLE (GsfInput *path, wvStream ** mainfd, wvStream ** tablefd0,
62 	      wvStream ** tablefd1, wvStream ** data, wvStream ** summary)
63 {
64     int ret = -1;
65     U16 magic;
66 
67     if (path == NULL)
68       {
69 	  wvError (("Cannot open file $s\n", path));
70 	  return (-1);
71       }
72 
73     wvStream_gsf_create (mainfd, path);
74 
75     /* what's the lifecycle on these look like? */
76     *tablefd0 = *mainfd;
77     *tablefd1 = *mainfd;
78     *data     = *mainfd;
79     *summary  = *mainfd;
80 
81     magic = read_16ubit (*mainfd);
82     if (0xa5db == magic)
83       {
84 	  wvError (
85 		   ("Theres a good chance that this is a word 2 doc of nFib %d\n",
86 		    read_16ubit (*mainfd)));
87 	  wvStream_rewind (*mainfd);
88 	  /* return(-1); */
89 	  return (0);
90       }
91     else if (0x37fe == magic)
92       {
93 	  wvError (
94 		   ("Theres a good chance that this is a word 5 doc of nFib %d\n",
95 		    read_16ubit (*mainfd)));
96 	  wvStream_rewind (*mainfd);
97 	  return (0);
98       }
99 
100     return (ret);
101 }
102 
/* Forward declaration: tokenTreeInit is defined further down in this file
   but is already called from wvInitParser_gsf. */
static void tokenTreeInit (void);
104 
wvCreateParser(void)105 wvParseStruct * wvCreateParser (void)
106 {
107   return (wvParseStruct *)calloc (1, sizeof (wvParseStruct));
108 }
109 
/*
 * Release a wvParseStruct with free().  Passing NULL is allowed and does
 * nothing.  Only the struct itself is freed here.
 */
void wvDeleteParser (wvParseStruct * ps)
{
  /* free(NULL) is defined to be a no-op, so no explicit guard is needed. */
  free (ps);
}
115 
/*
 * Prepare `ps` for parsing the document behind the GsfInput `path`.
 *
 * Steps, in order:
 *   1. zero `ps` and reset the parser state fields and handlers;
 *   2. build the token lookup tree (tokenTreeInit);
 *   3. open the streams with wvOLEDecode_gsf, falling back to
 *      wvOpenPreOLE when that call reports 2;
 *   4. read the FIB from the main stream and pick the table stream;
 *   5. sanity-check the table/data streams;
 *   6. ask wvQuerySupported whether this Word version is handled.
 *
 * Returns 0 on success.  Otherwise:
 *   - the non-zero wvOpenPreOLE result, when the fallback failed;
 *   - 3 when wvOLEDecode_gsf reported 3 or 5;
 *   - -1 for any other wvOLEDecode_gsf result, for a missing main or table
 *     stream, or when the Clx offset cannot be reached;
 *   - the wvQuerySupported value itself when it is none of WORD8, WORD7,
 *     WORD6 or WORD2.
 */
int wvInitParser_gsf (wvParseStruct * ps, GsfInput *path)
{
    int status;
    int reason = 0;

    memset (ps, 0, sizeof (wvParseStruct));

    /* Parser state, spelled out explicitly on top of the memset. */
    ps->userData = NULL;
    ps->lst = NULL;
    ps->vmerges = NULL;
    ps->cellbounds = NULL;
    ps->intable = 0;
    ps->endcell = 0;
    ps->norows = 0;
    ps->nocellbounds = 0;
    ps->fieldstate = 0;
    ps->fieldmiddle = 0;

    /* No callbacks installed yet. */
    ps->charhandler = 0;
    ps->scharhandler = 0;
    ps->elehandler = 0;
    ps->dochandler = 0;

    /* Empty password. */
    ps->password[0] = 0;

    /* Build the token tree used for name lookups. */
    tokenTreeInit ();

    status = wvOLEDecode_gsf (ps, path, &ps->mainfd, &ps->tablefd0,
                              &ps->tablefd1, &ps->data, &ps->summary);

    if (status == 2)
      {
        /* Retry as a file without an OLE wrapper. */
        status = wvOpenPreOLE (path, &ps->mainfd, &ps->tablefd0,
                               &ps->tablefd1, &ps->data, &ps->summary);
        if (status != 0)
            return (status);
      }
    else if (status == 3 || status == 5)
      {
        wvError (("Bad Ole\n"));
        return (3);
      }
    else if (status != 0)
      {
        return (-1);
      }

    if (ps->mainfd == NULL)
      {
        wvOLEFree (ps);
        wvError (("Not a word document\n"));
        return (-1);
      }

    wvGetFIB (&ps->fib, ps->mainfd);

    /* A usable table stream is mandatory. */
    ps->tablefd = wvWhichTableStream (&ps->fib, ps);
    if (ps->tablefd == NULL)
        goto corrupt;

    /* A missing data stream makes corruption likely, so for files that are
       not encrypted check that the Clx offset in the table stream can be
       reached, then put the table stream back at its start. */
    if (ps->data == NULL)
      {
        if (!ps->fib.fEncrypted
            && wvStream_goto (ps->tablefd, ps->fib.fcClx) == -1)
            goto corrupt;

        wvStream_rewind (ps->tablefd);
      }

    status = wvQuerySupported (&ps->fib, &reason);

    /* Anything but WORD8 (ignoring the 0x8000 bit) reads its data from the
       main stream. */
    if ((status & 0x7fff) != WORD8)
        ps->data = ps->mainfd;

    if (status != WORD8 && status != WORD7 && status != WORD6
        && status != WORD2)
      {
        /* Hand errors and encrypted files back to the caller; the reason
           text is only logged when the 0x8000 bit is clear. */
        if ((status & 0x8000) == 0)
            wvError (("%s\n", wvReason (reason)));
        return (status);
      }

    return 0;

  corrupt:
    wvOLEFree (ps);
    wvError (("Data Stream Corrupt or Not Readable\n"));
    return (-1);
}
216 
217 int
wvInitParser(wvParseStruct * ps,char * path)218 wvInitParser (wvParseStruct * ps, char *path)
219 {
220   GsfInput *input;
221   int rval;
222 
223   input = gsf_input_stdio_new (path, NULL);
224   rval = wvInitParser_gsf (ps, input);
225 
226   if (rval == 0)
227     ps->filename = path;
228   ps->input = input;
229 
230   return rval;
231 }
232 
233 void
wvSetPassword(const char * pass,wvParseStruct * ps)234 wvSetPassword (const char *pass, wvParseStruct * ps)
235 {
236     int i = 0, len;
237 
238     char * password = (char *)pass; /* causes no harm */
239 
240     /* at this stage we are passing in an utf-8 password and
241        later converting it to unicode, we should use the generic
242        available mb to wide char stuff, but that isnt very prevalent
243        yet, and this is the only time i think i go from utf to
244        unicode */
245 
246     while (*password)
247       {
248 	  len = our_mbtowc (&(ps->password[i]), password, 5);
249 	  i++;
250 	  password += len;
251 	  if (i == 16)
252 	      break;
253       }
254     ps->password[i] = 0;
255 }
256 
257 static Tokenptr tokenTreeRoot = NULL;
258 
259 static const TokenTable s_Tokens[] = {
260     {"*", TT_OTHER},		/* must be FIRST */
261     {"begin", TT_BEGIN},
262     {"end", TT_END},
263     {"title", TT_TITLE},
264     {"charset", TT_CHARSET},
265     {"version", TT_VERSION},
266     {"filename", TT_FILENAME},
267     {"htmlgraphic", TT_htmlgraphic},
268     {"colspan", TT_COLSPAN},
269     {"cellrelwidth", TT_CELLRELWIDTH},
270     {"cellrelpagewidth", TT_CELLRELPAGEWIDTH},
271     {"rowspan", TT_ROWSPAN},
272     {"cellbgcolor", TT_CELLBGCOLOR},
273     {"parabgcolor", TT_PARABGCOLOR},
274     {"parafgcolor", TT_PARAFGCOLOR},
275     {"tablerelwidth", TT_TABLERELWIDTH},
276     {"no_rows", TT_no_rows},
277     {"no_cols", TT_no_cols},
278     {"style", TT_STYLE},
279     {"comment", TT_COMMENT},
280     {"ibstanno", TT_IBSTANNO},
281     {"xstUsrInitl", TT_xstUsrInitl},
282     {"mmParaBefore", TT_mmParaBefore},
283     {"mmParaAfter", TT_mmParaAfter},
284     {"mmParaLeft", TT_mmParaLeft},
285     {"mmParaRight", TT_mmParaRight},
286     {"mmParaLeft1", TT_mmParaLeft1},
287     /* >>---------- PATCH */
288     {"stylename", TT_stylename},
289     /* << ---------------- */
290     {"bordertopstyle", TT_BORDERTopSTYLE},
291     {"bordertopcolor", TT_BORDERTopCOLOR},
292     {"borderleftstyle", TT_BORDERLeftSTYLE},
293     {"borderleftcolor", TT_BORDERLeftCOLOR},
294     {"borderrightstyle", TT_BORDERRightSTYLE},
295     {"borderrightcolor", TT_BORDERRightCOLOR},
296     {"borderbottomstyle", TT_BORDERBottomSTYLE},
297     {"borderbottomcolor", TT_BORDERBottomCOLOR},
298     {"mmPadTop", TT_mmPadTop},
299     {"mmPadRight", TT_mmPadRight},
300     {"mmPadBottom", TT_mmPadBottom},
301     {"mmPadLeft", TT_mmPadLeft},
302     {"mmLineHeight", TT_mmLineHeight},
303     {"document", TT_DOCUMENT},
304     {"picture", TT_PICTURE},
305     {"charentity", TT_CHARENTITY},
306     {"pixPicWidth", TT_pixPicWidth},
307     {"pixPicHeight", TT_pixPicHeight},
308     {"htmlAlignGuess", TT_htmlAlignGuess},
309     {"htmlNextLineGuess", TT_htmlNextLineGuess},
310     {"section", TT_SECTION},
311     {"paragraph", TT_PARA},
312     {"table", TT_TABLE},
313     {"table.begin", TT_TABLEB},
314     {"table.end", TT_TABLEE},
315     {"row", TT_ROW},
316     {"row.begin", TT_ROWB},
317     {"row.end", TT_ROWE},
318     {"cell", TT_CELL},
319     {"cell.begin", TT_CELLB},
320     {"cell.end", TT_CELLE},
321     {"lastcell", TT_LASTCELL},
322     {"lastcell.begin", TT_LASTCELLB},
323     {"lastcell.end", TT_LASTCELLE},
324     {"tableoverrides", TT_TABLEOVERRIDES},
325     {"ParaBefore", TT_ParaBefore},
326     {"ParaAfter", TT_ParaAfter},
327     {"ParaLeft", TT_ParaLeft},
328     {"ParaRight", TT_ParaRight},
329     {"ParaLeft1", TT_ParaLeft1},
330     {"VertMergedCells", TT_VertMergedCells},
331     {"block", TT_BLOCK},
332     {"justification", TT_JUSTIFICATION},
333     {"just", TT_JUST},
334     {"left", TT_LEFT},
335     {"right", TT_RIGHT},
336     {"center", TT_CENTER},
337     {"asian", TT_ASIAN},
338     {"pmargin", TT_PMARGIN},
339     {"pborder", TT_PBORDER},
340     {"paramargin", TT_PARAMARGIN},
341     {"paraborder", TT_PARABORDER},
342     {"nfc", TT_nfc},
343     {"start", TT_START},
344     {"numbering", TT_numbering},
345     {"arabic", TT_Arabic},
346     {"upperroman", TT_UpperRoman},
347     {"lowerroman", TT_LowerRoman},
348     {"uppercasen", TT_UpperCaseN},
349     {"lowercasen", TT_LowerCaseN},
350     {"text", TT_TEXT,},
351     {"text.begin", TT_TEXTB,},
352     {"text.end", TT_TEXTE,},
353     {"olist", TT_OLIST,},
354     {"olist.begin", TT_OLISTB,},
355     {"olist.end", TT_OLISTE,},
356     {"ulist", TT_ULIST,},
357     {"ulist.begin", TT_ULISTB,},
358     {"ulist.end", TT_ULISTE,},
359     {"entry", TT_ENTRY,},
360     {"entry.begin", TT_ENTRYB,},
361     {"entry.end", TT_ENTRYE,},
362     {"character", TT_CHAR},
363     {"bold", TT_BOLD},
364     {"bold.begin", TT_BOLDB},
365     {"bold.end", TT_BOLDE},
366     {"italic", TT_ITALIC},
367     {"italic.begin", TT_ITALICB},
368     {"italic.end", TT_ITALICE,},
369     {"strike", TT_STRIKE},
370     {"strike.begin", TT_STRIKEB},
371     {"strike.end", TT_STRIKEE,},
372     {"rmarkdel", TT_RMarkDel,},
373     {"rmarkdel.begin", TT_RMarkDelB,},
374     {"rmarkdel.end", TT_RMarkDelE,},
375     {"outline", TT_OUTLINE,},
376     {"outline.begin", TT_OUTLINEB,},
377     {"outline.end", TT_OUTLINEE,},
378     {"smallcaps", TT_SMALLCAPS,},
379     {"smallcaps.begin", TT_SMALLCAPSB,},
380     {"smallcaps.end", TT_SMALLCAPSE,},
381     {"caps", TT_CAPS,},
382     {"caps.begin", TT_CAPSB,},
383     {"caps.end", TT_CAPSE,},
384     {"vanish", TT_VANISH,},
385     {"vanish.begin", TT_VANISHB,},
386     {"vanish.end", TT_VANISHE,},
387     {"rmark", TT_RMark,},
388     {"rmark.begin", TT_RMarkB,},
389     {"rmark.end", TT_RMarkE,},
390     {"shadow", TT_SHADOW,},
391     {"shadow.begin", TT_SHADOWB,},
392     {"shadow.end", TT_SHADOWE,},
393     {"lowercase", TT_LOWERCASE,},
394     {"lowercase.begin", TT_LOWERCASEB,},
395     {"lowercase.end", TT_LOWERCASEE,},
396     {"emboss", TT_EMBOSS,},
397     {"emboss.begin", TT_EMBOSSB,},
398     {"emboss.end", TT_EMBOSSE,},
399     {"imprint", TT_IMPRINT,},
400     {"imprint.begin", TT_IMPRINTB,},
401     {"imprint.end", TT_IMPRINTE,},
402     {"dstrike", TT_DSTRIKE,},
403     {"dstrike.begin", TT_DSTRIKEB,},
404     {"dstrike.end", TT_DSTRIKEE,},
405     {"super", TT_SUPER,},
406     {"super.begin", TT_SUPERB,},
407     {"super.end", TT_SUPERE,},
408     {"sub", TT_SUB,},
409     {"sub.begin", TT_SUBB,},
410     {"sub.end", TT_SUBE,},
411     {"singleu", TT_SINGLEU,},
412     {"singleu.begin", TT_SINGLEUB,},
413     {"singleu.end", TT_SINGLEUE,},
414     {"wordu", TT_WORDU,},
415     {"wordu.begin", TT_WORDUB,},
416     {"wordu.end", TT_WORDUE,},
417     {"doubleu", TT_DOUBLEU,},
418     {"doubleu.begin", TT_DOUBLEUB,},
419     {"doubleu.end", TT_DOUBLEUE,},
420     {"dottedu", TT_DOTTEDU,},
421     {"dottedu.begin", TT_DOTTEDUB,},
422     {"dottedu.end", TT_DOTTEDUE,},
423     {"hiddenu", TT_HIDDENU,},
424     {"hiddenu.begin", TT_HIDDENUB,},
425     {"hiddenu.end", TT_HIDDENUE,},
426     {"thicku", TT_THICKU,},
427     {"thicku.begin", TT_THICKUB,},
428     {"thicku.end", TT_THICKUE,},
429     {"dashu", TT_DASHU,},
430     {"dashu.begin", TT_DASHUB,},
431     {"dashu.end", TT_DASHUE,},
432     {"dotu", TT_DOTU,},
433     {"dotu.begin", TT_DOTUB,},
434     {"dotu.end", TT_DOTUE,},
435     {"dotdashu", TT_DOTDASHU,},
436     {"dotdashu.begin", TT_DOTDASHUB,},
437     {"dotdashu.end", TT_DOTDASHUE,},
438     {"dotdotdashu", TT_DOTDOTDASHU,},
439     {"dotdotdashu.begin", TT_DOTDOTDASHUB,},
440     {"dotdotdashu.end", TT_DOTDOTDASHUE,},
441     {"waveu", TT_WAVEU,},
442     {"waveu.begin", TT_WAVEUB,},
443     {"waveu.end", TT_WAVEUE,},
444     {"black", TT_BLACK,},
445     {"black.begin", TT_BLACKB,},
446     {"black.end", TT_BLACKE,},
447     {"blue", TT_BLUE,},
448     {"blue.begin", TT_BLUEB,},
449     {"blue.end", TT_BLUEE,},
450     {"cyan", TT_CYAN,},
451     {"cyan.begin", TT_CYANB,},
452     {"cyan.end", TT_CYANE,},
453     {"green", TT_GREEN,},
454     {"green.begin", TT_GREENB,},
455     {"green.end", TT_GREENE,},
456     {"magenta", TT_MAGENTA,},
457     {"magenta.begin", TT_MAGENTAB,},
458     {"magenta.end", TT_MAGENTAE,},
459     {"red", TT_RED,},
460     {"red.begin", TT_REDB,},
461     {"red.end", TT_REDE,},
462     {"yellow", TT_YELLOW,},
463     {"yellow.begin", TT_YELLOWB,},
464     {"yellow.end", TT_YELLOWE,},
465     {"white", TT_WHITE,},
466     {"white.begin", TT_WHITEB,},
467     {"white.end", TT_WHITEE,},
468     {"dkblue", TT_DKBLUE,},
469     {"dkblue.begin", TT_DKBLUEB,},
470     {"dkblue.end", TT_DKBLUEE,},
471     {"dkcyan", TT_DKCYAN,},
472     {"dkcyan.begin", TT_DKCYANB,},
473     {"dkcyan.end", TT_DKCYANE,},
474     {"dkgreen", TT_DKGREEN,},
475     {"dkgreen.begin", TT_DKGREENB,},
476     {"dkgreen.end", TT_DKGREENE,},
477     {"dkmagenta", TT_DKMAGENTA,},
478     {"dkmagenta.begin", TT_DKMAGENTAB,},
479     {"dkmagenta.end", TT_DKMAGENTAE,},
480     {"dkred", TT_DKRED,},
481     {"dkred.begin", TT_DKREDB,},
482     {"dkred.end", TT_DKREDE,},
483     {"dkyellow", TT_DKYELLOW,},
484     {"dkyellow.begin", TT_DKYELLOWB,},
485     {"dkyellow.end", TT_DKYELLOWE,},
486     {"dkgray", TT_DKGRAY,},
487     {"dkgray.begin", TT_DKGRAYB,},
488     {"dkgray.end", TT_DKGRAYE,},
489     {"ltgray", TT_LTGRAY,},
490     {"ltgray.begin", TT_LTGRAYB,},
491     {"ltgray.end", TT_LTGRAYE,},
492     {"fontstr", TT_FONTSTR,},
493     {"fontstr.begin", TT_FONTSTRB,},
494     {"fontstr.end", TT_FONTSTRE,},
495     {"color", TT_COLOR,},
496     {"color.begin", TT_COLORB,},
497     {"color.end", TT_COLORE,},
498     {"ibstrmark", TT_ibstRMark,},
499     {"ibstrmarkdel", TT_ibstRMarkDel,},
500     {"dttmRMark", TT_dttmRMark,},
501     {"dttmRMarkDel", TT_dttmRMarkDel,},
502     {"proprmark", TT_PropRMark,},
503     {"proprmark.begin", TT_PropRMarkB,},
504     {"proprmark.end", TT_PropRMarkE,},
505     {"ibstPropRMark", TT_ibstPropRMark,},
506     {"dttmPropRMark", TT_dttmPropRMark,},
507     {"LasVegas", TT_LasVegas,},
508     {"LasVegas.begin", TT_LasVegasB,},
509     {"LasVegas.end", TT_LasVegasE,},
510     {"BackgroundBlink", TT_BackgroundBlink,},
511     {"BackgroundBlink.begin", TT_BackgroundBlinkB,},
512     {"BackgroundBlink.end", TT_BackgroundBlinkE,},
513     {"SparkleText", TT_SparkleText,},
514     {"SparkleText.begin", TT_SparkleTextB,},
515     {"SparkleText.end", TT_SparkleTextE,},
516     {"MarchingAnts", TT_MarchingAnts,},
517     {"MarchingAnts.begin", TT_MarchingAntsB,},
518     {"MarchingAnts.end", TT_MarchingAntsE,},
519     {"MarchingRedAnts", TT_MarchingRedAnts,},
520     {"MarchingRedAnts.begin", TT_MarchingRedAntsB,},
521     {"MarchingRedAnts.end", TT_MarchingRedAntsE,},
522     {"Shimmer", TT_Shimmer,},
523     {"Shimmer.begin", TT_ShimmerB,},
524     {"Shimmer.end", TT_ShimmerE,},
525     {"ANIMATION", TT_ANIMATION,},
526     {"ANIMATION.begin", TT_ANIMATIONB,},
527     {"ANIMATION.end", TT_ANIMATIONE,},
528     {"DispFldRMark", TT_DispFldRMark,},
529     {"DispFldRMark.begin", TT_DispFldRMarkB,},
530     {"DispFldRMark.end", TT_DispFldRMarkE,},
531     {"ibstDispFldRMark", TT_ibstDispFldRMark,},
532     {"dttmDispFldRMark", TT_dttmDispFldRMark,},
533     {"xstDispFldRMark", TT_xstDispFldRMark,},
534     {"border", TT_BORDER,},
535     {"noned", TT_NONED,},
536     {"singled", TT_SINGLED,},
537     {"thickd", TT_THICKD,},
538     {"doubled", TT_DOUBLED,},
539     {"number4d", TT_NUMBER4D,},
540     {"hairlined", TT_HAIRLINED,},
541     {"dotd", TT_DOTD,},
542     {"dashlargegapd", TT_DASHLARGEGAPD,},
543     {"dotdashd", TT_DOTDASHD,},
544     {"dotdotdashd", TT_DOTDOTDASHD,},
545     {"tripled", TT_TRIPLED,},
546     {"thin-thicksmallgapd", TT_thin_thicksmallgapD,},
547     {"thick-thinsmallgapd", TT_thick_thinsmallgapD,},
548     {"thin-thick-thinsmallgapd", TT_thin_thick_thinsmallgapD,},
549     {"thin-thickmediumgapd", TT_thin_thickmediumgapD,},
550     {"thick-thinmediumgapd", TT_thick_thinmediumgapD,},
551     {"thin-thick-thinmediumgapd", TT_thin_thick_thinmediumgapD,},
552     {"thin-thicklargegapd", TT_thin_thicklargegapD,},
553     {"thick-thinlargegapd", TT_thick_thinlargegapD,},
554     {"thin-thick-thinlargegapd", TT_thin_thick_thinlargegapD,},
555     {"waved", TT_WAVED,},
556     {"doublewaved", TT_DOUBLEWAVED,},
557     {"dashsmallgapd", TT_DASHSMALLGAPD,},
558     {"dashdotstrokedd", TT_DASHDOTSTROKEDD,},
559     {"emboss3Dd", TT_EMBOSS3DD,},
560     {"engrave3Dd", TT_ENGRAVE3DD,},
561     {"direction", TT_DIRECTION,},
562     {"dir", TT_DIR,},
563     {"defaultd", TT_DEFAULTD,}
564 };
565 
566 #define TOKEN_BUFSIZE 1000
567 static Tokenptr tokenbuf;
568 static int tokenbufn = 0, tokenfreen = 0;
569 static void *tokenfreearr[10000];
570 
571 static void
tokenTreeInsert(int token)572 tokenTreeInsert (int token)
573 {
574     int pos;
575     int d;
576     const char *s;
577     char ch;
578     Tokenptr pp, *p;
579     /* start at one - TT_OTHER is the zero element. */
580     p = &tokenTreeRoot;
581     s = s_Tokens[token].m_name;
582     pos = 0;
583     for (;;)
584       {
585 	  ch = toupper (s[pos]);
586 	  pp = *p;
587 	  while (pp != NULL)
588 	    {
589 		d = ch - pp->splitchar;
590 		if (d == 0)
591 		  {
592 		      if (s[pos] == 0)
593 			  break;
594 		      pos++;
595 		      ch = toupper (s[pos]);
596 		      p = &(pp->eqkid);
597 		  }
598 		else if (d < 0)
599 		    p = &(pp->lokid);
600 		else
601 		    p = &(pp->hikid);
602 		pp = *p;
603 	    }
604 	  if (tokenbufn == 0)
605 	    {
606 		tokenbuf = (Tokenptr) wvMalloc (TOKEN_BUFSIZE *
607 						sizeof (Tokennode));
608 		tokenfreearr[tokenfreen++] = (void *) tokenbuf;
609 		tokenbufn = TOKEN_BUFSIZE;
610 	    }
611 	  tokenbufn--;
612 	  *p = &(tokenbuf[tokenbufn]);
613 	  pp = *p;
614 	  pp->splitchar = ch;
615 	  pp->lokid = pp->eqkid = pp->hikid = 0;
616 	  pp->token = 0;
617 	  if (s[pos] == 0)
618 	    {
619 		pp->token = token;
620 		break;
621 	    }
622 	  pos++;
623 	  p = &(pp->eqkid);
624       }
625 }
626 
/*
 * Insert the tokens with indices min..max (inclusive) into the tree,
 * always inserting the middle index first, then the upper half, then the
 * lower half.
 *
 * Per the original author this yields a balanced tree as long as the token
 * table is sorted.
 * NOTE(review): s_Tokens as written in this file is not in alphabetical
 * order, so that balance is not guaranteed here.
 */
static void
tokenTreeRecursiveInsert (int min, int max)
{
    while (min <= max)
      {
	  int mid = (min + max) / 2;

	  tokenTreeInsert (mid);
	  /* Upper half first ... */
	  tokenTreeRecursiveInsert (mid + 1, max);
	  /* ... then carry on with the lower half in this same call. */
	  max = mid - 1;
      }
}
640 
641 static void
tokenTreeInit(void)642 tokenTreeInit (void)
643 {
644     tokenTreeRecursiveInsert (1, TokenTableSize - 1);
645 }
646 
647 void
tokenTreeFreeAll(void)648 tokenTreeFreeAll (void)
649 {
650     int i;
651     for (i = 0; i < tokenfreen; i++)
652 	wvFree (tokenfreearr[i]);
653     tokenfreen = 0;
654     tokenbufn = 0;
655     tokenbuf = NULL;
656     tokenTreeRoot = NULL;
657 }
658 
659  /* this loop is called *a lot* so I've made it a binary search */
660 static unsigned int
s_mapNameToToken(const char * name)661 s_mapNameToToken (const char *name)
662 {
663     Tokenptr p;
664     int i = 0;
665     char ch;
666 
667     p = tokenTreeRoot;
668 
669     ch = toupper (name[i]);
670     while (p)
671       {
672 	  if (ch < p->splitchar)
673 	      p = p->lokid;
674 	  else if (ch == p->splitchar)
675 	    {
676 		if (name[i] == 0)
677 		    return p->token;
678 		p = p->eqkid;
679 		i++;
680 		ch = toupper (name[i]);
681 	    }
682 	  else
683 	      p = p->hikid;
684       }
685     /* this is one of several lines of code that rely
686        on TT_OTHER being first in the token table. */
687     return 0;
688 }
689 
690 unsigned int
wvMapNameToTokenType(const char * name)691 wvMapNameToTokenType (const char * name)
692 {
693     unsigned int tokenIndex = s_mapNameToToken (name);
694     return s_Tokens[tokenIndex].m_type;
695 }
696