1 //========================================================================
2 //
3 // PSTokenizer.cc
4 //
5 // Copyright 2002-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2006 Scott Turner <scotty1024@mac.com>
17 // Copyright (C) 2008 Albert Astals Cid <aacid@kde.org>
18 //
19 // To see a description of the changes please see the Changelog file that
20 // came with your tarball or type make ChangeLog if you are building from git
21 //
22 //========================================================================
23 
24 #include <config.h>
25 
26 #ifdef USE_GCC_PRAGMAS
27 #pragma implementation
28 #endif
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include "PSTokenizer.h"
33 
34 //------------------------------------------------------------------------
35 
36 // A '1' in this array means the character is white space.  A '1' or
37 // '2' means the character ends a name or command.
static const char specialChars[256] = {
  // Indexed by byte value (row = high nibble, column = low nibble).
  //   0 = ordinary character
  //   1 = white space: NUL, HT, LF, FF, CR, space
  //   2 = delimiter:   % ( ) / < > [ ] { }
  //
  //       x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf
  /* 0x */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,  // NUL HT LF FF CR
  /* 1x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 2x */ 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,  // space % ( ) /
  /* 3x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,  // < >
  /* 4x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 5x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,  // [ ]
  /* 6x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 7x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,  // { }
  /* 8x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 9x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* ax */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* bx */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* cx */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* dx */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* ex */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* fx */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
56 
57 //------------------------------------------------------------------------
58 
PSTokenizer(int (* getCharFuncA)(void *),void * dataA)59 PSTokenizer::PSTokenizer(int (*getCharFuncA)(void *), void *dataA) {
60   getCharFunc = getCharFuncA;
61   data = dataA;
62   charBuf = -1;
63 }
64 
~PSTokenizer()65 PSTokenizer::~PSTokenizer() {
66 }
67 
getToken(char * buf,int size,int * length)68 GBool PSTokenizer::getToken(char *buf, int size, int *length) {
69   GBool comment, backslash;
70   int c;
71   int i;
72 
73   // skip leading whitespace and comments
74   comment = gFalse;
75   while (1) {
76     if ((c = getChar()) == EOF) {
77       buf[0] = '\0';
78       *length = 0;
79       return gFalse;
80     }
81     if (comment) {
82       if (c == '\x0a' || c == '\x0d') {
83 	comment = gFalse;
84       }
85     } else if (c == '%') {
86       comment = gTrue;
87     } else if (specialChars[c] != 1) {
88       break;
89     }
90   }
91 
92   // Reserve room for terminating '\0'
93   size--;
94 
95   // read a token
96   i = 0;
97   buf[i++] = c;
98   if (c == '(') {
99     backslash = gFalse;
100     while ((c = lookChar()) != EOF) {
101       consumeChar();
102       if (i < size) {
103 	buf[i++] = c;
104       }
105       if (c == '\\') {
106 	backslash = gTrue;
107       } else if (!backslash && c == ')') {
108 	break;
109       } else {
110 	backslash = gFalse;
111       }
112     }
113   } else if (c == '<') {
114     while ((c = lookChar()) != EOF) {
115       consumeChar();
116       if (i < size && specialChars[c] != 1) {
117 	buf[i++] = c;
118       }
119       if (c == '>') {
120 	break;
121       }
122     }
123   } else if (c != '[' && c != ']') {
124     while ((c = lookChar()) != EOF && !specialChars[c]) {
125       consumeChar();
126       if (i < size) {
127 	buf[i++] = c;
128       }
129     }
130   }
131   // Zero terminate token string
132   buf[i] = '\0';
133   // Return length of token
134   *length = i;
135 
136   return gTrue;
137 }
138 
lookChar()139 int PSTokenizer::lookChar() {
140   if (charBuf < 0) {
141     charBuf = (*getCharFunc)(data);
142   }
143   return charBuf;
144 }
145 
consumeChar()146 void PSTokenizer::consumeChar() {
147   charBuf = -1;
148 }
149 
getChar()150 int PSTokenizer::getChar() {
151   int c = charBuf;
152 
153   if (c < 0) {
154     c = (*getCharFunc)(data);
155   } else {
156     charBuf = -1;
157   }
158   return c;
159 }
160