1 /* This file is part of the KDE project
2    Copyright (C) 2002, Dirk Schönberger <dirk.schoenberger@sz-online.de>
3 
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Library General Public
6    License as published by the Free Software Foundation; either
7    version 2 of the License, or (at your option) any later version.
8 
9    This library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Library General Public License for more details.
13 
14    You should have received a copy of the GNU Library General Public License
15    along with this library; see the file COPYING.LIB.  If not, write to
16    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18 */
19 
#include "PsCommentLexer.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include <QStringList>
25 
/*
 * Character categories usable in the `c` column of the transition table.
 * They are negative so they can share that column with literal characters;
 * nextStep() maps each one to a classification test.
 */
#define CATEGORY_WHITESPACE (-1)
#define CATEGORY_ALPHA      (-2)
#define CATEGORY_DIGIT      (-3)
#define CATEGORY_SPECIAL    (-4)
#define CATEGORY_LETTERHEX  (-5)
#define CATEGORY_INTTOOLONG (-6)

/* Wildcard category: nextStep() treats it as matching every character. */
#define CATEGORY_ANY (-127)

/* CATEGORY_INTTOOLONG matches once the buffer holds more than this many characters. */
#define MAX_INTLEN 9
#define MIN_HEXCHARS 6

/* Value of the `c` column that marks the terminating entry of the transition table. */
#define STOP 0
39 
/**
 * Tells whether @p c is a blank, newline, tab or carriage return.
 *
 * @return 1 for one of those four characters, 0 for anything else.
 */
int iswhitespace(char c)
{
    switch (c) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
44 
/**
 * Tells whether @p c belongs to the lexer's set of "special" punctuation
 * characters: * _ ? ~ - ^ ` ! . @ & $ =
 *
 * @return 1 if @p c is in the set, 0 otherwise.
 */
int isSpecial(char c)
{
    // Deliberately a plain char array (no terminator), so '\0' is never a member.
    static const char specials[] = {
        '*', '_', '?', '~', '-', '^', '`', '!', '.', '@', '&', '$', '='
    };

    for (char candidate : specials) {
        if (candidate == c) {
            return 1;
        }
    }
    return 0;
}
49 
/**
 * Tells whether @p c is one of the upper-case hexadecimal letters 'A'..'F'.
 * Lower-case letters and decimal digits are not accepted.
 *
 * @return 1 for 'A' through 'F', 0 otherwise.
 */
int isletterhex(char c)
{
    const bool inRange = (c >= 'A') && (c <= 'F');
    return inRange ? 1 : 0;
}
54 
statetoa(State state)55 const char*statetoa(State state)
56 {
57     switch (state) {
58     case State_Comment : return "comment";
59     case State_CommentEncodedChar : return "encoded char (comment)";
60     default : return "unknown";
61     }
62 }
63 
64 typedef struct {
65     State oldState;
66     signed char c;
67     State newState;
68     Action action;
69 } Transition;
70 
71 static const Transition transitions[] = {
72     { State_Comment, '\n', State_Start, Action_Output},
73     { State_Comment, '\r', State_Start, Action_Output},
74     { State_Comment, '\\', State_CommentEncodedChar, Action_InitTemp},
75     { State_Comment, CATEGORY_ANY, State_Comment, Action_Copy},
76     { State_CommentEncodedChar, '\\', State_Comment, Action_Copy},
77     { State_CommentEncodedChar, CATEGORY_DIGIT, State_CommentEncodedChar, Action_CopyTemp},
78     { State_CommentEncodedChar, CATEGORY_ANY, State_Comment, Action_DecodeUnget},
79     { State_Start, '%', State_Comment, Action_Ignore},
80     { State_Start, CATEGORY_ANY, State_Start, Action_Ignore},
81     { State_Start, STOP, State_Start, Action_Abort}
82 };
83 
PSCommentLexer()84 PSCommentLexer::PSCommentLexer()
85 {
86 }
~PSCommentLexer()87 PSCommentLexer::~PSCommentLexer()
88 {
89 }
90 
parse(QIODevice & fin)91 bool PSCommentLexer::parse(QIODevice& fin)
92 {
93     char c;
94 
95     m_buffer.clear();
96     m_curState = State_Start;
97 
98     parsingStarted();
99 
100     while (!fin.atEnd()) {
101         fin.getChar(&c);
102 
103 //    qDebug ("got %c", c);
104 
105         State newState;
106         Action action;
107 
108         nextStep(c, &newState, &action);
109 
110         switch (action) {
111         case Action_Copy :
112             m_buffer.append(c);
113             break;
114         case Action_CopyOutput :
115             m_buffer.append(c);
116             doOutput();
117             break;
118         case Action_Output :
119             doOutput();
120             break;
121         case Action_OutputUnget :
122             doOutput();
123             fin.ungetChar(c);
124             break;
125         case Action_Ignore :
126             /* ignore */
127             break;
128         case Action_Abort :
129             qWarning("state %s / %s char %c (%d)" , statetoa(m_curState), statetoa(newState), c, c);
130             parsingAborted();
131             return false;
132             break;
133         case Action_InitTemp :
134             m_temp.clear();
135             break;
136         case Action_CopyTemp :
137             m_temp.append(c);
138             break;
139         case Action_DecodeUnget :
140             m_buffer.append(decode());
141             fin.ungetChar(c);
142             break;
143         default :
144             qWarning("unknown action: %d ", action);
145         }
146 
147         m_curState = newState;
148     }
149 
150     parsingFinished();
151     return true;
152 }
153 
doOutput()154 void PSCommentLexer::doOutput()
155 {
156     if (m_buffer.length() == 0) return;
157     switch (m_curState) {
158     case State_Comment :
159         gotComment(m_buffer.toLatin1());
160         break;
161     default:
162         qWarning("unknown state: %d", m_curState);
163     }
164 
165     m_buffer.clear();
166 }
167 
gotComment(const char * value)168 void PSCommentLexer::gotComment(const char *value)
169 {
170     qDebug("gotComment: %s ", value);
171 }
172 
parsingStarted()173 void PSCommentLexer::parsingStarted()
174 {
175     qDebug("parsing started");
176 }
177 
parsingFinished()178 void PSCommentLexer::parsingFinished()
179 {
180     qDebug("parsing finished");
181 }
182 
parsingAborted()183 void PSCommentLexer::parsingAborted()
184 {
185     qDebug("parsing aborted");
186 }
187 
nextStep(char c,State * newState,Action * newAction)188 void PSCommentLexer::nextStep(char c, State *newState, Action *newAction)
189 {
190     int i = 0;
191 
192     while (true) {
193         Transition trans = transitions[i];
194 
195         if (trans.c == STOP) {
196             *newState = trans.newState;
197             *newAction = trans.action;
198             return;
199         }
200 
201         bool found = false;
202 
203         if (trans.oldState == m_curState) {
204             switch (trans.c) {
205             case CATEGORY_WHITESPACE : found = isspace(c); break;
206             case CATEGORY_ALPHA : found = isalpha(c); break;
207             case CATEGORY_DIGIT : found = isdigit(c); break;
208             case CATEGORY_SPECIAL : found = isSpecial(c); break;
209             case CATEGORY_LETTERHEX : found = isletterhex(c); break;
210             case CATEGORY_INTTOOLONG : found = m_buffer.length() > MAX_INTLEN; break;
211             case CATEGORY_ANY : found = true; break;
212             default : found = (trans.c == c);
213             }
214 
215             if (found) {
216                 *newState = trans.newState;
217                 *newAction = trans.action;
218 
219                 return;
220             }
221         }
222 
223 
224         i++;
225     }
226 }
227 
decode()228 uchar PSCommentLexer::decode()
229 {
230     uchar value = m_temp.toString().toShort(nullptr, 8);
231 //  qDebug ("got encoded char %c",value);
232     return value;
233 }
234 
235 /* StringBuffer implementation */
236 
// Number of bytes a freshly constructed StringBuffer allocates.
constexpr int initialSize = 20;
// Minimum number of bytes by which a StringBuffer grows when it runs out of room.
constexpr int addSize = 10;
239 
StringBuffer()240 StringBuffer::StringBuffer()
241 {
242     m_buffer = (char*)calloc(initialSize, sizeof(char));
243     m_length = 0;
244     m_capacity = initialSize;
245 }
246 
~StringBuffer()247 StringBuffer::~StringBuffer()
248 {
249     free(m_buffer);
250 }
251 
append(char c)252 void StringBuffer::append(char c)
253 {
254     ensureCapacity(m_length + 1);
255     m_buffer[m_length] = c;
256     m_length++;
257 }
258 
clear()259 void StringBuffer::clear()
260 {
261     for (uint i = 0; i < m_length; i++) m_buffer[i] = '\0';
262     m_length = 0;
263 }
264 
toString() const265 QString StringBuffer::toString() const
266 {
267     QString ret(m_buffer);
268     return ret;
269 }
270 
ensureCapacity(int p_capacity)271 void StringBuffer::ensureCapacity(int p_capacity)
272 {
273     if (m_capacity >= p_capacity) return;
274 
275     int newSize = m_capacity + addSize;
276     if (p_capacity > newSize) newSize = p_capacity;
277 
278     char* oldBuffer = m_buffer;
279     char *newBuffer = (char*)calloc(newSize, sizeof(char));
280     strcpy(newBuffer, m_buffer);
281     free(oldBuffer);
282     m_buffer = newBuffer;
283     m_capacity = newSize;
284 }
285 
length() const286 uint StringBuffer::length() const
287 {
288     return m_length;
289 }
290 
toFloat()291 double StringBuffer::toFloat()
292 {
293     QString data = toString();
294     return data.toFloat();
295 }
296 
toInt()297 int StringBuffer::toInt()
298 {
299     QString data = toString();
300     return data.toInt();
301 }
302 
toLatin1() const303 const char *StringBuffer::toLatin1() const
304 {
305     return m_buffer;
306 }
307 
mid(uint index,uint len) const308 QString StringBuffer::mid(uint index, uint len) const
309 {
310     QString data = toString();
311     return data.mid(index, len);
312 }
313 
314 /* BoundingBoxExtractor */
BoundingBoxExtractor()315 BoundingBoxExtractor:: BoundingBoxExtractor() : m_llx(0), m_lly(0), m_urx(0), m_ury(0) {}
~BoundingBoxExtractor()316 BoundingBoxExtractor::~BoundingBoxExtractor() {}
317 
gotComment(const char * value)318 void BoundingBoxExtractor::gotComment(const char *value)
319 {
320     QString data(value);
321     if (data.indexOf("%BoundingBox:") == -1) return;
322 
323     getRectangle(value, m_llx, m_lly, m_urx, m_ury);
324 }
325 
getRectangle(const char * input,int & llx,int & lly,int & urx,int & ury)326 bool BoundingBoxExtractor::getRectangle(const char* input, int &llx, int &lly, int &urx, int &ury)
327 {
328     if (input == nullptr) return false;
329 
330     QString s(input);
331     if (s.contains("(atend)")) return false;
332 
333     s.remove("%BoundingBox:");
334     QStringList values = s.split(' ');
335     qDebug("size is %d", values.size());
336 //  if (values.size() < 5) return false;
337     llx = values[0].toInt();
338     lly = values[1].toInt();
339     urx = values[2].toInt();
340     ury = values[3].toInt();
341 
342     return true;
343 }
344 
345