1 /* This file is part of the KDE project
2 Copyright (C) 2002, Dirk Schönberger <dirk.schoenberger@sz-online.de>
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; either
7 version 2 of the License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public License
15 along with this library; see the file COPYING.LIB. If not, write to
16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
#include "PsCommentLexer.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <QStringList>
25
/*
 * Character categories usable in the `c` column of the transition table.
 * They are negative so that they cannot be confused with the literal
 * characters ('%', '\n', ...) stored in the same column.
 */
#define CATEGORY_WHITESPACE (-1)
#define CATEGORY_ALPHA      (-2)
#define CATEGORY_DIGIT      (-3)
#define CATEGORY_SPECIAL    (-4)
#define CATEGORY_LETTERHEX  (-5)
#define CATEGORY_INTTOOLONG (-6)

/* Wildcard category: matches every input character. */
#define CATEGORY_ANY (-127)

/* Buffer length above which CATEGORY_INTTOOLONG matches. */
#define MAX_INTLEN 9
/* Not referenced in this file. */
#define MIN_HEXCHARS 6

/* Value of the `c` column that marks the last entry of the transition table. */
#define STOP 0
39
/* Returns 1 if c is a blank, newline, tab or carriage return, otherwise 0. */
int iswhitespace(char c)
{
    switch (c) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
44
/* Returns 1 if c is one of the punctuation characters * _ ? ~ - ^ ` ! . @ & $ =, otherwise 0. */
int isSpecial(char c)
{
    switch (c) {
    case '*':
    case '_':
    case '?':
    case '~':
    case '-':
    case '^':
    case '`':
    case '!':
    case '.':
    case '@':
    case '&':
    case '$':
    case '=':
        return 1;
    default:
        return 0;
    }
}
49
/* Returns 1 if c is an upper-case hexadecimal letter ('A'..'F'), otherwise 0. */
int isletterhex(char c)
{
    // 'A'..'F' are contiguous in the execution character set, so a range test suffices.
    const bool upperHexLetter = (c >= 'A') && (c <= 'F');
    return upperHexLetter ? 1 : 0;
}
54
statetoa(State state)55 const char*statetoa(State state)
56 {
57 switch (state) {
58 case State_Comment : return "comment";
59 case State_CommentEncodedChar : return "encoded char (comment)";
60 default : return "unknown";
61 }
62 }
63
64 typedef struct {
65 State oldState;
66 signed char c;
67 State newState;
68 Action action;
69 } Transition;
70
71 static const Transition transitions[] = {
72 { State_Comment, '\n', State_Start, Action_Output},
73 { State_Comment, '\r', State_Start, Action_Output},
74 { State_Comment, '\\', State_CommentEncodedChar, Action_InitTemp},
75 { State_Comment, CATEGORY_ANY, State_Comment, Action_Copy},
76 { State_CommentEncodedChar, '\\', State_Comment, Action_Copy},
77 { State_CommentEncodedChar, CATEGORY_DIGIT, State_CommentEncodedChar, Action_CopyTemp},
78 { State_CommentEncodedChar, CATEGORY_ANY, State_Comment, Action_DecodeUnget},
79 { State_Start, '%', State_Comment, Action_Ignore},
80 { State_Start, CATEGORY_ANY, State_Start, Action_Ignore},
81 { State_Start, STOP, State_Start, Action_Abort}
82 };
83
PSCommentLexer()84 PSCommentLexer::PSCommentLexer()
85 {
86 }
~PSCommentLexer()87 PSCommentLexer::~PSCommentLexer()
88 {
89 }
90
parse(QIODevice & fin)91 bool PSCommentLexer::parse(QIODevice& fin)
92 {
93 char c;
94
95 m_buffer.clear();
96 m_curState = State_Start;
97
98 parsingStarted();
99
100 while (!fin.atEnd()) {
101 fin.getChar(&c);
102
103 // qDebug ("got %c", c);
104
105 State newState;
106 Action action;
107
108 nextStep(c, &newState, &action);
109
110 switch (action) {
111 case Action_Copy :
112 m_buffer.append(c);
113 break;
114 case Action_CopyOutput :
115 m_buffer.append(c);
116 doOutput();
117 break;
118 case Action_Output :
119 doOutput();
120 break;
121 case Action_OutputUnget :
122 doOutput();
123 fin.ungetChar(c);
124 break;
125 case Action_Ignore :
126 /* ignore */
127 break;
128 case Action_Abort :
129 qWarning("state %s / %s char %c (%d)" , statetoa(m_curState), statetoa(newState), c, c);
130 parsingAborted();
131 return false;
132 break;
133 case Action_InitTemp :
134 m_temp.clear();
135 break;
136 case Action_CopyTemp :
137 m_temp.append(c);
138 break;
139 case Action_DecodeUnget :
140 m_buffer.append(decode());
141 fin.ungetChar(c);
142 break;
143 default :
144 qWarning("unknown action: %d ", action);
145 }
146
147 m_curState = newState;
148 }
149
150 parsingFinished();
151 return true;
152 }
153
doOutput()154 void PSCommentLexer::doOutput()
155 {
156 if (m_buffer.length() == 0) return;
157 switch (m_curState) {
158 case State_Comment :
159 gotComment(m_buffer.toLatin1());
160 break;
161 default:
162 qWarning("unknown state: %d", m_curState);
163 }
164
165 m_buffer.clear();
166 }
167
gotComment(const char * value)168 void PSCommentLexer::gotComment(const char *value)
169 {
170 qDebug("gotComment: %s ", value);
171 }
172
parsingStarted()173 void PSCommentLexer::parsingStarted()
174 {
175 qDebug("parsing started");
176 }
177
parsingFinished()178 void PSCommentLexer::parsingFinished()
179 {
180 qDebug("parsing finished");
181 }
182
parsingAborted()183 void PSCommentLexer::parsingAborted()
184 {
185 qDebug("parsing aborted");
186 }
187
nextStep(char c,State * newState,Action * newAction)188 void PSCommentLexer::nextStep(char c, State *newState, Action *newAction)
189 {
190 int i = 0;
191
192 while (true) {
193 Transition trans = transitions[i];
194
195 if (trans.c == STOP) {
196 *newState = trans.newState;
197 *newAction = trans.action;
198 return;
199 }
200
201 bool found = false;
202
203 if (trans.oldState == m_curState) {
204 switch (trans.c) {
205 case CATEGORY_WHITESPACE : found = isspace(c); break;
206 case CATEGORY_ALPHA : found = isalpha(c); break;
207 case CATEGORY_DIGIT : found = isdigit(c); break;
208 case CATEGORY_SPECIAL : found = isSpecial(c); break;
209 case CATEGORY_LETTERHEX : found = isletterhex(c); break;
210 case CATEGORY_INTTOOLONG : found = m_buffer.length() > MAX_INTLEN; break;
211 case CATEGORY_ANY : found = true; break;
212 default : found = (trans.c == c);
213 }
214
215 if (found) {
216 *newState = trans.newState;
217 *newAction = trans.action;
218
219 return;
220 }
221 }
222
223
224 i++;
225 }
226 }
227
decode()228 uchar PSCommentLexer::decode()
229 {
230 uchar value = m_temp.toString().toShort(nullptr, 8);
231 // qDebug ("got encoded char %c",value);
232 return value;
233 }
234
235 /* StringBuffer implementation */
236
// Number of bytes a freshly constructed StringBuffer allocates.
constexpr int initialSize = 20;
// Minimum number of bytes by which StringBuffer::ensureCapacity() grows the buffer.
constexpr int addSize = 10;
239
StringBuffer()240 StringBuffer::StringBuffer()
241 {
242 m_buffer = (char*)calloc(initialSize, sizeof(char));
243 m_length = 0;
244 m_capacity = initialSize;
245 }
246
~StringBuffer()247 StringBuffer::~StringBuffer()
248 {
249 free(m_buffer);
250 }
251
append(char c)252 void StringBuffer::append(char c)
253 {
254 ensureCapacity(m_length + 1);
255 m_buffer[m_length] = c;
256 m_length++;
257 }
258
clear()259 void StringBuffer::clear()
260 {
261 for (uint i = 0; i < m_length; i++) m_buffer[i] = '\0';
262 m_length = 0;
263 }
264
toString() const265 QString StringBuffer::toString() const
266 {
267 QString ret(m_buffer);
268 return ret;
269 }
270
ensureCapacity(int p_capacity)271 void StringBuffer::ensureCapacity(int p_capacity)
272 {
273 if (m_capacity >= p_capacity) return;
274
275 int newSize = m_capacity + addSize;
276 if (p_capacity > newSize) newSize = p_capacity;
277
278 char* oldBuffer = m_buffer;
279 char *newBuffer = (char*)calloc(newSize, sizeof(char));
280 strcpy(newBuffer, m_buffer);
281 free(oldBuffer);
282 m_buffer = newBuffer;
283 m_capacity = newSize;
284 }
285
length() const286 uint StringBuffer::length() const
287 {
288 return m_length;
289 }
290
toFloat()291 double StringBuffer::toFloat()
292 {
293 QString data = toString();
294 return data.toFloat();
295 }
296
toInt()297 int StringBuffer::toInt()
298 {
299 QString data = toString();
300 return data.toInt();
301 }
302
toLatin1() const303 const char *StringBuffer::toLatin1() const
304 {
305 return m_buffer;
306 }
307
mid(uint index,uint len) const308 QString StringBuffer::mid(uint index, uint len) const
309 {
310 QString data = toString();
311 return data.mid(index, len);
312 }
313
314 /* BoundingBoxExtractor */
BoundingBoxExtractor()315 BoundingBoxExtractor:: BoundingBoxExtractor() : m_llx(0), m_lly(0), m_urx(0), m_ury(0) {}
~BoundingBoxExtractor()316 BoundingBoxExtractor::~BoundingBoxExtractor() {}
317
gotComment(const char * value)318 void BoundingBoxExtractor::gotComment(const char *value)
319 {
320 QString data(value);
321 if (data.indexOf("%BoundingBox:") == -1) return;
322
323 getRectangle(value, m_llx, m_lly, m_urx, m_ury);
324 }
325
getRectangle(const char * input,int & llx,int & lly,int & urx,int & ury)326 bool BoundingBoxExtractor::getRectangle(const char* input, int &llx, int &lly, int &urx, int &ury)
327 {
328 if (input == nullptr) return false;
329
330 QString s(input);
331 if (s.contains("(atend)")) return false;
332
333 s.remove("%BoundingBox:");
334 QStringList values = s.split(' ');
335 qDebug("size is %d", values.size());
336 // if (values.size() < 5) return false;
337 llx = values[0].toInt();
338 lly = values[1].toInt();
339 urx = values[2].toInt();
340 ury = values[3].toInt();
341
342 return true;
343 }
344
345