1 /* sswf_lexical.c++ -- written by Alexis WILKE for Made to Order Software Corp. (c) 2002-2009 */
2
3 /*
4
5 Copyright (c) 2002-2009 Made to Order Software Corp.
6
7 Permission is hereby granted, free of charge, to any
8 person obtaining a copy of this software and
9 associated documentation files (the "Software"), to
10 deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify,
12 merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom
14 the Software is furnished to do so, subject to the
15 following conditions:
16
17 The above copyright notice and this permission notice
18 shall be included in all copies or substantial
19 portions of the Software.
20
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
22 ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
23 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
24 FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
25 EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
27 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
28 ARISING FROM, OUT OF OR IN CONNECTION WITH THE
29 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 SOFTWARE.
31
32 */
33
34
35 #define SSWF_NEED_ASSERT
36 #include "sswf.h"
37
38 extern "C" {
39 #include "sswf_grammar.h"
40 };
41
42
43 #include "sswf/libsswf.h"
44
45
46 extern YYLTYPE yylloc;
47
48 #define UNREAD_COUNT_MAX 4
49 #define UNGET_COUNT_MAX 16
50 #define MULTIBYTE_MAX 16
51
52 class ScriptFile
53 {
54 public:
55 struct string_t : public sswf::ItemBase {
56 char * f_string;
57 };
58 typedef sswf::sswf_ucs4_t c_t; // UCS chars are 31 bits, negative values are used for errors
59
60 // NOTES:
61 //
62 // Glossary
63 // ASCII American Standard Code for Information Interchange
64 // BE Big-endian (most significant byte first)
65 // LE Little-endian (least significant byte first)
66 // UCS Universal Character Set
67 // UTF Universal Transformation Format
68 //
69 // Note that UCS2, UTF16, UCS4 and UTF32 don't have endian specified.
70 // This is correct since we can infer the endian by checking the
71 // few first bytes of input (which MUST represent a comment)
72 //
73 enum scriptfile_type_t {
74 // totally unknown
75 SCRIPTFILE_TYPE_UNKNOWN = 0, // still unknown
76
77 // unknown but valid for iconv()
78 SCRIPTFILE_TYPE_MULTIBYTES, // a specified encoding (using iconv() to convert the characters)
79
80 // 8 bits
81 SCRIPTFILE_TYPE_ASCII, // accept characters upto 127 as is; others are viewed as erroneous
82 SCRIPTFILE_TYPE_ISO88591, // use input as is (like Unicode page 0)
83 SCRIPTFILE_TYPE_UTF8, // 1 to 6 bytes to encode any character
84 SCRIPTFILE_TYPE_CESU8, // 1 to 4 bytes to encode 0x110000 characters, 0xD800 to 0xDFFF is interpreted
85
86 // 16 bits
87 SCRIPTFILE_TYPE_UCS2, // UCS-2 chars (limited to 0x10000 characters)
88 SCRIPTFILE_TYPE_UCS2BE, // same as UCS2 in big endian
89 SCRIPTFILE_TYPE_UCS2LE, // same as UCS2 in little endian
90 SCRIPTFILE_TYPE_UCS2SAME, // UCS-2 in processor endian
91 SCRIPTFILE_TYPE_UCS2SWAP, // UCS-2 in opposite process endian
92 SCRIPTFILE_TYPE_UTF16, // UTF-16 (0xD800 to 0xDFFF are escapes to represent 20 bits)
93 SCRIPTFILE_TYPE_UTF16BE, // same as UTF-16 in big endian
94 SCRIPTFILE_TYPE_UTF16LE, // same as UTF-16 in little endian
95
96 // 32 bits
97 SCRIPTFILE_TYPE_UCS4, // UCS-4 (unlimited character set, except negative values)
98 SCRIPTFILE_TYPE_UCS4BE, // same as UCS-4 big endian
99 SCRIPTFILE_TYPE_UCS4LE, // same as UCS-4 little endian
100 SCRIPTFILE_TYPE_UCS4SAME, // UCS-4 in processor endian
101 SCRIPTFILE_TYPE_UCS4SWAP, // UCS-4 in opposite process endian
102 SCRIPTFILE_TYPE_UTF32, // UTF-32 (limited to 0x110000 chars)
103 SCRIPTFILE_TYPE_UTF32BE, // UTF-32 big endian
104 SCRIPTFILE_TYPE_UTF32LE, // UTF-32 little endian
105
106 SCRIPTFILE_TYPE_SAME, // keep input type
107
108 SCRIPTFILE_TYPE_max
109 };
110 struct sf_type_t {
111 scriptfile_type_t f_type; // corresponding type (internally supported)
112 const char * f_name; // official encoding name (as in iconv)
113 unsigned long f_input; // accepted input encoding (the one we determine we our internal algorithm)
114 };
115
116 #define SCRIPTFILE_EOF ((c_t) -1) // UCS chars are 31 bits max.
117 #define SCRIPTFILE_BAD ((c_t) -2) // UCS chars are 31 bits max.
118
119 ScriptFile(ScriptFile *parent);
120 ~ScriptFile();
121
122 int OpenFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths);
123 void CloseFile(void);
124 int GetToken(void);
125 unsigned int Line(void) const;
126 ScriptFile * Parent(void);
127 const char * Filename(void);
128 int ReadActionscript(void);
129 void SetReadActionscript(bool yes);
130
131 private:
132 void Reset(void);
133 int FindFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths);
134 c_t GetChar(void);
135 void UngetChar(c_t c);
136 c_t ReadChar(void);
137 int ReadByte(void);
138 void UnreadByte(unsigned char c);
139 void SkipComment(int close);
140 int ReadIdentifier(c_t c);
141 int ReadString(c_t c);
142 int ReadValue(c_t c);
143
144 ScriptFile * f_parent;
145 const char * f_filename;
146 unsigned int f_line;
147 unsigned int f_first_line;
148 scriptfile_type_t f_type;
149 FILE * f_file;
150 int f_last_errno;
151 c_t f_last_char;
152 unsigned int f_unread_count;
153 unsigned char f_unread[UNREAD_COUNT_MAX];
154 unsigned int f_unget_count;
155 c_t f_unget[UNGET_COUNT_MAX];
156 bool f_iconvertor_open;
157 iconv_t f_iconvertor;
158 size_t f_mb_count;
159 char f_multibytes[MULTIBYTE_MAX];
160 bool f_read_actionscript;
161 };
162
163
164 ScriptFile *sf; // the current script file
165 sswf::Vectors include_paths; // an array of strings where files are being searched
166 int no_default_include;
167
168
169
170
/*
 * Macros used to write the entries of the internal_types[] table.
 *
 * SF_TYPE_TO_FLAG<n>() transforms <n> type names (without their
 * SCRIPTFILE_TYPE_ prefix) in the OR-ed set of corresponding bits. The
 * result is an unsigned long, like ScriptFile::sf_type_t::f_input, and
 * every replacement list is parenthesized so that the macros can safely
 * be used within larger expressions.
 */
#define	SF_TYPE_TO_FLAG1(a)			(1UL << ScriptFile::SCRIPTFILE_TYPE_##a)
#define	SF_TYPE_TO_FLAG2(a,b)			(SF_TYPE_TO_FLAG1(a) | SF_TYPE_TO_FLAG1(b))
#define	SF_TYPE_TO_FLAG3(a,b,c)			(SF_TYPE_TO_FLAG2(a,b) | SF_TYPE_TO_FLAG1(c))
#define	SF_TYPE_TO_FLAG4(a,b,c,d)		(SF_TYPE_TO_FLAG3(a,b,c) | SF_TYPE_TO_FLAG1(d))
#define	SF_TYPE_TO_FLAG5(a,b,c,d,e)		(SF_TYPE_TO_FLAG4(a,b,c,d) | SF_TYPE_TO_FLAG1(e))
#define	SF_TYPE_TO_FLAG6(a,b,c,d,e,f)		(SF_TYPE_TO_FLAG5(a,b,c,d,e) | SF_TYPE_TO_FLAG1(f))

/*
 * An "array" is written (size, (elem1, elem2, ...)); the following two
 * macros are applied directly on such an array to extract one half of it.
 */
#define	SF_GET_ELEMS(size, elems)		elems
//#define	SF_ELEMS(array)			SF_GET_ELEMS array

#define	SF_GET_SIZE(size, elems)		size
//#define	SF_SIZE(array)			SF_GET_SIZE array

/*
 * SF_CALL() exists only so that its arguments are fully expanded before
 * SF_CALL1() pastes the size to the macro name and applies the result on
 * the elements: SF_FOREACH(M, (2, (x, y))) becomes M2 (x, y).
 */
#define	SF_CALL1(macro, count, elems)		macro##count elems
#define	SF_CALL(macro, count, elems)		SF_CALL1(macro, count, elems)

#define	SF_FOREACH(macro, array)		SF_CALL(macro, SF_GET_SIZE array, SF_GET_ELEMS array)


/*
 * One entry of internal_types[] (the trailing comma is included):
 *	name			type to use when the entry matches
 *	alias			encoding name as found in the script
 *	accepted_array		array of the detected types this alias is valid for
 */
#define	SCRIPTFILE_TYPE(name, alias, accepted_array) \
	{ \
		ScriptFile::SCRIPTFILE_TYPE_##name, \
		alias, \
		SF_FOREACH(SF_TYPE_TO_FLAG, accepted_array) \
	},
196
197 static const ScriptFile::sf_type_t internal_types[] =
198 {
199 // ASCII
200 SCRIPTFILE_TYPE(ASCII, "ASCII", (1, (ISO88591)))
201 SCRIPTFILE_TYPE(ASCII, "USASCII", (1, (ISO88591)))
202 SCRIPTFILE_TYPE(ASCII, "CSASCII", (1, (ISO88591)))
203 SCRIPTFILE_TYPE(ASCII, "US", (1, (ISO88591)))
204 SCRIPTFILE_TYPE(ASCII, "ISO646US", (1, (ISO88591)))
205 SCRIPTFILE_TYPE(ASCII, "ISO646.IRV", (1, (ISO88591)))
206 SCRIPTFILE_TYPE(ASCII, "ISO646.IRV:1991", (1, (ISO88591)))
207 SCRIPTFILE_TYPE(ASCII, "ISO646.1991IRV", (1, (ISO88591)))
208 SCRIPTFILE_TYPE(ASCII, "ISOIR6", (1, (ISO88591)))
209 SCRIPTFILE_TYPE(ASCII, "ANSIX3.4-1968", (1, (ISO88591)))
210 SCRIPTFILE_TYPE(ASCII, "ANSIX3.4-1986", (1, (ISO88591)))
211 SCRIPTFILE_TYPE(ASCII, "CP367", (1, (ISO88591)))
212 SCRIPTFILE_TYPE(ASCII, "IBM367", (1, (ISO88591)))
213
214 // ISO8859-1
215 SCRIPTFILE_TYPE(SAME, "88591", (1, (ISO88591)))
216 SCRIPTFILE_TYPE(SAME, "88591:1987", (1, (ISO88591)))
217 SCRIPTFILE_TYPE(SAME, "ISO88591", (1, (ISO88591)))
218 SCRIPTFILE_TYPE(SAME, "ISO88591:1987", (1, (ISO88591)))
219 SCRIPTFILE_TYPE(SAME, "ISOIR100", (1, (ISO88591)))
220 SCRIPTFILE_TYPE(SAME, "CSISOLATIN1", (1, (ISO88591)))
221 SCRIPTFILE_TYPE(SAME, "LATIN1", (1, (ISO88591)))
222 SCRIPTFILE_TYPE(SAME, "L1", (1, (ISO88591)))
223 SCRIPTFILE_TYPE(SAME, "CP819", (1, (ISO88591)))
224 SCRIPTFILE_TYPE(SAME, "IBM819", (1, (ISO88591)))
225
226 // UTF-8
227 SCRIPTFILE_TYPE(UTF8, "UTF8", (2, (ISO88591, UTF8)))
228
229 // CESU-8
230 SCRIPTFILE_TYPE(CESU8, "CESU8", (2, (ISO88591, UTF8)))
231
232 // UCS-2
233 SCRIPTFILE_TYPE(SAME, "UCS2", (2, (UCS2LE, UCS2BE)))
234 SCRIPTFILE_TYPE(SAME, "CSUNICODE", (2, (UCS2LE, UCS2BE)))
235 SCRIPTFILE_TYPE(SAME, "ISO10646:1993/UCS2", (2, (UCS2LE, UCS2BE)))
236 SCRIPTFILE_TYPE(SAME, "10646/UCS2", (2, (UCS2LE, UCS2BE)))
237 SCRIPTFILE_TYPE(SAME, "106461/UCS2", (2, (UCS2LE, UCS2BE)))
238 SCRIPTFILE_TYPE(SAME, "ISO10646UCS2", (2, (UCS2LE, UCS2BE)))
239 SCRIPTFILE_TYPE(SAME, "10646UCS2", (2, (UCS2LE, UCS2BE)))
240
241 SCRIPTFILE_TYPE(SAME, "UCS2INTERNAL", (2, (UCS2LE, UCS2BE)))
242 SCRIPTFILE_TYPE(SAME, "UCS2SWAPPED", (2, (UCS2LE, UCS2BE)))
243
244 SCRIPTFILE_TYPE(SAME, "UCS2BE", (1, (UCS2BE)))
245 SCRIPTFILE_TYPE(SAME, "UNICODEBIG", (1, (UCS2BE)))
246 SCRIPTFILE_TYPE(SAME, "UNICODE11", (1, (UCS2BE)))
247 SCRIPTFILE_TYPE(SAME, "CSUNICODE11", (1, (UCS2BE)))
248
249 SCRIPTFILE_TYPE(SAME, "UCS2LE", (1, (UCS2LE)))
250 SCRIPTFILE_TYPE(SAME, "UNICODEOLITTLE", (1, (UCS2LE)))
251
252 // UTF-16
253 SCRIPTFILE_TYPE(UTF16BE, "UTF16", (1, (UCS2BE)))
254 SCRIPTFILE_TYPE(UTF16BE, "UTF16BE", (1, (UCS2BE)))
255
256 SCRIPTFILE_TYPE(UTF16LE, "UTF16", (1, (UCS2LE)))
257 SCRIPTFILE_TYPE(UTF16LE, "UTF16LE", (1, (UCS2LE)))
258
259 // UCS-4
260 SCRIPTFILE_TYPE(SAME, "UCS4", (2, (UCS4LE, UCS4BE)))
261 SCRIPTFILE_TYPE(SAME, "CSUCS4", (2, (UCS4LE, UCS4BE)))
262 SCRIPTFILE_TYPE(SAME, "ISO10646", (2, (UCS4LE, UCS4BE)))
263 SCRIPTFILE_TYPE(SAME, "ISO10646:1993", (2, (UCS4LE, UCS4BE)))
264 SCRIPTFILE_TYPE(SAME, "ISO10646:1993/UCS4", (2, (UCS4LE, UCS4BE)))
265 SCRIPTFILE_TYPE(SAME, "10646", (2, (UCS4LE, UCS4BE)))
266 SCRIPTFILE_TYPE(SAME, "10646/UCS4", (2, (UCS4LE, UCS4BE)))
267 SCRIPTFILE_TYPE(SAME, "106461", (2, (UCS4LE, UCS4BE)))
268 SCRIPTFILE_TYPE(SAME, "106461/UCS4", (2, (UCS4LE, UCS4BE)))
269
270 SCRIPTFILE_TYPE(SAME, "UCS4INTERNAL", (2, (UCS4LE, UCS4BE)))
271 SCRIPTFILE_TYPE(SAME, "UCS4SWAPPED", (2, (UCS4LE, UCS4BE)))
272
273 SCRIPTFILE_TYPE(SAME, "UCS4BE", (1, (UCS4BE)))
274 SCRIPTFILE_TYPE(SAME, "UCS4LE", (1, (UCS4LE)))
275
276 // UTF-32
277 SCRIPTFILE_TYPE(UTF32BE, "UTF32", (1, (UCS4BE)))
278 SCRIPTFILE_TYPE(UTF32BE, "UTF32BE", (1, (UCS4BE)))
279
280 SCRIPTFILE_TYPE(UTF32LE, "UTF32", (1, (UCS4LE)))
281 SCRIPTFILE_TYPE(UTF32LE, "UTF32LE", (1, (UCS4LE)))
282 };
283
284
285
286
287
288
ScriptFile(ScriptFile * parent)289 ScriptFile::ScriptFile(ScriptFile *parent)
290 : f_parent(parent)
291 {
292 //f_parent -- already initialized in decl.
293 f_filename = 0;
294 f_line = 0;
295 f_first_line = 0;
296 f_type = SCRIPTFILE_TYPE_UNKNOWN;
297 f_file = 0;
298 f_last_errno = 0;
299 f_last_char = '\0';
300 f_unread_count = 0;
301 //f_unread -- the counter is at zero
302 f_unget_count = 0;
303 //f_unget -- the counter is at zero
304 f_iconvertor_open = false;
305 //f_convertor -- flag is false
306 f_mb_count = 0;
307 //f_multibytes -- counter is at zero
308 f_read_actionscript = false;
309 }
310
311
~ScriptFile()312 ScriptFile::~ScriptFile()
313 {
314 Reset();
315 }
316
317
Reset(void)318 void ScriptFile::Reset(void)
319 {
320 CloseFile();
321 // sswf_clean(&f_filename); -- this is used here and there, don't delete
322
323 f_line = 0;
324 f_last_char = 0;
325 f_unread_count = 0;
326 f_unget_count = 0;
327 f_type = SCRIPTFILE_TYPE_UNKNOWN;
328 f_mb_count = 0;
329
330 if(f_iconvertor_open) {
331 iconv_close(f_iconvertor);
332 f_iconvertor_open = false;
333 }
334 }
335
336
Line(void) const337 unsigned int ScriptFile::Line(void) const
338 {
339 return f_line;
340 }
341
342
Parent(void)343 ScriptFile *ScriptFile::Parent(void)
344 {
345 return f_parent;
346 }
347
348
Filename(void)349 const char *ScriptFile::Filename(void)
350 {
351 return f_filename;
352 }
353
354
SetReadActionscript(bool yes)355 void ScriptFile::SetReadActionscript(bool yes)
356 {
357 f_read_actionscript = yes;
358 }
359
360
FindFile(const char * filename,sswf::Vectors & user_include_paths,bool use_internal_paths)361 int ScriptFile::FindFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths)
362 {
363 // TODO: this shouldn't be hard coded; instead some deep well hidden
364 // configuration file should specify these default directories (and
365 // the path to that setup file, where do we get it?!)
366 static const char * default_include_paths[] = {
367 // Alexis' suggested install dir.
368 "/usr/include/sswf/scripts",
369 // Linux
370 "/usr/share/sswf/scripts",
371 "/usr/share/sswf/include/scripts",
372 "/usr/local/share/sswf/scripts",
373 "/usr/local/share/sswf/include/scripts",
374 // MAC OS X with Fink
375 "/sw/local/share/sswf/scripts",
376 "/sw/local/share/sswf/include/scripts",
377 // IRIX
378 "/opt/sswf/scripts",
379 "/opt/sswf/include/scripts",
380 0
381 };
382 const char *s, **p;
383 char *name;
384 int idx, max;
385
386 if(show_input_search) {
387 printf(" %% File \"%s\" exists?\n", filename);
388 }
389
390 /* special case, use stdin instead of a file */
391 if(strcmp(filename, "-") == 0) {
392 f_filename = "*standard input*";
393 f_file = stdin;
394 if(show_input_filenames) {
395 printf(" -> Input File: \"%s\".\n", f_filename);
396 }
397 return 0;
398 }
399
400 /* check file as is (from current dir.) */
401 f_file = fopen(filename, "rb");
402 if(f_file != NULL) {
403 f_filename = sswf_strdup(filename);
404 if(show_input_filenames) {
405 printf(" -> Input File: \"%s\".\n", f_filename);
406 }
407 return 0;
408 }
409 f_last_errno = errno;
410 if(f_last_errno != ENOENT) {
411 return -1;
412 }
413
414 /* a file specified with a full path can't be searched any more */
415 s = filename;
416 while(*s != '/' && *s != '\\' && *s != '\0' && *s != ':') {
417 s++;
418 }
419 if((s == filename && (s[0] == '/' || s[0] == '\\')) || s[0] == ':') {
420 return -1;
421 }
422
423 /* look within the user include directories */
424 max = user_include_paths.Count();
425 for(idx = 0; idx < max; idx++) {
426 s = (const char *) user_include_paths.Get(idx);
427 name = sswf_strchild(s, filename);
428 if(show_input_search) {
429 printf(" %% File \"%s\" exists?\n", name);
430 }
431 f_file = fopen(name, "rb");
432 if(f_file != 0) {
433 f_filename = name;
434 if(show_input_filenames) {
435 printf(" -> Input File: \"%s\".\n", f_filename);
436 }
437 return 0;
438 }
439 f_last_errno = errno;
440 sswf_free(name);
441 if(f_last_errno != ENOENT) {
442 return -1;
443 }
444 }
445
446 /* if not turned off, try the internal paths now */
447 if(use_internal_paths) {
448 for(p = default_include_paths; *p != 0; p++) {
449 name = sswf_strchild(*p, filename);
450 if(show_input_search) {
451 printf(" %% File \"%s\" exists?\n", name);
452 }
453 f_file = fopen(name, "rb");
454 if(f_file != 0) {
455 f_filename = name;
456 if(show_input_filenames) {
457 printf(" -> Input File: \"%s\".\n", f_filename);
458 }
459 return 0;
460 }
461 f_last_errno = errno;
462 sswf_free(name);
463 if(f_last_errno != ENOENT) {
464 return -1;
465 }
466 }
467 }
468
469 /* file not found... */
470 return -1;
471 }
472
473
OpenFile(const char * filename,sswf::Vectors & user_include_paths,bool use_internal_paths)474 int ScriptFile::OpenFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths)
475 {
476 int a, b, c, d;
477 unsigned long input;
478 char encoding[256];
479 const ScriptFile::sf_type_t *types;
480
481 Reset();
482
483 if(FindFile(filename, user_include_paths, use_internal_paths) != 0) {
484 fprintf(stderr, "ERROR: can't open file \"%s\" (errno: %d).\n", filename, errno);
485 return 1;
486 }
487 f_line = 1;
488
489 /* at the very start we need to check for U16/U32 files */
490 /*
491 * The following are the tests which will be conducted
492 * on the input to try to determine the type of the file.
493 * Note that we need at least 4 characters in any SSFW
494 * file. Don't forget also that the file needs to start
495 * with a comment.
496 *
497 * File Starts with Default Encoding
498 * 0xEF 0xBB 0xBF 0x?? ... UTF8
499 * 0x?? 0x00 0x?? 0x00 ... USC2LE
500 * 0x00 0x?? 0x00 0x?? ... USC2BE
501 * 0xFF 0xFE 0x?? 0x00 ... USC2LE
502 * 0xFE 0xFF 0x00 0x?? ... USC2BE
503 * 0x?? 0x00 0x00 0x00 ... USC4LE
504 * 0x00 0x00 0x00 0x?? ... USC4BE
505 * 0xFF 0xFE 0x00 0x00 ... USC4LE
506 * 0x00 0x00 0xFE 0xFF ... USC4BE
507 *
508 * The rest will force UNKNOWN and a comment must be present
509 * on the first line. This comment must include the encoding.
510 * For instance:
511 *
512 * encoding="utf-16"
513 *
514 * Note that even auto-detected formats can include an
515 * encoding. In that case, the user specified encoding
516 * needs to match what we have detected (we can't
517 * switch from UCS-2LE to UTF-32BE).
518 *
519 * IMPORTANT NOTE:
520 * It is to be noted that the parser will only
521 * accept a few characters at the beginning of a
522 * file and this is why this algorithm works this
523 * way.
524 *
525 * The possible characters are as defined here:
526 *
527 * . spaces (U+0009 '\t', U+000A '\n', U+000C '\f',
528 * U+000D '\r', U+0020 ' ', U+FEFF)
529 * . comment (U+0028 '(', U+002F '/')
530 * . identifier (U+0041 'A' to U+005A 'Z', U+005F '_'
531 * U+0061 'a' to U+007A 'z')
532 *
533 * The identifier can be either an object name
534 * (such as "sequence", "text", "button"...)
535 * or the name of a variable (as in "a = 56")
536 */
537 a = ReadByte();
538 b = ReadByte();
539 c = ReadByte();
540 d = ReadByte();
541
542 if(a == -1 || b == -1 || c == -1 || d == -1) {
543 /*
544 * this is not good, a script can't be
545 * less than 4 bytes?!?
546 */
547 CloseFile();
548 fprintf(stderr, "ERROR: file \"%s\" seems empty or too small a file for a ScriptSWF.\n", filename);
549 f_last_errno = EBADF;
550 return -1; // return EOF
551 }
552 UnreadByte(d);
553 UnreadByte(c);
554 UnreadByte(b);
555 UnreadByte(a);
556
557 if(a == 0xEF && b == 0xBB && c == 0xBF) {
558 // UTF-8 starting with 0xFEFF is represented by 0xEF, 0xBB and 0xBF
559 // NOTE:
560 // This sequence represent i with trema, the double closing quotes '>>'
561 // and an upside down question mark (for Spanish); that's really
562 // unlikely not UTF-8!
563 f_type = SCRIPTFILE_TYPE_UTF8;
564 }
565 if(a != 0 && b == 0 && c != 0 && d == 0) {
566 f_type = SCRIPTFILE_TYPE_UCS2LE;
567 }
568 else if(a == 0 && b != 0 && c == 0 && d != 0) {
569 f_type = SCRIPTFILE_TYPE_UCS2BE;
570 }
571 else if(a == 0xFF && b == 0xFE && c != 0 && d == 0) {
572 f_type = SCRIPTFILE_TYPE_UCS2LE;
573 }
574 else if(a == 0xFE && b == 0xFF && c == 0 && d != 0) {
575 f_type = SCRIPTFILE_TYPE_UCS2BE;
576 }
577 else if(a == 0xFF && b == 0xFE && c == 0 && d == 0) {
578 f_type = SCRIPTFILE_TYPE_UCS4LE;
579 }
580 else if(a == 0 && b == 0 && c == 0xFE && d == 0xFF) {
581 f_type = SCRIPTFILE_TYPE_UCS4BE;
582 }
583 else if(a != 0 && b == 0 && c == 0 && d == 0) {
584 f_type = SCRIPTFILE_TYPE_UCS4LE;
585 }
586 else if(a == 0 && b == 0 && c == 0 && d != 0) {
587 f_type = SCRIPTFILE_TYPE_UCS4BE;
588 }
589 else {
590 /*
591 * In this case we assume ISO-8859-1
592 * this is useful to read the starting
593 * comment as if it were read with
594 * ReadByte() calls!
595 */
596 f_type = SCRIPTFILE_TYPE_ISO88591;
597 }
598
599 /*
600 * We expect (want) a comment with the name of
601 * an encoding; as we read the comment check for
602 * the following: 'encoding=\"<name>\"'; only the
603 * first encoding entry is used
604 */
605 retry:
606 do {
607 a = GetChar();
608 } while(a == ' ' || a == '\t' || a == '\n');
609 b = '\0';
610 if(a == '/') {
611 a = GetChar();
612 if(a == '/') { // C++ comment
613 b = '\n';
614 }
615 else if(a == '*') { /* standard C comment */
616 b = '/';
617 }
618 }
619 else if(a == '(') {
620 a = GetChar();
621 if(a == '*') { // standard Pascal comment
622 b = ')';
623 }
624 }
625 if(b == '\0') {
626 /*
627 * This is wrong, we must have a comment at the
628 * start of the file!
629 */
630 CloseFile();
631 fprintf(stderr, "ERROR: can't determine the encoding of \"%s\", no proper comment found at the beginning of the file.\n", filename);
632 f_last_errno = EINVAL;
633 return 1;
634 }
635 c = 0;
636 a = GetChar();
637 for(;;) {
638 if(a == -1) {
639 CloseFile();
640 fprintf(stderr, "ERROR: end of file \"%s\" found before the end of the starting comment.\n", filename);
641 f_last_errno = EBADF;
642 return -1;
643 }
644 if(a == '*') { // C or Pascal comment ends
645 a = GetChar();
646 if(a == b) {
647 // Ooops no encoding="..." in this comment!
648 // Just try again
649 goto retry;
650 }
651 c = 0; // new word after an asterisk...
652 continue;
653 }
654 if((a < 'A' || a > 'Z') && (a < 'a' || a > 'z') && a != '\"' && a != '=') {
655 // new word
656 if(b == '\n' && a == '\n') { // C++ comment ends
657 // Ooops no encoding="..." in this comment!
658 // Just try again
659 goto retry;
660 }
661 c = 0;
662 }
663 else if(c < 10) {
664 // searching: encoding="...
665 encoding[c] = a;
666 c++;
667 if(c == 10 && strncasecmp(encoding, "encoding=\"", 10) == 0) {
668 c = 0;
669 a = GetChar();
670 while(a != '"' && a != '\n' && a != -1 && c < (int) (sizeof(encoding) - 1)) {
671 if(a != '-' && a != '_') {
672 encoding[c] = a;
673 c++;
674 }
675 a = GetChar();
676 }
677 while(c > 0 && encoding[c - 1] == '/') {
678 c--;
679 }
680 encoding[c] = '\0';
681 break;
682 }
683 }
684 a = GetChar();
685 }
686 // we found an encoding="..." entry
687 // let's skip the rest of the comment first
688 if(b == '\n') {
689 do {
690 a = GetChar();
691 if(a == -1) {
692 fprintf(stderr, "ERROR: end of file \"%s\" found before the end of a comment.\n", filename);
693 CloseFile();
694 f_last_errno = EBADF;
695 return -1;
696 }
697 } while(a != '\n');
698 }
699 else {
700 do {
701 a = GetChar();
702 while(a == '*') {
703 a = GetChar();
704 if(a == -1) {
705 fprintf(stderr, "ERROR: end of file \"%s\" found before the end of a comment.\n", filename);
706 CloseFile();
707 f_last_errno = EBADF;
708 return -1;
709 }
710 if(a == b) {
711 a = -1;
712 break;
713 }
714 }
715 } while(a != -1);
716 }
717
718 input = 1 << f_type;
719 types = internal_types;
720 while(types->f_type != SCRIPTFILE_TYPE_UNKNOWN) {
721 if(strcasecmp(encoding, types->f_name) == 0 && (types->f_input & input) != 0) {
722 if(types->f_type != SCRIPTFILE_TYPE_SAME) {
723 f_type = types->f_type;
724 }
725 // we found the proper type, we're done here.
726 return 0;
727 }
728 types++;
729 }
730 // didn't find anything compatible, check out for an iconv(3C) convertion
731 if(f_type != SCRIPTFILE_TYPE_ISO88591) {
732 // the encoding doesn't match and it should!
733 f_type = SCRIPTFILE_TYPE_UNKNOWN;
734 fprintf(stderr, "ERROR: unacceptable encoding \"%s\" for this file.\n", encoding);
735 return 1;
736 }
737
738 // the input encoding needs to be an 8 bits encoding!
739 f_iconvertor = iconv_open("UCS-4-INTERNAL", encoding);
740 if(f_iconvertor == (iconv_t) -1) {
741 f_last_errno = errno;
742 CloseFile();
743 fprintf(stderr, "ERROR: encoding \"%s\" not understood. Please, check your iconv_open() manual page for a complete list of possible convertions.\n", encoding);
744 return 1;
745 }
746
747 f_type = SCRIPTFILE_TYPE_MULTIBYTES;
748
749 return 0;
750
751 #if 0
752 // old stuff...
753 switch(f_type) {
754 case SCRIPTFILE_TYPE_UTF16LE:
755 if(strcasecmp(encoding, "UCS-2") == 0
756 && strcasecmp(encoding, "UCS-2LE") == 0) {
757 f_type = SCRIPTFILE_TYPE_UCS2LE;
758 }
759 else if(strcasecmp(encoding, "UTF-16") != 0
760 && strcasecmp(encoding, "UTF-16LE") != 0) {
761 // we've got a problem here!
762 f_type = SCRIPTFILE_TYPE_UNKNOWN;
763 }
764 break;
765
766 case SCRIPTFILE_TYPE_UTF16BE:
767 if(strcasecmp(encoding, "UCS-2") == 0
768 || strcasecmp(encoding, "UCS-2BE") == 0) {
769 f_type = SCRIPTFILE_TYPE_UCS2BE;
770 }
771 else if(strcasecmp(encoding, "UTF-16") != 0
772 && strcasecmp(encoding, "UTF-16BE") != 0) {
773 // we've got a problem here!
774 f_type = SCRIPTFILE_TYPE_UNKNOWN;
775 }
776 break;
777
778 case SCRIPTFILE_TYPE_UCS2LE:
779 if(strcasecmp(encoding, "UTF-16") != 0
780 && strcasecmp(encoding, "UTF-16LE") != 0) {
781 f_type = SCRIPTFILE_TYPE_UTF16LE;
782 }
783 else if(strcasecmp(encoding, "UCS-2") != 0
784 && strcasecmp(encoding, "UCS-2LE") != 0) {
785 // we've got a problem here!
786 f_type = SCRIPTFILE_TYPE_UNKNOWN;
787 }
788 break;
789
790 case SCRIPTFILE_TYPE_UCS2BE:
791 if(strcasecmp(encoding, "UTF-16") == 0
792 || strcasecmp(encoding, "UTF-16BE") == 0) {
793 f_type = SCRIPTFILE_TYPE_UTF16BE;
794 }
795 else if(strcasecmp(encoding, "UCS-2") != 0
796 && strcasecmp(encoding, "UCS-2BE") != 0) {
797 // we've got a problem here!
798 f_type = SCRIPTFILE_TYPE_UNKNOWN;
799 }
800 break;
801
802 case SCRIPTFILE_TYPE_UCS4LE:
803 if(strcasecmp(encoding, "UTF-32") == 0
804 && strcasecmp(encoding, "UTF-32LE") == 0) {
805 f_type = SCRIPTFILE_TYPE_UTF32LE;
806 }
807 else if(strcasecmp(encoding, "UCS-4") != 0
808 && strcasecmp(encoding, "UCS-4LE") != 0) {
809 // we've got a problem here!
810 f_type = SCRIPTFILE_TYPE_UNKNOWN;
811 }
812 break;
813
814 case SCRIPTFILE_TYPE_UCS4BE:
815 if(strcasecmp(encoding, "UTF-32") == 0
816 || strcasecmp(encoding, "UTF-32BE") == 0) {
817 f_type = SCRIPTFILE_TYPE_UTF32BE;
818 }
819 else if(strcasecmp(encoding, "UCS-4") != 0
820 && strcasecmp(encoding, "UCS-4BE") != 0) {
821 // we've got a problem here!
822 f_type = SCRIPTFILE_TYPE_UNKNOWN;
823 }
824 break;
825
826 case SCRIPTFILE_TYPE_ISO8859_1:
827 // now we have a name, check for what we understand internally...
828 if(strcasecmp(encoding, "UTF-8") == 0) {
829 f_type = SCRIPTFILE_TYPE_UTF8;
830 }
831 else if(strcasecmp(encoding, "iso-8859-1") == 0 || strcasecmp(encoding, "iso_8859-1") == 0 || strcasecmp(encoding, "iso8859-1") == 0) {
832 f_type = SCRIPTFILE_TYPE_ISO8859_1;
833 }
834 else if(strcasecmp(encoding, "ascii") == 0) {
835 f_type = SCRIPTFILE_TYPE_ASCII;
836 }
837 else {
838 // otherwise, use iconv() facility
839 // the input encoding needs to be an 8 bits encoding!
840 f_iconvertor = iconv_open("UCS-4-INTERNAL", encoding);
841 if(f_iconvertor == (iconv_t) -1) {
842 f_last_errno = errno;
843 CloseFile();
844 fprintf(stderr, "ERROR: encoding \"%s\" not understood. Please, check your iconv_open() manual page for a complete list of possible convertions.\n", encoding);
845 return 1;
846 }
847 f_type = SCRIPTFILE_TYPE_MULTIBYTES;
848 }
849 break;
850
851 #if DEBUG
852 default:
853 assert(0, "INTERNAL ERROR: f_type seems to be set to a value we didn't have control over (%d).", f_type);
854 #endif
855
856 }
857 #endif
858 }
859
860
CloseFile(void)861 void ScriptFile::CloseFile(void)
862 {
863 if(f_file != 0) {
864 /*
865 * Avoid closing the standard input file since this
866 * is usually done by the system at exit()
867 */
868 if(f_file != stdin) {
869 fclose(f_file);
870 }
871 f_file = 0;
872 }
873 }
874
875
876
877
ReadActionscript(void)878 int ScriptFile::ReadActionscript(void)
879 {
880 char *str;
881 int max, pos, count;
882 size_t size;
883 bool in_string;
884 c_t c, last_char, quote;
885
886 // create the node at the start so the f_line is at the start
887 // (we need to pass that to the parser so it err at the
888 // right line!)
889 yylval.node = node_alloc(NODE_TYPE_STRING, NODE_SUBTYPE_UNKNOWN, f_first_line);
890
891 str = (char *) sswf_malloc(256, "ReadActionscript() -- small string buffer");
892 /*
893 * We allocated 256, but save 1 byte for the null terminator
894 * and up to 6 for the last multi-byte
895 */
896 max = 256 - 6 - 1;
897 pos = 0;
898
899 // we read everything up to a closing '}' since an action
900 // script is always written between '{' and '}'
901 count = 1;
902 in_string = false;
903 quote = '\0';
904 c = ' ';
905 do {
906 last_char = c;
907 c = GetChar();
908 switch(c) {
909 case SCRIPTFILE_EOF:
910 case SCRIPTFILE_BAD:
911 count = 0;
912 c = '\0';
913 break;
914
915 case '{':
916 if(!in_string) {
917 count++;
918 }
919 break;
920
921 case '}':
922 if(!in_string) {
923 count--;
924 }
925 break;
926
927 case '"':
928 case '\'':
929 case '`':
930 if(in_string) {
931 if(quote == c && last_char != '\\') {
932 in_string = false;
933 }
934 }
935 else {
936 quote = c;
937 in_string = true;
938 }
939 break;
940
941 // other characters kept as is
942 }
943 if(count > 0) {
944 if(pos >= max) {
945 max += 256;
946 /*
947 * +6 because some multi-bytes take that many bytes
948 * +1 so the null terminator is reserved
949 */
950 str = (char *) sswf_remalloc(str, max + 6 + 1, "StrAppend() -- large string buffer");
951 }
952 size = 6;
953 // TODO: should we check for errors?
954 sswf::wctomb(&c, sizeof(c), str + pos, size);
955 pos += 6 - size;
956 }
957 } while(count > 0);
958 str[pos] = '\0';
959
960 // the '}' character needs to be restored
961 UngetChar(c);
962
963 #if ADJUT_STRINGS
964 /* on most systems this is really fast and it can save some memory */
965 str = sswf_remalloc(str, pos + 1, "ReadActionscript() -- adjusted to the minimum");
966 #endif
967
968 yylval.node->string = str;
969
970 //fprintf(stderr, "Read actionscript [%s]\n", str);
971
972 return STRING;
973 }
974
975
GetToken(void)976 int ScriptFile::GetToken(void)
977 {
978 c_t c;
979
980 yylloc.first_line = f_first_line = f_line;
981
982 if(f_read_actionscript) {
983 return ReadActionscript();
984 }
985
986 for(;;) {
987 do {
988 c = GetChar();
989 } while(c == ' ' || c == '\t' || c == '\f' || c == '\n');
990
991 if((c >= 'A' && c <= 'Z')
992 || (c >= 'a' && c <= 'z')
993 || c == '_'
994 || c >= 0x0C0) { // international character
995 /* an identifier or keyword */
996 return ReadIdentifier(c);
997 }
998
999 switch(c) {
1000 case SCRIPTFILE_EOF:
1001 return EOF;
1002
1003 case SCRIPTFILE_BAD:
1004 // TODO: ???
1005 return EOF;
1006
1007 case '0':
1008 case '1':
1009 case '2':
1010 case '3':
1011 case '4':
1012 case '5':
1013 case '6':
1014 case '7':
1015 case '8':
1016 case '9':
1017 return ReadValue(c);
1018
1019 case '\'':
1020 case '`':
1021 case '\"':
1022 return ReadString(c);
1023
1024 case '.':
1025 c = GetChar();
1026 if(c == '.') {
1027 return RANGE;
1028 }
1029 if(c >= '0' && c <= '9') {
1030 UngetChar(c);
1031 return ReadValue('.');
1032 }
1033 UngetChar(c);
1034 return '.';
1035
1036 case '*':
1037 c = GetChar();
1038 if(c == '*') {
1039 return POWER;
1040 }
1041 UngetChar(c);
1042 return '*';
1043
1044 case '<':
1045 c = GetChar();
1046 if(c == '?') {
1047 return MIN_OP;
1048 }
1049 if(c == '<') {
1050 return SHIFT_LEFT;
1051 }
1052 if(c == '>') {
1053 return NOT_EQUAL;
1054 }
1055 if(c == '=') {
1056 return LESS_EQUAL;
1057 }
1058 UngetChar(c);
1059 return '<';
1060
1061 case '>':
1062 c = GetChar();
1063 if(c == '?') {
1064 return MAX_OP;
1065 }
1066 if(c == '>') {
1067 c = GetChar();
1068 if(c == '>') {
1069 return SHIFT_RIGHT_UNSIGNED;
1070 }
1071 UngetChar(c);
1072 return SHIFT_RIGHT;
1073 }
1074 if(c == '=') {
1075 return GREATER_EQUAL;
1076 }
1077 UngetChar(c);
1078 return '>';
1079
1080 case '!':
1081 c = GetChar();
1082 if(c == '<') {
1083 return ROTATE_LEFT;
1084 }
1085 if(c == '>') {
1086 return ROTATE_RIGHT;
1087 }
1088 if(c == '=') {
1089 return NOT_EQUAL;
1090 }
1091 UngetChar(c);
1092 return '!';
1093
1094 case '=':
1095 c = GetChar();
1096 if(c == '=') {
1097 return EQUAL;
1098 }
1099 UngetChar(c);
1100 return '=';
1101
1102 case ':':
1103 c = GetChar();
1104 if(c == '=') {
1105 return c;
1106 }
1107 UngetChar(c);
1108 return ':';
1109
1110 case '|':
1111 c = GetChar();
1112 if(c == '|') {
1113 return LOGICAL_OR;
1114 }
1115 UngetChar(c);
1116 return '|';
1117
1118 case '^':
1119 c = GetChar();
1120 if(c == '^') {
1121 return LOGICAL_XOR;
1122 }
1123 UngetChar(c);
1124 return '^';
1125
1126 case '&':
1127 c = GetChar();
1128 if(c == '&') {
1129 return LOGICAL_AND;
1130 }
1131 UngetChar(c);
1132 return '&';
1133
1134 case '/':
1135 c = GetChar();
1136 if(c == '*') {
1137 SkipComment('/');
1138 continue;
1139 }
1140 if(c == '/') {
1141 // C++ comment, read until '\n'
1142 do {
1143 c = GetChar();
1144 } while(c != '\n' && c != SCRIPTFILE_EOF);
1145 continue;
1146 }
1147 UngetChar(c);
1148 return '/';
1149
1150 case '(':
1151 c = GetChar();
1152 if(c == '*') {
1153 SkipComment(')');
1154 continue;
1155 }
1156 UngetChar(c);
1157 return '(';
1158
1159 // anything else is returned as is
1160 default:
1161 return c;
1162
1163 }
1164 }
1165
1166 return 0;
1167 }
1168
1169
SkipComment(int close)1170 void ScriptFile::SkipComment(int close)
1171 {
1172 register int c, p;
1173
1174 c = 0;
1175 do {
1176 p = c;
1177 c = GetChar();
1178 } while(c != SCRIPTFILE_EOF && c != SCRIPTFILE_BAD && (c != close || p != '*'));
1179 }
1180
1181
1182 struct keyword_t {
1183 size_t f_size; // number of chars in the keyword
1184 const char * f_name; // the keyword
1185 node_type_t f_type; // the type or unit
1186 node_type_t f_subtype; // the sub-type
1187 unsigned int f_flags; // what we need to do here
1188 };
// How ReadIdentifier() handles a keyword once it matched
#define	KEYWORD_FLAG_DIRECT		0x00000000	// return f_type as the token
#define	KEYWORD_FLAG_OBJECT		0x00000001	// allocate a node and return OBJECT
#define	KEYWORD_FLAG_UNIT		0x00000002	// f_type is the corresponding node_unit_t
#define	KEYWORD_FLAG_INTEGER		0x00000004	// f_type is an integer (for FALSE and TRUE)

// The keyword may be written as several words separated by blanks
#define	KEYWORD_FLAG_MULTIWORD		0x80000000


// Generic entry; the size is computed from the string literal
#define	KEYWORD(w, type, subtype, flg) \
	{ (sizeof(w) - 1), (w), ((node_type_t) type), (subtype), (flg) }

// Entry for an object: the name is also the NODE_SUBTYPE_... suffix
#define	OBJECT_KEYWORD(w, flg) \
	KEYWORD(#w, NODE_TYPE_OBJECT, NODE_SUBTYPE_##w, (flg) | KEYWORD_FLAG_OBJECT)

// Entry for a keyword returned as is: the name is also the token
#define	DIRECT_KEYWORD(w, flg) \
	KEYWORD(#w, (w), NODE_SUBTYPE_UNKNOWN, (flg))

// Entry for a unit: the token is UNIT_<unit>, the sub-type NODE_SUBTYPE_<name>
#define	UNIT_KEYWORD(w, unit, flg) \
	KEYWORD(#w, UNIT_##unit, NODE_SUBTYPE_##w, (flg) | KEYWORD_FLAG_UNIT)
1202
1203
1204
1205 struct all_keywords_t {
1206 keyword_t * f_keywords;
1207 size_t f_count;
1208 };
1209 #define ALL_KEYWORD_ENTRY(x) { x##_keywords, (sizeof(x##_keywords) / sizeof(keyword_t)) }
1210
1211
1212
1213 keyword_t a_keywords[] = {
1214 OBJECT_KEYWORD(ACTION, 0),
1215 DIRECT_KEYWORD(ACTION_SCRIPT, 0)
1216 };
1217
1218 keyword_t b_keywords[] = {
1219 UNIT_KEYWORD(BC, COLOR, 0),
1220 // BLOCK == LIST
1221 KEYWORD("BLOCK", NODE_TYPE_OBJECT, NODE_SUBTYPE_LIST, KEYWORD_FLAG_OBJECT),
1222 OBJECT_KEYWORD(BUTTON, 0)
1223 };
1224
1225 keyword_t c_keywords[] = {
1226 OBJECT_KEYWORD(CATCH, 0),
1227 UNIT_KEYWORD(CM, SIZE, 0),
1228 OBJECT_KEYWORD(COLOR_TRANSFORM, KEYWORD_FLAG_MULTIWORD),
1229 OBJECT_KEYWORD(COLOR, 0)
1230 };
1231
1232 keyword_t d_keywords[] = {
1233 KEYWORD("DEFINE_SHAPE", NODE_TYPE_OBJECT, NODE_SUBTYPE_SHAPE, KEYWORD_FLAG_OBJECT | KEYWORD_FLAG_MULTIWORD),
1234 UNIT_KEYWORD(DEG, ANGLE, 0),
1235 OBJECT_KEYWORD(DO_ACTION, KEYWORD_FLAG_MULTIWORD)
1236 };
1237
1238 keyword_t e_keywords[] = {
1239 OBJECT_KEYWORD(EDGES, 0),
1240 OBJECT_KEYWORD(EDIT_TEXT, KEYWORD_FLAG_MULTIWORD),
1241 DIRECT_KEYWORD(ELSE, 0),
1242 OBJECT_KEYWORD(END, 0),
1243 KEYWORD("ENVELOP", NODE_TYPE_OBJECT, NODE_SUBTYPE_ENVELOPE, KEYWORD_FLAG_OBJECT),
1244 OBJECT_KEYWORD(ENVELOPE, 0),
1245 OBJECT_KEYWORD(EXPORT, 0)
1246 };
1247
1248 keyword_t f_keywords[] = {
1249 KEYWORD("FALSE", 0, NODE_SUBTYPE_UNKNOWN, KEYWORD_FLAG_INTEGER),
1250 UNIT_KEYWORD(FC, COLOR, 0),
1251 OBJECT_KEYWORD(FILL_STYLE, KEYWORD_FLAG_MULTIWORD),
1252 OBJECT_KEYWORD(FINALLY, 0),
1253 OBJECT_KEYWORD(FONT, 0),
1254 DIRECT_KEYWORD(FOR, 0),
1255 UNIT_KEYWORD(FPF, SPEED, 0),
1256 UNIT_KEYWORD(FPS, SPEED, 0),
1257 OBJECT_KEYWORD(FRAME_LABEL, KEYWORD_FLAG_MULTIWORD),
1258 UNIT_KEYWORD(FRM, TIME, 0),
1259 OBJECT_KEYWORD(FUNCTION, 0)
1260 };
1261
1262 keyword_t g_keywords[] = {
1263 OBJECT_KEYWORD(GLYPH, 0),
1264 UNIT_KEYWORD(GRAD, ANGLE, 0),
1265 OBJECT_KEYWORD(GRADIENT, 0)
1266 };
1267
1268 keyword_t i_keywords[] = {
1269 DIRECT_KEYWORD(IF, 0),
1270 OBJECT_KEYWORD(IMAGE, 0),
1271 OBJECT_KEYWORD(IMPORT, 0),
1272 UNIT_KEYWORD(IN, SIZE, 0)
1273 };
1274
1275 keyword_t l_keywords[] = {
1276 OBJECT_KEYWORD(LABEL, 0),
1277 OBJECT_KEYWORD(LINE_STYLE, KEYWORD_FLAG_MULTIWORD),
1278 OBJECT_KEYWORD(LIST, 0)
1279 };
1280
1281 keyword_t m_keywords[] = {
1282 OBJECT_KEYWORD(MATRIX, 0),
1283 OBJECT_KEYWORD(METADATA, 0),
1284 UNIT_KEYWORD(MIN, TIME, 0)
1285 };
1286
1287 keyword_t o_keywords[] = {
1288 OBJECT_KEYWORD(ON_EVENT, KEYWORD_FLAG_MULTIWORD)
1289 };
1290
1291 keyword_t p_keywords[] = {
1292 OBJECT_KEYWORD(PLACE_OBJECT, KEYWORD_FLAG_MULTIWORD),
1293 OBJECT_KEYWORD(POINTS, 0),
1294 UNIT_KEYWORD(PR, RATIO, 0),
1295 UNIT_KEYWORD(PX, SIZE, 0)
1296 };
1297
1298 keyword_t r_keywords[] = {
1299 UNIT_KEYWORD(RAD, ANGLE, 0),
1300 OBJECT_KEYWORD(RECT, 0),
1301 KEYWORD("RECTANGLE", NODE_TYPE_OBJECT, NODE_SUBTYPE_RECT, KEYWORD_FLAG_OBJECT),
1302 OBJECT_KEYWORD(REMOVE, 0),
1303 OBJECT_KEYWORD(REPLACE_OBJECT, KEYWORD_FLAG_MULTIWORD),
1304 UNIT_KEYWORD(RT, RATIO, 0)
1305 };
1306
1307 keyword_t s_keywords[] = {
1308 OBJECT_KEYWORD(SCRIPT_LIMITS, KEYWORD_FLAG_MULTIWORD),
1309 UNIT_KEYWORD(SEC, TIME, 0),
1310 OBJECT_KEYWORD(SEQUENCE, 0),
1311 OBJECT_KEYWORD(SET_BACKGROUND_COLOR, KEYWORD_FLAG_MULTIWORD),
1312 OBJECT_KEYWORD(SET_TAB_INDEX, KEYWORD_FLAG_MULTIWORD),
1313 OBJECT_KEYWORD(SHAPE, 0),
1314 OBJECT_KEYWORD(SHOW_FRAME, KEYWORD_FLAG_MULTIWORD),
1315 OBJECT_KEYWORD(SOUND_INFO, KEYWORD_FLAG_MULTIWORD),
1316 OBJECT_KEYWORD(SOUND, 0),
1317 OBJECT_KEYWORD(SPRITE, 0),
1318 OBJECT_KEYWORD(STATE, 0)
1319 };
1320
1321 keyword_t t_keywords[] = {
1322 OBJECT_KEYWORD(TEXT_SETUP, KEYWORD_FLAG_MULTIWORD),
1323 OBJECT_KEYWORD(TEXT, 0),
1324 KEYWORD("TRUE", 1, NODE_SUBTYPE_UNKNOWN, KEYWORD_FLAG_INTEGER),
1325 OBJECT_KEYWORD(TRY, 0),
1326 UNIT_KEYWORD(TW, SIZE, 0)
1327 };
1328
1329 keyword_t w_keywords[] = {
1330 OBJECT_KEYWORD(WITH, 0)
1331 };
1332
1333
1334
1335 all_keywords_t all_keywords[26] = {
1336 /* A */ ALL_KEYWORD_ENTRY(a),
1337 /* B */ ALL_KEYWORD_ENTRY(b),
1338 /* C */ ALL_KEYWORD_ENTRY(c),
1339 /* D */ ALL_KEYWORD_ENTRY(d),
1340 /* E */ ALL_KEYWORD_ENTRY(e),
1341 /* F */ ALL_KEYWORD_ENTRY(f),
1342 /* G */ ALL_KEYWORD_ENTRY(g),
1343 /* H */ { 0, 0 }, // ALL_KEYWORD_ENTRY(h),
1344 /* I */ ALL_KEYWORD_ENTRY(i),
1345 /* J */ { 0, 0 }, // ALL_KEYWORD_ENTRY(j),
1346 /* K */ { 0, 0 }, // ALL_KEYWORD_ENTRY(k),
1347 /* L */ ALL_KEYWORD_ENTRY(l),
1348 /* M */ ALL_KEYWORD_ENTRY(m),
1349 /* N */ { 0, 0 }, // ALL_KEYWORD_ENTRY(n),
1350 /* O */ ALL_KEYWORD_ENTRY(o),
1351 /* P */ ALL_KEYWORD_ENTRY(p),
1352 /* Q */ { 0, 0 }, // ALL_KEYWORD_ENTRY(q),
1353 /* R */ ALL_KEYWORD_ENTRY(r),
1354 /* S */ ALL_KEYWORD_ENTRY(s),
1355 /* T */ ALL_KEYWORD_ENTRY(t),
1356 /* U */ { 0, 0 }, // ALL_KEYWORD_ENTRY(u),
1357 /* V */ { 0, 0 }, // ALL_KEYWORD_ENTRY(v),
1358 /* W */ ALL_KEYWORD_ENTRY(w),
1359 /* X */ { 0, 0 }, // ALL_KEYWORD_ENTRY(x),
1360 /* Y */ { 0, 0 }, // ALL_KEYWORD_ENTRY(y),
1361 /* Z */ { 0, 0 } // ALL_KEYWORD_ENTRY(z)
1362 };
1363
1364
ReadIdentifier(c_t c)1365 int ScriptFile::ReadIdentifier(c_t c)
1366 {
1367 char a, identifier[256]; /* by default we expect that identifiers are less than 256 chars */
1368 char *id;
1369 const char *s1, *s2;
1370 size_t size;
1371 int pos, max, cnt, idx;
1372 const keyword_t *k;
1373 bool has_international, found;
1374 c_t ex;
1375
1376 id = identifier;
1377 has_international = c >= 0x80;
1378 if(has_international) {
1379 size = 6;
1380 sswf::wctomb(&c, sizeof(c), id, size);
1381 pos = 6 - size;
1382 }
1383 else {
1384 id[0] = (char) c;
1385 pos = 1;
1386 }
1387 max = sizeof(identifier) - 6 - 1;
1388
1389 // read one identifier -- keywords may be composed of
1390 // multiple identifiers, others will be read if required
1391 // only
1392 c = GetChar();
1393 while((c >= '0' && c <= '9')
1394 || (c >= 'A' && c <= 'Z')
1395 || (c >= 'a' && c <= 'z')
1396 || c == '_'
1397 || c >= 0x0C0) { // we accept any international character too
1398 if(pos >= max) {
1399 // need a larger buffer
1400 max += 256;
1401 if(id != identifier) {
1402 id = (char *) sswf_remalloc(id, max + 6 + 1, "ReadIdentifier() -- really large identifier buffer");
1403 }
1404 else {
1405 id = (char *) sswf_malloc(max + 6 + 1, "ReadIdentifier() -- large identifier buffer");
1406 memcpy(id, identifier, max - 256);
1407 }
1408 }
1409 if(c >= 0x80) {
1410 size = 6;
1411 sswf::wctomb(&c, sizeof(c), id + pos, size);
1412 pos += 6 - size;
1413 has_international = true;
1414 }
1415 else {
1416 // ASCII is anyway saved as is and this is much
1417 // faster than a crazy call to another function
1418 // (it also enables me to set the has_international
1419 // flag in the first part)
1420 id[pos] = (char) c;
1421
1422 //printf("Adding char [%c] %d\n", (char) c, (int) c);
1423
1424 pos++;
1425 }
1426 c = GetChar();
1427 }
1428 UngetChar(c);
1429 id[pos] = '\0';
1430
1431 //printf(stderr, "Got word [%s]...\n", id);
1432
1433 /*
1434 * Check for keywords...
1435 * NOTE: keywords only include [A-Z0-9_ \t\r\n]
1436 * (blanks are only if the keywords can be composed
1437 * of multiple words)
1438 *
1439 * The test below is valid since (1) we save the international
1440 * characters in UTF-8 and (2) the has_international flag will
1441 * make sure the function skips the test for keywords at once.
1442 */
1443 a = toupper(id[0]);
1444 if(!has_international
1445 && pos < 20 // no keyword longer than that
1446 && a >= 'A' // all keywords start with a letter
1447 && a <= 'Z') {
1448 if(c == '\t' || c == '\n') {
1449 c = ' ';
1450 }
1451 //
1452 // NOTE: max was used to know if the identifier string was
1453 // allocated and needed extension; here we know that
1454 // it will fit in the identifier buffer and thus we
1455 // don't have to worry about it
1456 //
1457 // pos can't be modified unless a multi-word keyword
1458 // is found since all the extra characters read
1459 // because of that multi-word entry need to be put
1460 // back in the input stream
1461 //
1462 // notice that all spaces (' ', '\t' and '\n') are
1463 // transformed in one underscore ('_') within a
1464 // keyword
1465 //
1466 max = pos;
1467 a -= 'A';
1468 // NOTE: though a will always be positive, it's
1469 // still safer to have a cast to unsigned char!
1470 k = all_keywords[(unsigned char) a].f_keywords;
1471 cnt = all_keywords[(unsigned char) a].f_count;
1472 found = false;
1473
1474 //printf("%d keywords to check with [%s]... (%p)\n", cnt, id, k);
1475
1476 while(cnt > 0) {
1477 cnt--;
1478
1479 #if DEBUG
1480 // make sure that all the keywords are given in upper case
1481 // [this was actually fixed in 1.7.3]
1482 idx = k->f_size;
1483 while(idx > 0) {
1484 idx--;
1485 assert(k->f_name[idx] == '_' || (k->f_name[idx] >= 'A' && k->f_name[idx] <= 'Z'), "ScriptFile::ReadIdentifier(): a keyword must fully be given in uppercase (%s)", k->f_name);
1486 }
1487 #endif
1488
1489 // in case we have a multi-word we can't check the size
1490 if(k->f_size >= (size_t) pos) {
1491 // This wouldn't take the multi-word written
1492 // as one word in account...
1493 //found = strncasecmp(k->f_name, id, pos) == 0;
1494 s1 = k->f_name;
1495 s2 = id;
1496 while(*s1 != '\0' && (*s2 != '\0' && *s2 != ' ')) {
1497 if(*s1 == '_') {
1498 s1++;
1499 assert(*s1 != '\0' && *s1 != '_', "ScriptFile::ReadIdentifier(): a keyword can't end with an underscore nor have two underscores one after another");
1500 }
1501 if(*s1 != toupper(*s2)) {
1502 // this is not valid
1503 break;
1504 }
1505 s1++;
1506 s2++;
1507 }
1508 //printf("Compare [%s]/[%s] with [%s]/[%s] - '%02X' %c\n", id, k->f_name, s2, s1, *s2, *s2);
1509 if(*s1 == '\0' && (*s2 == '\0' || *s2 == ' ')) {
1510 found = true;
1511 break;
1512 }
1513 }
1514 if(c == ' ' && (k->f_flags & KEYWORD_FLAG_MULTIWORD) != 0) {
1515 // 1. do we have enough characters already?
1516 idx = k->f_size + 1;
1517 if(max < idx) {
1518 // not enough, read more
1519 ex = GetChar();
1520 while(max < idx
1521 && ((ex >= '0' && ex <= '9')
1522 || (ex >= 'A' && ex <= 'Z')
1523 || (ex >= 'a' && ex <= 'z')
1524 || ex == '_' || ex == ' ' || ex == '\t' || ex == '\n'
1525 /*|| ex >= 0x0C0*/)) { // we DON'T accept international characters since all keywords are in ASCII
1526 if(ex == ' ' || ex == '\t' || ex == '\n') {
1527 // ignore multiple spaces
1528 if(id[max - 1] != ' ') {
1529 // ASCII is saved as is
1530 id[max] = ' ';
1531 max++;
1532 }
1533 }
1534 else {
1535 // ASCII is saved as is
1536 id[max] = (char) ex;
1537 max++;
1538 }
1539 ex = GetChar();
1540 }
1541 UngetChar(ex);
1542 }
1543 id[max] = '\0';
1544
1545
1546 // 2. make sure the following character is a space (' ')
1547 // and that the input had enough characters
1548 idx--;
1549 if((size_t) max >= k->f_size && (id[idx] == ' ' || id[idx] == '\0')) {
1550 // compare the words "by hand" because of the ' ' and '_'
1551 // which have to be considered as being equal
1552 //
1553 // IMPORTANT: the 'break's below would be "wrong" if we
1554 // didn't already know that the very first character was
1555 // always to be equal (that's the case because we selected
1556 // this table specifically because all the keywords to
1557 // check are starting with that letter)
1558 while(idx > 0) {
1559 idx--;
1560 if(id[idx] == ' ' || id[idx] == '_') {
1561 if(k->f_name[idx] != '_') {
1562 break;
1563 }
1564 }
1565 else if(id[idx] >= 'a' && id[idx] <= 'z') {
1566 if(k->f_name[idx] != (id[idx] & 0x5F)) {
1567 break;
1568 }
1569 }
1570 else if(k->f_name[idx] != id[idx]) {
1571 break;
1572 }
1573 }
1574 found = idx == 0;
1575 if(found) {
1576 pos = k->f_size;
1577 break;
1578 }
1579 }
1580 }
1581 k++;
1582 }
1583 while(max > pos) {
1584 max--;
1585 UngetChar(id[max]); // we know id[] is only composed of ASCII
1586 }
1587 if(found) {
1588 // TODO: get rid of the eventual extra chars we read
1589 // we found a match with a keyword, act on it
1590 //printf("Found! (%p) [%s] %08X\n", k, k->f_name, k->f_flags);
1591 if((k->f_flags & KEYWORD_FLAG_OBJECT) != 0) {
1592 yylval.node = node_alloc(k->f_type, k->f_subtype, f_first_line);
1593 return OBJECT;
1594 }
1595 if((k->f_flags & KEYWORD_FLAG_UNIT) != 0) {
1596 yylval.type = k->f_subtype;
1597 return k->f_type;
1598 }
1599 if((k->f_flags & KEYWORD_FLAG_INTEGER) != 0) {
1600 yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, f_first_line);
1601 yylval.node->integer = k->f_type;
1602 return VALUE;
1603 }
1604 // KEYWORD_FLAG_DIRECT
1605 return k->f_type;
1606 }
1607 id[pos] = '\0';
1608 }
1609
1610 //printf("*** Identifier [%s]\n", id);
1611
1612 yylval.node = node_alloc(NODE_TYPE_IDENTIFIER, NODE_SUBTYPE_UNKNOWN, f_first_line);
1613 if(id != identifier) {
1614 yylval.node->string = id;
1615 }
1616 else {
1617 yylval.node->string = sswf_strdup(id);
1618 }
1619
1620 return IDENTIFIER;
1621 }
1622
1623
ReadString(c_t quote)1624 int ScriptFile::ReadString(c_t quote)
1625 {
1626 register c_t c, o;
1627 register int pos, max;
1628 c_t r;
1629 int cnt;
1630 char *str;
1631 size_t size;
1632
1633 // create the node at the start so the f_line is at the start
1634 yylval.node = node_alloc(NODE_TYPE_STRING, NODE_SUBTYPE_UNKNOWN, f_first_line);
1635
1636 str = (char *) sswf_malloc(256, "ReadString() -- small string buffer");
1637 /*
1638 * We allocated 256, but save 1 byte for the null terminator
1639 * and up to 6 for the last multi-byte
1640 */
1641 max = 256 - 6 - 1;
1642 pos = 0;
1643 for(;;) {
1644 c = GetChar();
1645 if(c == quote || c == SCRIPTFILE_EOF) {
1646 break;
1647 }
1648 // skip bad chars.
1649 if(c == SCRIPTFILE_BAD) {
1650 continue;
1651 }
1652 if(c == '\\') {
1653 c = GetChar();
1654 if(c == SCRIPTFILE_EOF) {
1655 break;
1656 }
1657 o = c;
1658 switch(c) {
1659 case 'a': c = 7; break;
1660 case 'b': c = 8; break;
1661 case 't': c = 9; break;
1662 case 'n': c = 10; break;
1663 case 'r': c = 11; break;
1664 case 'f': c = 12; break;
1665 case 'v': c = 13; break;
1666
1667 case 'U':
1668 case 'u':
1669 c = GetChar();
1670 if(c != '+') {
1671 UngetChar(c);
1672 c = o;
1673 break;
1674 }
1675 c = GetChar();
1676 if(c >= '0' && c <= '9') {
1677 c -= '0';
1678 }
1679 else if(c >= 'a' && c <= 'f') {
1680 c -= 'a' - 10;
1681 }
1682 else if(c >= 'A' && c <= 'F') {
1683 c -= 'A' - 10;
1684 }
1685 else {
1686 UngetChar('+');
1687 UngetChar(c);
1688 c = o;
1689 break;
1690 }
1691 goto readhex;
1692
1693 case 'X':
1694 case 'x':
1695 c = GetChar();
1696 if(c >= '0' && c <= '9') {
1697 c -= '0';
1698 }
1699 else if(c >= 'a' && c <= 'f') {
1700 c -= 'a' - 10;
1701 }
1702 else if(c >= 'A' && c <= 'F') {
1703 c -= 'A' - 10;
1704 }
1705 else {
1706 UngetChar(c);
1707 c = o;
1708 break;
1709 }
1710 readhex:
1711 cnt = 7;
1712 while(cnt > 0) {
1713 cnt--;
1714 r = GetChar();
1715 if(r >= '0' && r <= '9') {
1716 c = c * 16 + r - '0';
1717 }
1718 else if(r >= 'a' && r <= 'f') {
1719 c = c * 16 + r - 'a' + 10;
1720 }
1721 else if(r >= 'A' && r <= 'F') {
1722 c = c * 16 + r - 'A' + 10;
1723 }
1724 else {
1725 UngetChar(r);
1726 break;
1727 }
1728 }
1729 break;
1730
1731 case '0':
1732 c = GetChar();
1733 if(c == 'x' || c == 'X') {
1734 o = c;
1735 c = GetChar();
1736 if(c >= '0' && c <= '9') {
1737 c -= '0';
1738 }
1739 else if(c >= 'a' && c <= 'f') {
1740 c -= 'a' - 10;
1741 }
1742 else if(c >= 'A' && c <= 'F') {
1743 c -= 'A' - 10;
1744 }
1745 else {
1746 UngetChar(c);
1747 UngetChar(o);
1748 c = 0;
1749 break;
1750 }
1751 goto readhex;
1752 }
1753 case '1':
1754 case '2':
1755 case '3':
1756 case '4':
1757 case '5':
1758 case '6':
1759 case '7':
1760 c = c - '0';
1761 cnt = 10;
1762 while(cnt > 0) {
1763 cnt--;
1764 r = GetChar();
1765 if(r < '0' || r > '7') {
1766 UngetChar(r);
1767 break;
1768 }
1769 c = c * 8 + r - '0';
1770 }
1771 break;
1772
1773 }
1774 }
1775 if(c > 0) {
1776 if(pos >= max) {
1777 max += 256;
1778 /*
1779 * +6 because some multi-bytes take that many bytes
1780 * +1 so the null terminator is reserved
1781 */
1782 str = (char *) sswf_remalloc(str, max + 6 + 1, "ReadString() -- large string buffer");
1783 }
1784 size = 6;
1785 // TODO: should we check for errors?
1786 sswf::wctomb(&c, sizeof(c), str + pos, size);
1787 pos += 6 - size;
1788 }
1789 }
1790 str[pos] = '\0';
1791
1792 #if ADJUT_STRINGS
1793 /* on most systems this is really fast and it can save some memory */
1794 str = sswf_remalloc(str, pos + 1, "ReadString() -- adjusted to the minimum");
1795 #endif
1796
1797 yylval.node->string = str;
1798
1799 //fprintf(stderr, "Read string [%s]\n", str);
1800
1801 return STRING;
1802 }
1803
1804
ReadValue(c_t c)1805 int ScriptFile::ReadValue(c_t c)
1806 {
1807 long r, p, sign;
1808 double fr, div, exp;
1809
1810 r = 0;
1811 fr = 0.0;
1812
1813 if(c == '0') { /* check for hexa, otherwise it's probably octal or floating point? */
1814 c = GetChar();
1815 if(c == 'x' || c == 'X') {
1816 /* hexadecimal */
1817 p = c;
1818 c = GetChar();
1819 if((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) {
1820 /* this is NOT a valid hex. value (just 0) */
1821 UngetChar(c);
1822 UngetChar(p);
1823 c = '\0';
1824 }
1825 else {
1826 p = 0;
1827 for(;;) {
1828 if(c >= '0' && c <= '9') {
1829 p = r;
1830 r = r * 16 + c - '0';
1831 }
1832 else if(c >= 'a' && c <= 'f') {
1833 p = r;
1834 r = r * 16 + c - 'a' + 10;
1835 }
1836 else if(c >= 'A' && c <= 'F') {
1837 p = r;
1838 r = r * 16 + c - 'A' + 10;
1839 }
1840 else {
1841 if((c == 'm' || c == 'M') && (r & 15) == 0xC) {
1842 /* this is an hex. followed by 'cm'! */
1843 UngetChar(c);
1844 c = 'c';
1845 r = p; /* restore saved value (because of possible overflow!) */
1846 fprintf(stderr, "WARNING: hexadecimal followed by the CM unit.\n");
1847 }
1848 UngetChar(c);
1849 c = '\0';
1850 break;
1851 }
1852 c = GetChar();
1853 }
1854 }
1855 }
1856 else if(c != '.') {
1857 /* octal */
1858 while(c >= '0' && c <= '8') {
1859 r = r * 8 + c - '0';
1860 c = GetChar();
1861 }
1862 if(c == '8' || c == '9') {
1863 fprintf(stderr, "ERROR: invalid octal number.\n");
1864 /* skip the rest of the number */
1865 do {
1866 c = GetChar();
1867 } while(c >= '0' && c <= '9');
1868 }
1869 UngetChar(c);
1870 c = '\0';
1871 }
1872 }
1873 if(c != '.') {
1874 while(c >= '0' && c <= '9') {
1875 r = r * 10 + c - '0';
1876 fr = fr * 10 + c - '0'; // avoid overflows we may get in 'r' if the number is followed by a period (.)
1877 c = GetChar();
1878 }
1879 }
1880 if(c == '.') {
1881 /* we found a floating point value */
1882 /* TODO: the following is wrong because the '...e+/-<value>' will change the outcome very much */
1883 //fr = (double) r;
1884 div = 0.1;
1885 c = GetChar();
1886 while(c >= '0' && c <= '9') {
1887 fr += (double) (c - '0') * (double) div;
1888 div /= 10.0;
1889 c = GetChar();
1890 }
1891 if(c == 'e' || c == 'E') {
1892 r = c;
1893 c = GetChar();
1894 exp = 0;
1895 sign = 1;
1896 if(c == '+') {
1897 c = GetChar();
1898 if(c < '0' || c > '9') {
1899 UngetChar(c);
1900 UngetChar('+');
1901 c = r;
1902 }
1903 }
1904 else if(c == '-') {
1905 c = GetChar();
1906 if(c < '0' || c > '9') {
1907 UngetChar(c);
1908 UngetChar('-');
1909 c = r;
1910 }
1911 else {
1912 sign = -1;
1913 }
1914 }
1915 else if(c < '0' || c > '9') {
1916 UngetChar(c);
1917 c = r;
1918 }
1919 while(c >= '0' && c <= '9') {
1920 exp = exp * 10 + c - '0';
1921 c = GetChar();
1922 }
1923 if(exp != 0) {
1924 fr *= pow(10.0, exp * (double) sign);
1925 }
1926 }
1927 yylval.node = node_alloc(NODE_TYPE_FLOAT, NODE_SUBTYPE_UNKNOWN, f_first_line);
1928 yylval.node->floating_point = fr;
1929 }
1930 else {
1931 /* we have a integer number */
1932 yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, f_first_line);
1933 yylval.node->integer = r;
1934 }
1935
1936 if(c != '\0') {
1937 UngetChar(c);
1938 }
1939
1940 return VALUE;
1941 }
1942
1943
1944
1945
GetChar(void)1946 ScriptFile::c_t ScriptFile::GetChar(void)
1947 {
1948 c_t c;
1949
1950 if(f_unget_count > 0) {
1951 f_unget_count--;
1952 f_last_char = f_unget[f_unget_count];
1953 }
1954 else {
1955 f_last_char = ReadChar();
1956 }
1957
1958 // the following test simplifies the line counting
1959 if(f_last_char == '\r') {
1960 f_last_char = '\n'; // \n, \r or \r\n -> \n
1961 // skip the \n in a \r\n sequence
1962 c = ReadChar();
1963 if(c != '\n') {
1964 UngetChar(c);
1965 }
1966 }
1967
1968 if(f_last_char == '\n') {
1969 f_line++;
1970 }
1971
1972 return f_last_char;
1973 }
1974
1975
UngetChar(c_t c)1976 void ScriptFile::UngetChar(c_t c)
1977 {
1978 // don't record the end of file or an error!
1979 //if(c == SCRIPTFILE_EOF || c == SCRIPTFILE_BAD)
1980 if(c < 0) {
1981 return;
1982 }
1983
1984 if(c == '\n') {
1985 f_line--;
1986 }
1987
1988 assert(f_unget_count < UNGET_COUNT_MAX, "too many ScriptFile::UngetChar()");
1989 f_unget[f_unget_count] = c;
1990 f_unget_count++;
1991 }
1992
1993
ReadChar(void)1994 ScriptFile::c_t ScriptFile::ReadChar(void)
1995 {
1996 int a, b, cnt, min;
1997 const char *input;
1998 char *output;
1999 size_t out;
2000 c_t outchar;
2001
2002 // the following is to avoid warnings -- a would always be
2003 // properly initialized without it
2004 a = 0;
2005
2006 if(f_type == SCRIPTFILE_TYPE_MULTIBYTES) {
2007 if(f_file != 0) {
2008 while(f_mb_count < MULTIBYTE_MAX) {
2009 a = ReadByte();
2010 if(a == -1) {
2011 CloseFile();
2012 break;
2013 }
2014 f_multibytes[f_mb_count] = a;
2015 f_mb_count++;
2016 }
2017 }
2018 // anything in the input stream?
2019 if(f_mb_count == 0) {
2020 return SCRIPTFILE_EOF;
2021 }
2022 // 8 bits files need to have each character
2023 // converted according to the encoding we've
2024 // got reading the script starting comment
2025 input = f_multibytes;
2026 out = sizeof(outchar);
2027 output = (char *) &outchar;
2028 b = (int) f_mb_count;
2029 a = (int) iconv(f_iconvertor, ICONV_INPUT_CAST &input, &f_mb_count, &output, &out);
2030 // the output buffer will usually be full before the
2031 // input is fully emptied!
2032 if(a < 0 && errno == E2BIG && out == 0 && b != (int) f_mb_count) {
2033 a = 1;
2034 }
2035 if(a < 0) {
2036 f_last_errno = errno;
2037 fprintf(stderr, "ERROR: can't convert the bytes: ");
2038 for(a = 0; a < (int) f_mb_count; a++) {
2039 fprintf(stderr, " 0x%02X", f_multibytes[a]);
2040 }
2041 fprintf(stderr, ", to a character (errno: %d)\n", f_last_errno);
2042 outchar = SCRIPTFILE_BAD;
2043 f_mb_count--; // we need to do this if we don't want to loop forever
2044 }
2045 // the characters used need to be removed from the input buffer
2046 memmove(f_multibytes, input, f_mb_count);
2047 return outchar;
2048 }
2049
2050 if(f_file == 0) {
2051 return SCRIPTFILE_EOF;
2052 }
2053
2054 a = ReadByte();
2055 if(a == -1) {
2056 CloseFile();
2057 return SCRIPTFILE_EOF;
2058 }
2059
2060 for(;;) {
2061 switch(f_type) {
2062 case SCRIPTFILE_TYPE_UTF16LE:
2063 b = ReadByte();
2064 if(b == -1) {
2065 fprintf(stderr, "ERROR: invalid UTF16LE end (odd size file)\n");
2066 CloseFile();
2067 return SCRIPTFILE_EOF;
2068 }
2069 outchar = a + b * 256;
2070 if(outchar >= 0xD800 && outchar <= 0xDBFF) {
2071 outchar = (outchar & 0x3FF) << 10;
2072 b = ReadByte();
2073 if(b == -1) {
2074 fprintf(stderr, "ERROR: invalid UTF16LE end (missing 0xDC00-0xDFFF character)\n");
2075 CloseFile();
2076 return SCRIPTFILE_EOF;
2077 }
2078 b = ReadByte();
2079 if(b == -1) {
2080 fprintf(stderr, "ERROR: invalid UTF16LE end (missing 0xDC00-0xDFFF character)\n");
2081 CloseFile();
2082 return SCRIPTFILE_EOF;
2083 }
2084 outchar |= b;
2085 if(b < 0xDC || b > 0xDF) {
2086 fprintf(stderr, "ERROR: invalid UTF16LE bad 0xD800/0xDC00 sequence\n");
2087 CloseFile();
2088 return SCRIPTFILE_EOF;
2089 }
2090 outchar |= (b & 0x03) << 8;
2091 }
2092 else if(outchar >= 0xDC00 && outchar <= 0xDFFF) {
2093 // ouch, the 0xD800-0xDBFF is missing
2094 return SCRIPTFILE_BAD;
2095 }
2096 else if(outchar == 0xFFFE) {
2097 // change endian!
2098 f_type = SCRIPTFILE_TYPE_UTF16BE;
2099 return 0xFEFF;
2100 }
2101 return outchar;
2102
2103 case SCRIPTFILE_TYPE_UTF16BE:
2104 b = ReadByte();
2105 if(b == -1) {
2106 fprintf(stderr, "ERROR: invalid UTF16BE end (odd size file)\n");
2107 CloseFile();
2108 return SCRIPTFILE_EOF;
2109 }
2110 outchar = a * 256 + b;
2111 if(outchar >= 0xD800 && outchar <= 0xDBFF) {
2112 outchar = (outchar & 0x3FF) << 10;
2113 b = ReadByte();
2114 if(b == -1) {
2115 fprintf(stderr, "ERROR: invalid UTF16BE end (missing 0xDC00-0xDFFF character)\n");
2116 CloseFile();
2117 return SCRIPTFILE_EOF;
2118 }
2119 if(b < 0xDC || b > 0xDF) {
2120 fprintf(stderr, "ERROR: invalid UTF16BE bad 0xD800/0xDC00 sequence\n");
2121 CloseFile();
2122 return SCRIPTFILE_EOF;
2123 }
2124 outchar |= (b & 0x03) << 8;
2125 b = ReadByte();
2126 if(b == -1) {
2127 fprintf(stderr, "ERROR: invalid UTF16BE end (missing 0xDC00-0xDFFF character)\n");
2128 CloseFile();
2129 return SCRIPTFILE_EOF;
2130 }
2131 outchar |= b;
2132 }
2133 else if(outchar >= 0xDC00 && outchar <= 0xDFFF) {
2134 // ouch, the 0xD800-0xDBFF is missing
2135 return SCRIPTFILE_BAD;
2136 }
2137 else if(outchar == 0xFFFE) {
2138 // change endian!
2139 f_type = SCRIPTFILE_TYPE_UTF16LE;
2140 return 0xFEFF;
2141 }
2142 return outchar;
2143
2144 case SCRIPTFILE_TYPE_UCS2LE:
2145 b = ReadByte();
2146 if(b == -1) {
2147 fprintf(stderr, "ERROR: invalid UCS2LE end (odd size file)\n");
2148 CloseFile();
2149 return SCRIPTFILE_EOF;
2150 }
2151 outchar = a + b * 256;
2152 if(outchar == 0xFFFE) {
2153 f_type = SCRIPTFILE_TYPE_UCS2BE;
2154 return 0xFEFF;
2155 }
2156 return outchar;
2157
2158 case SCRIPTFILE_TYPE_UCS2BE:
2159 b = ReadByte();
2160 if(b == -1) {
2161 fprintf(stderr, "ERROR: invalid UCS2BE end (odd size file)\n");
2162 CloseFile();
2163 return SCRIPTFILE_EOF;
2164 }
2165 outchar = a * 256 + b;
2166 if(outchar == 0xFFFE) {
2167 f_type = SCRIPTFILE_TYPE_UCS2LE;
2168 return 0xFEFF;
2169 }
2170 return outchar;
2171
2172 case SCRIPTFILE_TYPE_UCS4LE:
2173 b = ReadByte();
2174 if(b != -1) {
2175 outchar = a | (b << 8);
2176 b = ReadByte();
2177 if(b != -1) {
2178 outchar |= (c_t) b << 16;
2179 b = ReadByte();
2180 if(b != -1) {
2181 outchar |= (c_t) b << 24;
2182 if(outchar == (c_t) 0xFFFE0000) {
2183 f_type = SCRIPTFILE_TYPE_UCS4BE;
2184 return 0xFEFF;
2185 }
2186 if(b < 0x80) {
2187 return outchar;
2188 }
2189 }
2190 }
2191 }
2192 fprintf(stderr, "ERROR: invalid UCS4LE end or bit 32 set\n");
2193 CloseFile();
2194 return SCRIPTFILE_EOF;
2195
2196 case SCRIPTFILE_TYPE_UCS4BE:
2197 b = ReadByte();
2198 if(b != -1) {
2199 outchar = ((c_t) a << 24) | ((c_t) b << 16);
2200 b = ReadByte();
2201 if(b != -1) {
2202 outchar |= b << 8;
2203 b = ReadByte();
2204 if(b != -1) {
2205 outchar |= b;
2206 if(outchar == (c_t) 0xFFFE0000) {
2207 f_type = SCRIPTFILE_TYPE_UCS4LE;
2208 return 0xFEFF;
2209 }
2210 if(outchar >= 0) {
2211 return outchar;
2212 }
2213 }
2214 }
2215 }
2216 fprintf(stderr, "ERROR: invalid USC4BE end or bit 32 set\n");
2217 CloseFile();
2218 return SCRIPTFILE_EOF;
2219
2220 case SCRIPTFILE_TYPE_ISO88591:
2221 return a;
2222
2223 case SCRIPTFILE_TYPE_ASCII:
2224 if(a >= 0x80) {
2225 return SCRIPTFILE_BAD;
2226 }
2227 return a;
2228
2229 case SCRIPTFILE_TYPE_UTF8:
2230 // U-00000000 - U-0000007F: 0xxxxxxx
2231 // U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
2232 // U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
2233 // U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
2234 // U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2235 // U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2236 // if necessary, we resync. our self
2237 while(a >= 0x80 && a <= 0xBF) {
2238 a = ReadByte();
2239 }
2240 if(a == -1) {
2241 CloseFile();
2242 return SCRIPTFILE_EOF;
2243 }
2244 if(a < 0x80) {
2245 return a;
2246 }
2247 if(a >= 0xFE) {
2248 // bad entry here!
2249 // TODO: error or skip silently?
2250 CloseFile();
2251 return SCRIPTFILE_EOF;
2252 }
2253 // multi-byte character -- read all the 10xxxxxx...
2254 if(a >= 0xFC) {
2255 a &= 0x01;
2256 min = 1 << (2 + 6 * 4);
2257 cnt = 5;
2258 }
2259 else if(a >= 0xF8) {
2260 a &= 0x03;
2261 min = 1 << (3 + 6 * 3);
2262 cnt = 4;
2263 }
2264 else if(a >= 0xF0) {
2265 a &= 0x07;
2266 min = 1 << (4 + 6 * 2);
2267 cnt = 3;
2268 }
2269 else if(a >= 0xE0) {
2270 a &= 0x0F;
2271 min = 1 << (5 + 6);
2272 cnt = 2;
2273 }
2274 else /*if(a >= 0xC0)*/ {
2275 a &= 0x1F;
2276 min = 1 << 7;
2277 cnt = 1;
2278 }
2279 outchar = a;
2280 while(cnt > 0) {
2281 cnt--;
2282 b = ReadByte();
2283 if(b == -1) {
2284 // bad entry here!
2285 // TODO: error or skip silently?
2286 CloseFile();
2287 return SCRIPTFILE_EOF;
2288 }
2289 if((b & 0xC0) != 0x80) {
2290 // save this byte for better error recovery
2291 UnreadByte(b);
2292 // refuse long encodings!
2293 // TODO: error or skip silently?
2294 return SCRIPTFILE_BAD;
2295 }
2296 outchar = outchar * 64 + (b & 0x3F);
2297 }
2298 if(outchar < min) {
2299 // refuse long encodings!
2300 // TODO: error or skip silently?
2301 return SCRIPTFILE_BAD;
2302 }
2303 return outchar;
2304
2305 default:
2306 assert(0, "unknown encoding type when reading a character");
2307 /*NOTREACHED*/
2308
2309 }
2310 a = ReadByte();
2311 if(a == -1) {
2312 CloseFile();
2313 return SCRIPTFILE_EOF;
2314 }
2315 }
2316 /*NOTREACHED*/
2317 return SCRIPTFILE_EOF;
2318 }
2319
2320
ReadByte(void)2321 int ScriptFile::ReadByte(void)
2322 {
2323 unsigned char c;
2324
2325 errno = 0;
2326
2327 if(f_unread_count > 0) {
2328 f_unread_count--;
2329 return f_unread[f_unread_count];
2330 }
2331
2332 if(fread(&c, 1, 1, f_file) != 1) {
2333 if(errno != 0) {
2334 f_last_errno = errno;
2335 perror("fread()");
2336 fprintf(stderr, "%s:%d:%d: i/o error", f_filename, f_line, f_last_errno);
2337 }
2338 return -1;
2339 }
2340
2341 return c;
2342 }
2343
2344
UnreadByte(unsigned char c)2345 void ScriptFile::UnreadByte(unsigned char c)
2346 {
2347 assert(f_unread_count < UNREAD_COUNT_MAX, "too many UnreadByte() calls (max = %d)", UNREAD_COUNT_MAX);
2348
2349 f_unread[f_unread_count] = c;
2350 f_unread_count++;
2351 }
2352
2353
2354
2355
2356 extern "C"
2357 {
2358
2359
2360
sswf_read_actionscript(int yes)2361 void sswf_read_actionscript(int yes)
2362 {
2363 //printf("Called sswf_read_actionscript (%d)\n", yes);
2364 //fflush(stdout);
2365 if(sf != 0) {
2366 sf->SetReadActionscript(yes);
2367 }
2368 }
2369
2370
sswf_add_include(const char * path)2371 void sswf_add_include(const char *path)
2372 {
2373 ScriptFile::string_t *str;
2374
2375 str = new ScriptFile::string_t();
2376 include_paths.MemAttach(str, sizeof(ScriptFile::string_t), "sswf_add_include(): user include path");
2377 str->f_string = include_paths.StrDup(path);
2378 include_paths.Set(-1, str);
2379 }
2380
2381
sswf_set_default_include(int def)2382 void sswf_set_default_include(int def)
2383 {
2384 no_default_include = def;
2385 }
2386
sswf_open_script(const char * filename)2387 int sswf_open_script(const char *filename)
2388 {
2389 ScriptFile *n;
2390 int ec;
2391
2392 n = new ScriptFile(sf);
2393 if(n == 0) {
2394 fprintf(stderr, "FATAL ERROR: out of memory.\n");
2395 exit(1);
2396 }
2397 sf = n;
2398
2399 ec = sf->OpenFile(filename, include_paths, no_default_include == 0);
2400 if(ec == 0) {
2401 lex_filename = sf->Filename();
2402 }
2403
2404 return ec;
2405 }
2406
2407
sswf_close_script(void)2408 void sswf_close_script(void)
2409 {
2410 ScriptFile *p;
2411
2412 if(sf != 0) {
2413 p = sf->Parent();
2414 delete sf;
2415 sf = p;
2416 if(p != 0) {
2417 lex_filename = p->Filename();
2418 }
2419 }
2420 }
2421
2422
yylex()2423 int yylex()
2424 {
2425 int c;
2426
2427 c = sf->GetToken();
2428
2429 #if 0
2430 printf("%s: %d: Read token [%d] '%c'\n",
2431 sf->Filename(), sf->Line(),
2432 c, c >= ' ' && c <= 0x7E ? c : '?');
2433 #endif
2434
2435 return c;
2436 }
2437
2438
2439 }
2440
2441
2442
2443 #if 0
2444
2445 #define RETURN_TOKEN(type, subtype) ylval.node = node_alloc(NODE_TYPE_##type, NODE_SUBTYPE_##subtype, yylloc.first_line); return type;
2446 #define RETURN_UNIT(name, unit) yylval.type = NODE_SUBTYPE_##unit; return UNIT_##name;
2447
2448 static void skip_comment(int close);
2449 static struct node_t * read_identifier(void);
2450 static struct node_t * read_string(void);
2451 static struct node_t * read_value(void);
2452
2453 /*
2454 some unused rules...
2455 "SHOW"[ \t_]?"FRAME" { yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_SHOW_FRAME, yylloc.first_line); return DIRECT_REFERENCE; }
2456 "REMOVE"[ \t_]?"ALL" { yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_REMOVE_ALL, yylloc.first_line); return DIRECT_REFERENCE; }
2457 "END" { yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_END, yylloc.first_line); return DIRECT_REFERENCE; }
2458 */
2459
2460 %}
2461
2462 %option noyywrap
2463
2464 %%
2465
/* Keyword rules of the legacy flex scanner (this whole region sits inside
 * "#if 0" and is not compiled).
 *
 * - Rules using RETURN_TOKEN() return an OBJECT token with a node of the
 *   given subtype; rules using RETURN_UNIT() return a UNIT_<name> token with
 *   the unit subtype; ACTIONSCRIPT, ELSE, FOR and IF return the bare token.
 * - A pattern of the form "A"[ \t_]?"B" accepts the two words written
 *   together or separated by exactly one space, tab or underscore.
 * - "LIST" and "BLOCK" are synonyms; "RECT" also accepts "RECTANGLE";
 *   "SHAPE" may be preceded by "DEFINE".
 * - "TRUE" / "FALSE" yield an integer VALUE node holding 1 / 0.
 *
 * NOTE(review): in "ENVELOPE?" the question mark is inside the quotes, so
 * flex matches it literally -- this looks unintended; confirm the pattern.
 */
2466 "ACTION" { RETURN_TOKEN(OBJECT, ACTION); }
2467 "ACTIONSCRIPT" { return ACTIONSCRIPT; }
2468 "BC" { RETURN_UNIT(COLOR, BC); }
2469 "BUTTON" { RETURN_TOKEN(OBJECT, BUTTON); }
2470 "CATCH" { RETURN_TOKEN(OBJECT, CATCH); }
2471 "CM" { RETURN_UNIT(SIZE, CM); }
2472 "COLOR"[ \t_]?"TRANSFORM" { RETURN_TOKEN(OBJECT, COLOR_TRANSFORM); }
2473 "COLOR" { RETURN_TOKEN(OBJECT, COLOR); }
2474 "DEG" { RETURN_UNIT(ANGLE, DEG); }
2475 "DO"[ \t_]?"ACTION" { RETURN_TOKEN(OBJECT, DO_ACTION); }
2476 "EDGES" { RETURN_TOKEN(OBJECT, EDGES); }
2477 "EDIT"[ \t_]?"TEXT" { RETURN_TOKEN(OBJECT, EDIT_TEXT); }
2478 "ELSE" { return ELSE; }
2479 "END" { RETURN_TOKEN(OBJECT, END); }
2480 "ENVELOPE?" { RETURN_TOKEN(OBJECT, ENVELOPE); }
2481 "EXPORT" { RETURN_TOKEN(OBJECT, EXPORT); }
2482 "FALSE" { yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line); yylval.node->integer = 0; return VALUE; }
2483 "FC" { RETURN_UNIT(COLOR, FC); }
2484 "FILL"[ \t_]?"STYLE" { RETURN_TOKEN(OBJECT, FILL_STYLE); }
2485 "FINALLY" { RETURN_TOKEN(OBJECT, FINALLY); }
2486 "FONT" { RETURN_TOKEN(OBJECT, FONT); }
2487 "FOR" { return FOR; }
2488 "FPF" { RETURN_UNIT(SPEED, FPF); }
2489 "FPS" { RETURN_UNIT(SPEED, FPS); }
2490 "FRAME"[ \t_]?"LABEL" { RETURN_TOKEN(OBJECT, FRAME_LABEL); }
2491 "FRM" { RETURN_UNIT(TIME, FRM); }
2492 "FUNCTION" { RETURN_TOKEN(OBJECT, FUNCTION); }
2493 "GLYPH" { RETURN_TOKEN(OBJECT, GLYPH); }
2494 "GRAD" { RETURN_UNIT(ANGLE, GRAD); }
2495 "GRADIENT" { RETURN_TOKEN(OBJECT, GRADIENT); }
2496 "IF" { return IF; }
2497 "IMAGE" { RETURN_TOKEN(OBJECT, IMAGE); }
2498 "IMPORT" { RETURN_TOKEN(OBJECT, IMPORT); }
2499 "IN" { RETURN_UNIT(SIZE, IN); }
2500 "LABEL" { RETURN_TOKEN(OBJECT, LABEL); }
2501 "LINE"[ \t_]?"STYLE" { RETURN_TOKEN(OBJECT, LINE_STYLE); }
2502 "LIST"|"BLOCK" { RETURN_TOKEN(OBJECT, LIST); }
2503 "MATRIX" { RETURN_TOKEN(OBJECT, MATRIX); }
2504 "MIN" { RETURN_UNIT(TIME, MIN); }
2505 "ON"[ \t_]?"EVENT" { RETURN_TOKEN(OBJECT, ON_EVENT); }
2506 "PLACE"[ \t_]?"OBJECT" { RETURN_TOKEN(OBJECT, PLACE_OBJECT); }
2507 "POINTS" { RETURN_TOKEN(OBJECT, POINTS); }
2508 "PR" { RETURN_UNIT(RATIO, PR); }
2509 "PX" { RETURN_UNIT(SIZE, PX); }
2510 "RAD" { RETURN_UNIT(ANGLE, RAD); }
2511 "RECT"("ANGLE")? { RETURN_TOKEN(OBJECT, RECT); }
2512 "REMOVE" { RETURN_TOKEN(OBJECT, REMOVE); }
2513 "REPLACE"[ \t_]?"OBJECT" { RETURN_TOKEN(OBJECT, REPLACE_OBJECT); }
2514 "RT" { RETURN_UNIT(RATIO, RT); }
2515 "SCRIPT"[ \t_]?"LIMITS" { RETURN_TOKEN(OBJECT, SCRIPT_LIMITS); }
2516 "SEC" { RETURN_UNIT(TIME, SEC); }
2517 "SEQUENCE" { RETURN_TOKEN(OBJECT, SEQUENCE); }
2518 "SET"[ \t_]?"BACKGROUND"[ \t_]?"COLOR" { RETURN_TOKEN(OBJECT, SET_BACKGROUND_COLOR); }
2519 "SET"[ \t_]?"TAB"[ \t_]?"INDEX" { RETURN_TOKEN(OBJECT, SET_TAB_INDEX); }
2520 ("DEFINE"[ \t_]?)?"SHAPE" { RETURN_TOKEN(OBJECT, SHAPE); }
2521 "SHOW"[ \t_]?"FRAME" { RETURN_TOKEN(OBJECT, SHOW_FRAME); }
2522 "SOUND" { RETURN_TOKEN(OBJECT, SOUND); }
2523 "SOUND"[ \t_]?"INFO" { RETURN_TOKEN(OBJECT, SOUND_INFO); }
2524 "SPRITE" { RETURN_TOKEN(OBJECT, SPRITE); }
2525 "STATE" { RETURN_TOKEN(OBJECT, STATE); }
2526 "TEXT" { RETURN_TOKEN(OBJECT, TEXT); }
2527 "TEXT"[ \t_]?"SETUP" { RETURN_TOKEN(OBJECT, TEXT_SETUP); }
2528 "TRUE" { yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line); yylval.node->integer = 1; return VALUE; }
2529 "TRY" { RETURN_TOKEN(OBJECT, TRY); }
2530 "TW" { RETURN_UNIT(SIZE, TW); }
2531 "WITH" { RETURN_TOKEN(OBJECT, WITH); }
2532
2533
2534
2535
2536
2537
2538
2539 static struct node_t *read_identifier(void)
2540 {
2541 struct node_t *n;
2542
2543 n = node_alloc(NODE_TYPE_IDENTIFIER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);
2544 n->string = sswf_strdup(yytext);
2545
2546 return n;
2547 }
2548
2549
/*
 * Parse the numeric literal that starts at yytext and return it as a new
 * INTEGER or FLOAT node for the current line.
 *
 * The first character comes from yytext; the following ones are pulled
 * with input(). Whatever is read beyond the end of the number is given
 * back with unput().
 *
 * Forms handled below:
 *   0x<hex digits>                          hexadecimal integer
 *   0<octal digits>                         octal integer (digits 0 to 7)
 *   <decimal digits>                        decimal integer
 *   [<digits>].<digits>[e[+|-]<digits>]     floating point
 *
 * An exponent is only recognized after a decimal point.
 */
static struct node_t *read_value(void)
{
	int		c;
	long		r, p, sign;
	double		fr, div, exponent;
	struct node_t	*n;

	r = 0;

	c = *yytext;
	if(c == '.') {
		/* we can have a decimal point only if a second digit was found
		 * make sure we restore it first!
		 */
		unput(yytext[1]);
	}

	if(c == '0') {	/* check for hexa, otherwise it's probably octal or floating point? */
		c = input();
		if(c == 'x' || c == 'X') {
			/* hexadecimal */
			p = c;		/* remember the 'x' in case it must be restored */
			c = input();
			if((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) {
				/* this is NOT a valid hex. value: the result is a plain 0 */
				unput(c);
				unput(p);
				c = '\0';
			}
			else {
				/* p always holds the value as it was before the last digit */
				p = 0;
				for(;;) {
					if(c >= '0' && c <= '9') {
						p = r;
						r = r * 16 + c - '0';
					}
					else if(c >= 'a' && c <= 'f') {
						p = r;
						r = r * 16 + c - 'a' + 10;
					}
					else if(c >= 'A' && c <= 'F') {
						p = r;
						r = r * 16 + c - 'A' + 10;
					}
					else {
						if((c == 'm' || c == 'M') && (r & 15) == 0xC) {
							/* this is an hex. followed by 'cm'!
							 * give back the 'm' then a 'c', and drop
							 * the C digit from the value
							 */
							unput(c);
							c = 'c';
							r = p;	/* restore saved value (because of overflow!) */
							fprintf(stderr, "WARNING: hexadecimal followed by the CM unit.\n");
						}
						unput(c);
						c = '\0';
						break;
					}
					c = input();
				}
			}
		}
		else if(c != '.') {
			/* octal: only the digits 0 to 7 are valid, an 8 or a 9
			 * must reach the error test below
			 */
			while(c >= '0' && c <= '7') {
				r = r * 8 + c - '0';
				c = input();
			}
			if(c == '8' || c == '9') {
				fprintf(stderr, "ERROR: invalid octal number.\n");
				/* skip the rest of the number */
				do {
					c = input();
				} while(c >= '0' && c <= '9');
			}
			unput(c);
			c = '\0';
		}
	}
	if(c != '.') {
		/* decimal digits (skipped when c was reset to '\0' above) */
		while(c >= '0' && c <= '9') {
			r = r * 10 + c - '0';
			c = input();
		}
	}
	if(c == '.') {
		/* we found a floating point value */
		/* TODO: the following is wrong because the '...e+/-<value>' will change the outcome very much */
		fr = (double) r;
		div = 0.1;
		c = input();
		while(c >= '0' && c <= '9') {
			fr += (double) (c - '0') * (double) div;
			div /= 10.0;
			c = input();
		}
		if(c == 'e' || c == 'E') {
			/* r is free now: reuse it to remember the 'e' so it can
			 * be restored when no exponent digits follow
			 */
			r = c;
			c = input();
			exponent = 0;
			sign = 1;
			if(c == '+') {
				c = input();
				if(c < '0' || c > '9') {
					unput(c);
					unput('+');
					c = r;
				}
			}
			else if(c == '-') {
				c = input();
				if(c < '0' || c > '9') {
					unput(c);
					unput('-');
					c = r;
				}
				else {
					sign = -1;
				}
			}
			else if(c < '0' || c > '9') {
				unput(c);
				c = r;
			}
			while(c >= '0' && c <= '9') {
				exponent = exponent * 10 + c - '0';
				c = input();
			}
			if(exponent != 0) {
				fr *= pow(10, exponent * (double) sign);
			}
		}
		n = node_alloc(NODE_TYPE_FLOAT, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);
		n->floating_point = fr;
	}
	else {
		/* we have a integer number */
		n = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);
		n->integer = r;
	}

	/* give back the first character that is not part of the number */
	if(c != '\0') {
		unput(c);
	}

	return n;
}
2695
2696 #endif
2697
2698