1 /* sswf_lexical.c++ -- written by Alexis WILKE for Made to Order Software Corp. (c) 2002-2009 */
2 
3 /*
4 
5 Copyright (c) 2002-2009 Made to Order Software Corp.
6 
7 Permission is hereby granted, free of charge, to any
8 person obtaining a copy of this software and
9 associated documentation files (the "Software"), to
10 deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify,
12 merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom
14 the Software is furnished to do so, subject to the
15 following conditions:
16 
17 The above copyright notice and this permission notice
18 shall be included in all copies or substantial
19 portions of the Software.
20 
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
22 ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
23 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
24 FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
25 EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
27 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
28 ARISING FROM, OUT OF OR IN CONNECTION WITH THE
29 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 SOFTWARE.
31 
32 */
33 
34 
35 #define	SSWF_NEED_ASSERT
36 #include	"sswf.h"
37 
38 extern "C" {
39 #include	"sswf_grammar.h"
40 };
41 
42 
43 #include	"sswf/libsswf.h"
44 
45 
46 extern	YYLTYPE		yylloc;
47 
48 #define	UNREAD_COUNT_MAX	4
49 #define	UNGET_COUNT_MAX		16
50 #define	MULTIBYTE_MAX		16
51 
52 class ScriptFile
53 {
54 public:
55 	struct string_t : public sswf::ItemBase {
56 		char *		f_string;
57 	};
58 	typedef sswf::sswf_ucs4_t	c_t;		// UCS chars are 31 bits, negative values are used for errors
59 
60 	// NOTES:
61 	//
62 	// Glossary
63 	//	ASCII	American Standard Code for Information Interchange
64 	//	BE	Big-endian (most significant byte first)
65 	//	LE	Little-endian (least significant byte first)
66 	//	UCS	Universal Character Set
67 	//	UTF	Universal Transformation Format
68 	//
69 	// Note that UCS2, UTF16, UCS4 and UTF32 don't have endian specified.
70 	// This is correct since we can infer the endian by checking the
71 	// few first bytes of input (which MUST represent a comment)
72 	//
73 	enum scriptfile_type_t {
74 		// totally unknown
75 		SCRIPTFILE_TYPE_UNKNOWN = 0,	// still unknown
76 
77 		// unknown but valid for iconv()
78 		SCRIPTFILE_TYPE_MULTIBYTES,	// a specified encoding (using iconv() to convert the characters)
79 
80 		// 8 bits
81 		SCRIPTFILE_TYPE_ASCII,		// accept characters upto 127 as is; others are viewed as erroneous
82 		SCRIPTFILE_TYPE_ISO88591,	// use input as is (like Unicode page 0)
83 		SCRIPTFILE_TYPE_UTF8,		// 1 to 6 bytes to encode any character
84 		SCRIPTFILE_TYPE_CESU8,		// 1 to 4 bytes to encode 0x110000 characters, 0xD800 to 0xDFFF is interpreted
85 
86 		// 16 bits
87 		SCRIPTFILE_TYPE_UCS2,		// UCS-2 chars (limited to 0x10000 characters)
88 		SCRIPTFILE_TYPE_UCS2BE,		// same as UCS2 in big endian
89 		SCRIPTFILE_TYPE_UCS2LE,		// same as UCS2 in little endian
90 		SCRIPTFILE_TYPE_UCS2SAME,	// UCS-2 in processor endian
91 		SCRIPTFILE_TYPE_UCS2SWAP,	// UCS-2 in opposite process endian
92 		SCRIPTFILE_TYPE_UTF16,		// UTF-16 (0xD800 to 0xDFFF are escapes to represent 20 bits)
93 		SCRIPTFILE_TYPE_UTF16BE,	// same as UTF-16 in big endian
94 		SCRIPTFILE_TYPE_UTF16LE,	// same as UTF-16 in little endian
95 
96 		// 32 bits
97 		SCRIPTFILE_TYPE_UCS4,		// UCS-4 (unlimited character set, except negative values)
98 		SCRIPTFILE_TYPE_UCS4BE,		// same as UCS-4 big endian
99 		SCRIPTFILE_TYPE_UCS4LE,		// same as UCS-4 little endian
100 		SCRIPTFILE_TYPE_UCS4SAME,	// UCS-4 in processor endian
101 		SCRIPTFILE_TYPE_UCS4SWAP,	// UCS-4 in opposite process endian
102 		SCRIPTFILE_TYPE_UTF32,		// UTF-32 (limited to 0x110000 chars)
103 		SCRIPTFILE_TYPE_UTF32BE,	// UTF-32 big endian
104 		SCRIPTFILE_TYPE_UTF32LE,	// UTF-32 little endian
105 
106 		SCRIPTFILE_TYPE_SAME,		// keep input type
107 
108 		SCRIPTFILE_TYPE_max
109 	};
110 	struct sf_type_t {
111 		scriptfile_type_t	f_type;		// corresponding type (internally supported)
112 		const char *		f_name;		// official encoding name (as in iconv)
113 		unsigned long		f_input;	// accepted input encoding (the one we determine we our internal algorithm)
114 	};
115 
116 #define	SCRIPTFILE_EOF		((c_t) -1)	// UCS chars are 31 bits max.
117 #define	SCRIPTFILE_BAD		((c_t) -2)	// UCS chars are 31 bits max.
118 
119 				ScriptFile(ScriptFile *parent);
120 				~ScriptFile();
121 
122 	int			OpenFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths);
123 	void			CloseFile(void);
124 	int			GetToken(void);
125 	unsigned int		Line(void) const;
126 	ScriptFile *		Parent(void);
127 	const char *		Filename(void);
128 	int			ReadActionscript(void);
129 	void			SetReadActionscript(bool yes);
130 
131 private:
132 	void			Reset(void);
133 	int			FindFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths);
134 	c_t			GetChar(void);
135 	void			UngetChar(c_t c);
136 	c_t			ReadChar(void);
137 	int			ReadByte(void);
138 	void			UnreadByte(unsigned char c);
139 	void			SkipComment(int close);
140 	int			ReadIdentifier(c_t c);
141 	int			ReadString(c_t c);
142 	int			ReadValue(c_t c);
143 
144 	ScriptFile *		f_parent;
145 	const char *		f_filename;
146 	unsigned int		f_line;
147 	unsigned int		f_first_line;
148 	scriptfile_type_t	f_type;
149 	FILE *			f_file;
150 	int			f_last_errno;
151 	c_t			f_last_char;
152 	unsigned int		f_unread_count;
153 	unsigned char		f_unread[UNREAD_COUNT_MAX];
154 	unsigned int		f_unget_count;
155 	c_t			f_unget[UNGET_COUNT_MAX];
156 	bool			f_iconvertor_open;
157 	iconv_t			f_iconvertor;
158 	size_t			f_mb_count;
159 	char			f_multibytes[MULTIBYTE_MAX];
160 	bool			f_read_actionscript;
161 };
162 
163 
164 ScriptFile		*sf;		// the current script file
165 sswf::Vectors		include_paths;	// an array of strings where files are being searched
166 int			no_default_include;
167 
168 
169 
170 
/*
 * SF_TYPE_TO_FLAG<n>(...) transforms a list of <n> scriptfile type
 * names (without the SCRIPTFILE_TYPE_ prefix) in a set of bits, one
 * bit per type. The result is an unsigned long as expected by the
 * sf_type_t::f_input field and each expansion is parenthesized so it
 * can safely be used within a larger expression.
 */
#define	SF_TYPE_TO_FLAG1(a)		(1UL << ScriptFile::SCRIPTFILE_TYPE_##a)
#define	SF_TYPE_TO_FLAG2(a,b)		(SF_TYPE_TO_FLAG1(a) | SF_TYPE_TO_FLAG1(b))
#define	SF_TYPE_TO_FLAG3(a,b,c)		(SF_TYPE_TO_FLAG2(a,b) | SF_TYPE_TO_FLAG1(c))
#define	SF_TYPE_TO_FLAG4(a,b,c,d)	(SF_TYPE_TO_FLAG3(a,b,c) | SF_TYPE_TO_FLAG1(d))
#define	SF_TYPE_TO_FLAG5(a,b,c,d,e)	(SF_TYPE_TO_FLAG4(a,b,c,d) | SF_TYPE_TO_FLAG1(e))
#define	SF_TYPE_TO_FLAG6(a,b,c,d,e,f)	(SF_TYPE_TO_FLAG5(a,b,c,d,e) | SF_TYPE_TO_FLAG1(f))

/*
 * An "array" is written (size, (elem1, elem2, ...)); the following
 * two macros extract one part or the other once applied to it.
 */
#define	SF_GET_ELEMS(size, elems)	elems
//#define	SF_ELEMS(array)			SF_GET_ELEMS array

#define	SF_GET_SIZE(size, elems)	size
//#define	SF_SIZE(array)			SF_GET_SIZE array

/*
 * SF_CALL() forces the expansion of its parameters before SF_CALL1()
 * glues the macro name and the count together (i.e. SF_TYPE_TO_FLAG
 * and 2 become SF_TYPE_TO_FLAG2 which is then applied to the elements).
 */
#define	SF_CALL1(macro, count, elems)	macro##count elems
#define	SF_CALL(macro, count, elems)	SF_CALL1(macro, count, elems)

#define	SF_FOREACH(macro, array)	SF_CALL(macro, SF_GET_SIZE array, SF_GET_ELEMS array)


/*
 * Generates one sf_type_t initializer (including the trailing comma):
 * the internal type, the encoding name and the set of detected input
 * types with which that encoding name is acceptable.
 */
#define	SCRIPTFILE_TYPE(name, alias, accepted_array)			\
		{							\
			ScriptFile::SCRIPTFILE_TYPE_##name,				\
			alias,						\
			SF_FOREACH(SF_TYPE_TO_FLAG, accepted_array)	\
		},
196 
197 static const ScriptFile::sf_type_t internal_types[] =
198 {
199 	// ASCII
200 	SCRIPTFILE_TYPE(ASCII, "ASCII",			(1, (ISO88591)))
201 	SCRIPTFILE_TYPE(ASCII, "USASCII",		(1, (ISO88591)))
202 	SCRIPTFILE_TYPE(ASCII, "CSASCII",		(1, (ISO88591)))
203 	SCRIPTFILE_TYPE(ASCII, "US",			(1, (ISO88591)))
204 	SCRIPTFILE_TYPE(ASCII, "ISO646US",		(1, (ISO88591)))
205 	SCRIPTFILE_TYPE(ASCII, "ISO646.IRV",		(1, (ISO88591)))
206 	SCRIPTFILE_TYPE(ASCII, "ISO646.IRV:1991",	(1, (ISO88591)))
207 	SCRIPTFILE_TYPE(ASCII, "ISO646.1991IRV",	(1, (ISO88591)))
208 	SCRIPTFILE_TYPE(ASCII, "ISOIR6",		(1, (ISO88591)))
209 	SCRIPTFILE_TYPE(ASCII, "ANSIX3.4-1968",		(1, (ISO88591)))
210 	SCRIPTFILE_TYPE(ASCII, "ANSIX3.4-1986",		(1, (ISO88591)))
211 	SCRIPTFILE_TYPE(ASCII, "CP367",			(1, (ISO88591)))
212 	SCRIPTFILE_TYPE(ASCII, "IBM367",		(1, (ISO88591)))
213 
214 	// ISO8859-1
215 	SCRIPTFILE_TYPE(SAME, "88591",			(1, (ISO88591)))
216 	SCRIPTFILE_TYPE(SAME, "88591:1987",		(1, (ISO88591)))
217 	SCRIPTFILE_TYPE(SAME, "ISO88591",		(1, (ISO88591)))
218 	SCRIPTFILE_TYPE(SAME, "ISO88591:1987",		(1, (ISO88591)))
219 	SCRIPTFILE_TYPE(SAME, "ISOIR100",		(1, (ISO88591)))
220 	SCRIPTFILE_TYPE(SAME, "CSISOLATIN1",		(1, (ISO88591)))
221 	SCRIPTFILE_TYPE(SAME, "LATIN1",			(1, (ISO88591)))
222 	SCRIPTFILE_TYPE(SAME, "L1",			(1, (ISO88591)))
223 	SCRIPTFILE_TYPE(SAME, "CP819",			(1, (ISO88591)))
224 	SCRIPTFILE_TYPE(SAME, "IBM819",			(1, (ISO88591)))
225 
226 	// UTF-8
227 	SCRIPTFILE_TYPE(UTF8, "UTF8",			(2, (ISO88591, UTF8)))
228 
229 	// CESU-8
230 	SCRIPTFILE_TYPE(CESU8, "CESU8",			(2, (ISO88591, UTF8)))
231 
232 	// UCS-2
233 	SCRIPTFILE_TYPE(SAME, "UCS2",			(2, (UCS2LE, UCS2BE)))
234 	SCRIPTFILE_TYPE(SAME, "CSUNICODE",		(2, (UCS2LE, UCS2BE)))
235 	SCRIPTFILE_TYPE(SAME, "ISO10646:1993/UCS2",	(2, (UCS2LE, UCS2BE)))
236 	SCRIPTFILE_TYPE(SAME, "10646/UCS2",		(2, (UCS2LE, UCS2BE)))
237 	SCRIPTFILE_TYPE(SAME, "106461/UCS2",		(2, (UCS2LE, UCS2BE)))
238 	SCRIPTFILE_TYPE(SAME, "ISO10646UCS2",		(2, (UCS2LE, UCS2BE)))
239 	SCRIPTFILE_TYPE(SAME, "10646UCS2",		(2, (UCS2LE, UCS2BE)))
240 
241 	SCRIPTFILE_TYPE(SAME, "UCS2INTERNAL",		(2, (UCS2LE, UCS2BE)))
242 	SCRIPTFILE_TYPE(SAME, "UCS2SWAPPED",		(2, (UCS2LE, UCS2BE)))
243 
244 	SCRIPTFILE_TYPE(SAME, "UCS2BE",			(1, (UCS2BE)))
245 	SCRIPTFILE_TYPE(SAME, "UNICODEBIG",		(1, (UCS2BE)))
246 	SCRIPTFILE_TYPE(SAME, "UNICODE11",		(1, (UCS2BE)))
247 	SCRIPTFILE_TYPE(SAME, "CSUNICODE11",		(1, (UCS2BE)))
248 
249 	SCRIPTFILE_TYPE(SAME, "UCS2LE",			(1, (UCS2LE)))
250 	SCRIPTFILE_TYPE(SAME, "UNICODEOLITTLE",		(1, (UCS2LE)))
251 
252 	// UTF-16
253 	SCRIPTFILE_TYPE(UTF16BE, "UTF16",		(1, (UCS2BE)))
254 	SCRIPTFILE_TYPE(UTF16BE, "UTF16BE",		(1, (UCS2BE)))
255 
256 	SCRIPTFILE_TYPE(UTF16LE, "UTF16",		(1, (UCS2LE)))
257 	SCRIPTFILE_TYPE(UTF16LE, "UTF16LE",		(1, (UCS2LE)))
258 
259 	// UCS-4
260 	SCRIPTFILE_TYPE(SAME, "UCS4",			(2, (UCS4LE, UCS4BE)))
261 	SCRIPTFILE_TYPE(SAME, "CSUCS4",			(2, (UCS4LE, UCS4BE)))
262 	SCRIPTFILE_TYPE(SAME, "ISO10646",		(2, (UCS4LE, UCS4BE)))
263 	SCRIPTFILE_TYPE(SAME, "ISO10646:1993",		(2, (UCS4LE, UCS4BE)))
264 	SCRIPTFILE_TYPE(SAME, "ISO10646:1993/UCS4",	(2, (UCS4LE, UCS4BE)))
265 	SCRIPTFILE_TYPE(SAME, "10646",			(2, (UCS4LE, UCS4BE)))
266 	SCRIPTFILE_TYPE(SAME, "10646/UCS4",		(2, (UCS4LE, UCS4BE)))
267 	SCRIPTFILE_TYPE(SAME, "106461",			(2, (UCS4LE, UCS4BE)))
268 	SCRIPTFILE_TYPE(SAME, "106461/UCS4",		(2, (UCS4LE, UCS4BE)))
269 
270 	SCRIPTFILE_TYPE(SAME, "UCS4INTERNAL",		(2, (UCS4LE, UCS4BE)))
271 	SCRIPTFILE_TYPE(SAME, "UCS4SWAPPED",		(2, (UCS4LE, UCS4BE)))
272 
273 	SCRIPTFILE_TYPE(SAME, "UCS4BE",			(1, (UCS4BE)))
274 	SCRIPTFILE_TYPE(SAME, "UCS4LE",			(1, (UCS4LE)))
275 
276 	// UTF-32
277 	SCRIPTFILE_TYPE(UTF32BE, "UTF32",		(1, (UCS4BE)))
278 	SCRIPTFILE_TYPE(UTF32BE, "UTF32BE",		(1, (UCS4BE)))
279 
280 	SCRIPTFILE_TYPE(UTF32LE, "UTF32",		(1, (UCS4LE)))
281 	SCRIPTFILE_TYPE(UTF32LE, "UTF32LE",		(1, (UCS4LE)))
282 };
283 
284 
285 
286 
287 
288 
ScriptFile(ScriptFile * parent)289 ScriptFile::ScriptFile(ScriptFile *parent)
290 	: f_parent(parent)
291 {
292 	//f_parent -- already initialized in decl.
293 	f_filename = 0;
294 	f_line = 0;
295 	f_first_line = 0;
296 	f_type = SCRIPTFILE_TYPE_UNKNOWN;
297 	f_file = 0;
298 	f_last_errno = 0;
299 	f_last_char = '\0';
300 	f_unread_count = 0;
301 	//f_unread -- the counter is at zero
302 	f_unget_count = 0;
303 	//f_unget -- the counter is at zero
304 	f_iconvertor_open = false;
305 	//f_convertor -- flag is false
306 	f_mb_count = 0;
307 	//f_multibytes -- counter is at zero
308 	f_read_actionscript = false;
309 }
310 
311 
~ScriptFile()312 ScriptFile::~ScriptFile()
313 {
314 	Reset();
315 }
316 
317 
Reset(void)318 void ScriptFile::Reset(void)
319 {
320 	CloseFile();
321 	// sswf_clean(&f_filename); -- this is used here and there, don't delete
322 
323 	f_line = 0;
324 	f_last_char = 0;
325 	f_unread_count = 0;
326 	f_unget_count = 0;
327 	f_type = SCRIPTFILE_TYPE_UNKNOWN;
328 	f_mb_count = 0;
329 
330 	if(f_iconvertor_open) {
331 		iconv_close(f_iconvertor);
332 		f_iconvertor_open = false;
333 	}
334 }
335 
336 
Line(void) const337 unsigned int ScriptFile::Line(void) const
338 {
339 	return f_line;
340 }
341 
342 
Parent(void)343 ScriptFile *ScriptFile::Parent(void)
344 {
345 	return f_parent;
346 }
347 
348 
Filename(void)349 const char *ScriptFile::Filename(void)
350 {
351 	return f_filename;
352 }
353 
354 
SetReadActionscript(bool yes)355 void ScriptFile::SetReadActionscript(bool yes)
356 {
357 	f_read_actionscript = yes;
358 }
359 
360 
FindFile(const char * filename,sswf::Vectors & user_include_paths,bool use_internal_paths)361 int ScriptFile::FindFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths)
362 {
363 	// TODO: this shouldn't be hard coded; instead some deep well hidden
364 	// configuration file should specify these default directories (and
365 	// the path to that setup file, where do we get it?!)
366 	static const char *	default_include_paths[] = {
367 		// Alexis' suggested install dir.
368 		"/usr/include/sswf/scripts",
369 		// Linux
370 		"/usr/share/sswf/scripts",
371 		"/usr/share/sswf/include/scripts",
372 		"/usr/local/share/sswf/scripts",
373 		"/usr/local/share/sswf/include/scripts",
374 		// MAC OS X with Fink
375 		"/sw/local/share/sswf/scripts",
376 		"/sw/local/share/sswf/include/scripts",
377 		// IRIX
378 		"/opt/sswf/scripts",
379 		"/opt/sswf/include/scripts",
380 		0
381 	};
382 	const char	*s, **p;
383 	char		*name;
384 	int		idx, max;
385 
386 	if(show_input_search) {
387 		printf(" %% File \"%s\" exists?\n", filename);
388 	}
389 
390 /* special case, use stdin instead of a file */
391 	if(strcmp(filename, "-") == 0) {
392 		f_filename = "*standard input*";
393 		f_file = stdin;
394 		if(show_input_filenames) {
395 			printf(" -> Input File: \"%s\".\n", f_filename);
396 		}
397 		return 0;
398 	}
399 
400 /* check file as is (from current dir.) */
401 	f_file = fopen(filename, "rb");
402 	if(f_file != NULL) {
403 		f_filename = sswf_strdup(filename);
404 		if(show_input_filenames) {
405 			printf(" -> Input File: \"%s\".\n", f_filename);
406 		}
407 		return 0;
408 	}
409 	f_last_errno = errno;
410 	if(f_last_errno != ENOENT) {
411 		return -1;
412 	}
413 
414 /* a file specified with a full path can't be searched any more */
415 	s = filename;
416 	while(*s != '/' && *s != '\\' && *s != '\0' && *s != ':') {
417 		s++;
418 	}
419 	if((s == filename && (s[0] == '/' || s[0] == '\\')) || s[0] == ':') {
420 		return -1;
421 	}
422 
423 /* look within the user include directories */
424 	max = user_include_paths.Count();
425 	for(idx = 0; idx < max; idx++) {
426 		s = (const char *) user_include_paths.Get(idx);
427 		name = sswf_strchild(s, filename);
428 		if(show_input_search) {
429 			printf(" %% File \"%s\" exists?\n", name);
430 		}
431 		f_file = fopen(name, "rb");
432 		if(f_file != 0) {
433 			f_filename = name;
434 			if(show_input_filenames) {
435 				printf(" -> Input File: \"%s\".\n", f_filename);
436 			}
437 			return 0;
438 		}
439 		f_last_errno = errno;
440 		sswf_free(name);
441 		if(f_last_errno != ENOENT) {
442 			return -1;
443 		}
444 	}
445 
446 /* if not turned off, try the internal paths now */
447 	if(use_internal_paths) {
448 		for(p = default_include_paths; *p != 0; p++) {
449 			name = sswf_strchild(*p, filename);
450 			if(show_input_search) {
451 				printf(" %% File \"%s\" exists?\n", name);
452 			}
453 			f_file = fopen(name, "rb");
454 			if(f_file != 0) {
455 				f_filename = name;
456 				if(show_input_filenames) {
457 					printf(" -> Input File: \"%s\".\n", f_filename);
458 				}
459 				return 0;
460 			}
461 			f_last_errno = errno;
462 			sswf_free(name);
463 			if(f_last_errno != ENOENT) {
464 				return -1;
465 			}
466 		}
467 	}
468 
469 /* file not found... */
470 	return -1;
471 }
472 
473 
OpenFile(const char * filename,sswf::Vectors & user_include_paths,bool use_internal_paths)474 int ScriptFile::OpenFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths)
475 {
476 	int				a, b, c, d;
477 	unsigned long			input;
478 	char				encoding[256];
479 	const ScriptFile::sf_type_t	*types;
480 
481 	Reset();
482 
483 	if(FindFile(filename, user_include_paths, use_internal_paths) != 0) {
484 		fprintf(stderr, "ERROR: can't open file \"%s\" (errno: %d).\n", filename, errno);
485 		return 1;
486 	}
487 	f_line = 1;
488 
489 /* at the very start we need to check for U16/U32 files */
490 /*
491  * The following are the tests which will be conducted
492  * on the input to try to determine the type of the file.
493  * Note that we need at least 4 characters in any SSFW
494  * file. Don't forget also that the file needs to start
495  * with a comment.
496  *
497  * File Starts with		Default Encoding
498  *	0xEF 0xBB 0xBF 0x?? ... UTF8
499  *	0x?? 0x00 0x?? 0x00 ... USC2LE
500  *	0x00 0x?? 0x00 0x?? ... USC2BE
501  *	0xFF 0xFE 0x?? 0x00 ... USC2LE
502  *	0xFE 0xFF 0x00 0x?? ... USC2BE
503  *	0x?? 0x00 0x00 0x00 ... USC4LE
504  *	0x00 0x00 0x00 0x?? ... USC4BE
505  *	0xFF 0xFE 0x00 0x00 ... USC4LE
506  *	0x00 0x00 0xFE 0xFF ... USC4BE
507  *
508  * The rest will force UNKNOWN and a comment must be present
509  * on the first line. This comment must include the encoding.
510  * For instance:
511  *
512  * 	encoding="utf-16"
513  *
514  * Note that even auto-detected formats can include an
515  * encoding. In that case, the user specified encoding
516  * needs to match what we have detected (we can't
517  * switch from UCS-2LE to UTF-32BE).
518  *
519  * IMPORTANT NOTE:
520  *	It is to be noted that the parser will only
521  *	accept a few characters at the beginning of a
522  *	file and this is why this algorithm works this
523  *	way.
524  *
525  *	The possible characters are as defined here:
526  *
527  *	. spaces (U+0009 '\t', U+000A '\n', U+000C '\f',
528  *		  U+000D '\r', U+0020 ' ', U+FEFF)
529  *	. comment (U+0028 '(', U+002F '/')
530  *	. identifier (U+0041 'A' to U+005A 'Z', U+005F '_'
531  *		      U+0061 'a' to U+007A 'z')
532  *
533  *	The identifier can be either an object name
534  *	(such as "sequence", "text", "button"...)
535  *	or the name of a variable (as in "a = 56")
536  */
537 	a = ReadByte();
538 	b = ReadByte();
539 	c = ReadByte();
540 	d = ReadByte();
541 
542 	if(a == -1 || b == -1 || c == -1 || d == -1) {
543 		/*
544 		 * this is not good, a script can't be
545 		 * less than 4 bytes?!?
546 		 */
547 		CloseFile();
548 		fprintf(stderr, "ERROR: file \"%s\" seems empty or too small a file for a ScriptSWF.\n", filename);
549 		f_last_errno = EBADF;
550 		return -1;	// return EOF
551 	}
552 	UnreadByte(d);
553 	UnreadByte(c);
554 	UnreadByte(b);
555 	UnreadByte(a);
556 
557 	if(a == 0xEF && b == 0xBB && c == 0xBF) {
558 		// UTF-8 starting with 0xFEFF is represented by 0xEF, 0xBB and 0xBF
559 		// NOTE:
560 		// This sequence represent i with trema, the double closing quotes '>>'
561 		// and an upside down question mark (for Spanish); that's really
562 		// unlikely not UTF-8!
563 		f_type = SCRIPTFILE_TYPE_UTF8;
564 	}
565 	if(a != 0 && b == 0 && c != 0 && d == 0) {
566 		f_type = SCRIPTFILE_TYPE_UCS2LE;
567 	}
568 	else if(a == 0 && b != 0 && c == 0 && d != 0) {
569 		f_type = SCRIPTFILE_TYPE_UCS2BE;
570 	}
571 	else if(a == 0xFF && b == 0xFE && c != 0 && d == 0) {
572 		f_type = SCRIPTFILE_TYPE_UCS2LE;
573 	}
574 	else if(a == 0xFE && b == 0xFF && c == 0 && d != 0) {
575 		f_type = SCRIPTFILE_TYPE_UCS2BE;
576 	}
577 	else if(a == 0xFF && b == 0xFE && c == 0 && d == 0) {
578 		f_type = SCRIPTFILE_TYPE_UCS4LE;
579 	}
580 	else if(a == 0 && b == 0 && c == 0xFE && d == 0xFF) {
581 		f_type = SCRIPTFILE_TYPE_UCS4BE;
582 	}
583 	else if(a != 0 && b == 0 && c == 0 && d == 0) {
584 		f_type = SCRIPTFILE_TYPE_UCS4LE;
585 	}
586 	else if(a == 0 && b == 0 && c == 0 && d != 0) {
587 		f_type = SCRIPTFILE_TYPE_UCS4BE;
588 	}
589 	else {
590 		/*
591 		 * In this case we assume ISO-8859-1
592 		 * this is useful to read the starting
593 		 * comment as if it were read with
594 		 * ReadByte() calls!
595 		 */
596 		f_type = SCRIPTFILE_TYPE_ISO88591;
597 	}
598 
599 	/*
600 	 * We expect (want) a comment with the name of
601 	 * an encoding; as we read the comment check for
602 	 * the following: 'encoding=\"<name>\"'; only the
603 	 * first encoding entry is used
604 	 */
605 retry:
606 	do {
607 		a = GetChar();
608 	} while(a == ' ' || a == '\t' || a == '\n');
609 	b = '\0';
610 	if(a == '/') {
611 		a = GetChar();
612 		if(a == '/') {	// C++ comment
613 			b = '\n';
614 		}
615 		else if(a == '*') {	/* standard C comment */
616 			b = '/';
617 		}
618 	}
619 	else if(a == '(') {
620 		a = GetChar();
621 		if(a == '*') {		// standard Pascal comment
622 			b = ')';
623 		}
624 	}
625 	if(b == '\0') {
626 		/*
627 		 * This is wrong, we must have a comment at the
628 		 * start of the file!
629 		 */
630 		CloseFile();
631 		fprintf(stderr, "ERROR: can't determine the encoding of \"%s\", no proper comment found at the beginning of the file.\n", filename);
632 		f_last_errno = EINVAL;
633 		return 1;
634 	}
635 	c = 0;
636 	a = GetChar();
637 	for(;;) {
638 		if(a == -1) {
639 			CloseFile();
640 			fprintf(stderr, "ERROR: end of file \"%s\" found before the end of the starting comment.\n", filename);
641 			f_last_errno = EBADF;
642 			return -1;
643 		}
644 		if(a == '*') {			// C or Pascal comment ends
645 			a = GetChar();
646 			if(a == b) {
647 				// Ooops no encoding="..." in this comment!
648 				// Just try again
649 				goto retry;
650 			}
651 			c = 0;	// new word after an asterisk...
652 			continue;
653 		}
654 		if((a < 'A' || a > 'Z') && (a < 'a' || a > 'z') && a != '\"' && a != '=') {
655 			// new word
656 			if(b == '\n' && a == '\n') {		// C++ comment ends
657 				// Ooops no encoding="..." in this comment!
658 				// Just try again
659 				goto retry;
660 			}
661 			c = 0;
662 		}
663 		else if(c < 10) {
664 			// searching: encoding="...
665 			encoding[c] = a;
666 			c++;
667 			if(c == 10 && strncasecmp(encoding, "encoding=\"", 10) == 0) {
668 				c = 0;
669 				a = GetChar();
670 				while(a != '"' && a != '\n' && a != -1 && c < (int) (sizeof(encoding) - 1)) {
671 					if(a != '-' && a != '_') {
672 						encoding[c] = a;
673 						c++;
674 					}
675 					a = GetChar();
676 				}
677 				while(c > 0 && encoding[c - 1] == '/') {
678 					c--;
679 				}
680 				encoding[c] = '\0';
681 				break;
682 			}
683 		}
684 		a = GetChar();
685 	}
686 	// we found an encoding="..." entry
687 	// let's skip the rest of the comment first
688 	if(b == '\n') {
689 		do {
690 			a = GetChar();
691 			if(a == -1) {
692 				fprintf(stderr, "ERROR: end of file \"%s\" found before the end of a comment.\n", filename);
693 				CloseFile();
694 				f_last_errno = EBADF;
695 				return -1;
696 			}
697 		} while(a != '\n');
698 	}
699 	else {
700 		do {
701 			a = GetChar();
702 			while(a == '*') {
703 				a = GetChar();
704 				if(a == -1) {
705 					fprintf(stderr, "ERROR: end of file \"%s\" found before the end of a comment.\n", filename);
706 					CloseFile();
707 					f_last_errno = EBADF;
708 					return -1;
709 				}
710 				if(a == b) {
711 					a = -1;
712 					break;
713 				}
714 			}
715 		} while(a != -1);
716 	}
717 
718 	input = 1 << f_type;
719 	types = internal_types;
720 	while(types->f_type != SCRIPTFILE_TYPE_UNKNOWN) {
721 		if(strcasecmp(encoding, types->f_name) == 0 && (types->f_input & input) != 0) {
722 			if(types->f_type != SCRIPTFILE_TYPE_SAME) {
723 				f_type = types->f_type;
724 			}
725 			// we found the proper type, we're done here.
726 			return 0;
727 		}
728 		types++;
729 	}
730 	// didn't find anything compatible, check out for an iconv(3C) convertion
731 	if(f_type != SCRIPTFILE_TYPE_ISO88591) {
732 		// the encoding doesn't match and it should!
733 		f_type = SCRIPTFILE_TYPE_UNKNOWN;
734 		fprintf(stderr, "ERROR: unacceptable encoding \"%s\" for this file.\n", encoding);
735 		return 1;
736 	}
737 
738 	// the input encoding needs to be an 8 bits encoding!
739 	f_iconvertor = iconv_open("UCS-4-INTERNAL", encoding);
740 	if(f_iconvertor == (iconv_t) -1) {
741 		f_last_errno = errno;
742 		CloseFile();
743 		fprintf(stderr, "ERROR: encoding \"%s\" not understood. Please, check your iconv_open() manual page for a complete list of possible convertions.\n", encoding);
744 		return 1;
745 	}
746 
747 	f_type = SCRIPTFILE_TYPE_MULTIBYTES;
748 
749 	return 0;
750 
751 #if 0
752 // old stuff...
753 	switch(f_type) {
754 	case SCRIPTFILE_TYPE_UTF16LE:
755 		if(strcasecmp(encoding, "UCS-2")   == 0
756 		&& strcasecmp(encoding, "UCS-2LE") == 0) {
757 			f_type = SCRIPTFILE_TYPE_UCS2LE;
758 		}
759 		else if(strcasecmp(encoding, "UTF-16")   != 0
760 		     && strcasecmp(encoding, "UTF-16LE") != 0) {
761 			// we've got a problem here!
762 			f_type = SCRIPTFILE_TYPE_UNKNOWN;
763 		}
764 		break;
765 
766 	case SCRIPTFILE_TYPE_UTF16BE:
767 		if(strcasecmp(encoding, "UCS-2")   == 0
768 		|| strcasecmp(encoding, "UCS-2BE") == 0) {
769 			f_type = SCRIPTFILE_TYPE_UCS2BE;
770 		}
771 		else if(strcasecmp(encoding, "UTF-16")   != 0
772 		     && strcasecmp(encoding, "UTF-16BE") != 0) {
773 			// we've got a problem here!
774 			f_type = SCRIPTFILE_TYPE_UNKNOWN;
775 		}
776 		break;
777 
778 	case SCRIPTFILE_TYPE_UCS2LE:
779 		if(strcasecmp(encoding, "UTF-16")   != 0
780 		&& strcasecmp(encoding, "UTF-16LE") != 0) {
781 			f_type = SCRIPTFILE_TYPE_UTF16LE;
782 		}
783 		else if(strcasecmp(encoding, "UCS-2")   != 0
784 		     && strcasecmp(encoding, "UCS-2LE") != 0) {
785 			// we've got a problem here!
786 			f_type = SCRIPTFILE_TYPE_UNKNOWN;
787 		}
788 		break;
789 
790 	case SCRIPTFILE_TYPE_UCS2BE:
791 		if(strcasecmp(encoding, "UTF-16")    == 0
792 		|| strcasecmp(encoding, "UTF-16BE")  == 0) {
793 			f_type = SCRIPTFILE_TYPE_UTF16BE;
794 		}
795 		else if(strcasecmp(encoding, "UCS-2")   != 0
796 		     && strcasecmp(encoding, "UCS-2BE") != 0) {
797 			// we've got a problem here!
798 			f_type = SCRIPTFILE_TYPE_UNKNOWN;
799 		}
800 		break;
801 
802 	case SCRIPTFILE_TYPE_UCS4LE:
803 		if(strcasecmp(encoding, "UTF-32")   == 0
804 		&& strcasecmp(encoding, "UTF-32LE") == 0) {
805 			f_type = SCRIPTFILE_TYPE_UTF32LE;
806 		}
807 		else if(strcasecmp(encoding, "UCS-4")   != 0
808 		     && strcasecmp(encoding, "UCS-4LE") != 0) {
809 			// we've got a problem here!
810 			f_type = SCRIPTFILE_TYPE_UNKNOWN;
811 		}
812 		break;
813 
814 	case SCRIPTFILE_TYPE_UCS4BE:
815 		if(strcasecmp(encoding, "UTF-32")    == 0
816 		|| strcasecmp(encoding, "UTF-32BE")  == 0) {
817 			f_type = SCRIPTFILE_TYPE_UTF32BE;
818 		}
819 		else if(strcasecmp(encoding, "UCS-4")   != 0
820 		     && strcasecmp(encoding, "UCS-4BE") != 0) {
821 			// we've got a problem here!
822 			f_type = SCRIPTFILE_TYPE_UNKNOWN;
823 		}
824 		break;
825 
826 	case SCRIPTFILE_TYPE_ISO8859_1:
827 		// now we have a name, check for what we understand internally...
828 		if(strcasecmp(encoding, "UTF-8") == 0) {
829 			f_type = SCRIPTFILE_TYPE_UTF8;
830 		}
831 		else if(strcasecmp(encoding, "iso-8859-1") == 0 || strcasecmp(encoding, "iso_8859-1") == 0 || strcasecmp(encoding, "iso8859-1") == 0) {
832 			f_type = SCRIPTFILE_TYPE_ISO8859_1;
833 		}
834 		else if(strcasecmp(encoding, "ascii") == 0) {
835 			f_type = SCRIPTFILE_TYPE_ASCII;
836 		}
837 		else {
838 			// otherwise, use iconv() facility
839 			// the input encoding needs to be an 8 bits encoding!
840 			f_iconvertor = iconv_open("UCS-4-INTERNAL", encoding);
841 			if(f_iconvertor == (iconv_t) -1) {
842 				f_last_errno = errno;
843 				CloseFile();
844 				fprintf(stderr, "ERROR: encoding \"%s\" not understood. Please, check your iconv_open() manual page for a complete list of possible convertions.\n", encoding);
845 				return 1;
846 			}
847 			f_type = SCRIPTFILE_TYPE_MULTIBYTES;
848 		}
849 		break;
850 
851 #if DEBUG
852 	default:
853 		assert(0, "INTERNAL ERROR: f_type seems to be set to a value we didn't have control over (%d).", f_type);
854 #endif
855 
856 	}
857 #endif
858 }
859 
860 
CloseFile(void)861 void ScriptFile::CloseFile(void)
862 {
863 	if(f_file != 0) {
864 		/*
865 		 * Avoid closing the standard input file since this
866 		 * is usually done by the system at exit()
867 		 */
868 		if(f_file != stdin) {
869 			fclose(f_file);
870 		}
871 		f_file = 0;
872 	}
873 }
874 
875 
876 
877 
ReadActionscript(void)878 int ScriptFile::ReadActionscript(void)
879 {
880 	char		*str;
881 	int		max, pos, count;
882 	size_t		size;
883 	bool		in_string;
884 	c_t		c, last_char, quote;
885 
886 	// create the node at the start so the f_line is at the start
887 	// (we need to pass that to the parser so it err at the
888 	// right line!)
889 	yylval.node = node_alloc(NODE_TYPE_STRING, NODE_SUBTYPE_UNKNOWN, f_first_line);
890 
891 	str = (char *) sswf_malloc(256, "ReadActionscript() -- small string buffer");
892 		/*
893 		 * We allocated 256, but save 1 byte for the null terminator
894 		 * and up to 6 for the last multi-byte
895 		 */
896 	max = 256 - 6 - 1;
897 	pos = 0;
898 
899 	// we read everything up to a closing '}' since an action
900 	// script is always written between '{' and '}'
901 	count = 1;
902 	in_string = false;
903 	quote = '\0';
904 	c = ' ';
905 	do {
906 		last_char = c;
907 		c = GetChar();
908 		switch(c) {
909 		case SCRIPTFILE_EOF:
910 		case SCRIPTFILE_BAD:
911 			count = 0;
912 			c = '\0';
913 			break;
914 
915 		case '{':
916 			if(!in_string) {
917 				count++;
918 			}
919 			break;
920 
921 		case '}':
922 			if(!in_string) {
923 				count--;
924 			}
925 			break;
926 
927 		case '"':
928 		case '\'':
929 		case '`':
930 			if(in_string) {
931 				if(quote == c && last_char != '\\') {
932 					in_string = false;
933 				}
934 			}
935 			else {
936 				quote = c;
937 				in_string = true;
938 			}
939 			break;
940 
941 		// other characters kept as is
942 		}
943 		if(count > 0) {
944 			if(pos >= max) {
945 				max += 256;
946 				/*
947 				 * +6 because some multi-bytes take that many bytes
948 				 * +1 so the null terminator is reserved
949 				 */
950 				str = (char *) sswf_remalloc(str, max + 6 + 1, "StrAppend() -- large string buffer");
951 			}
952 			size = 6;
953 			// TODO: should we check for errors?
954 			sswf::wctomb(&c, sizeof(c), str + pos, size);
955 			pos += 6 - size;
956 		}
957 	} while(count > 0);
958 	str[pos] = '\0';
959 
960 	// the '}' character needs to be restored
961 	UngetChar(c);
962 
963 #if ADJUT_STRINGS
964 	/* on most systems this is really fast and it can save some memory */
965 	str = sswf_remalloc(str, pos + 1, "ReadActionscript() -- adjusted to the minimum");
966 #endif
967 
968 	yylval.node->string = str;
969 
970 //fprintf(stderr, "Read actionscript [%s]\n", str);
971 
972 	return STRING;
973 }
974 
975 
GetToken(void)976 int ScriptFile::GetToken(void)
977 {
978 	c_t		c;
979 
980 	yylloc.first_line = f_first_line = f_line;
981 
982 	if(f_read_actionscript) {
983 		return ReadActionscript();
984 	}
985 
986 	for(;;) {
987 		do {
988 			c = GetChar();
989 		} while(c == ' ' || c == '\t' || c == '\f' || c == '\n');
990 
991 		if((c >= 'A' && c <= 'Z')
992 		|| (c >= 'a' && c <= 'z')
993 		|| c == '_'
994 		|| c >= 0x0C0) {	// international character
995 			/* an identifier or keyword */
996 			return ReadIdentifier(c);
997 		}
998 
999 		switch(c) {
1000 		case SCRIPTFILE_EOF:
1001 			return EOF;
1002 
1003 		case SCRIPTFILE_BAD:
1004 			// TODO: ???
1005 			return EOF;
1006 
1007 		case '0':
1008 		case '1':
1009 		case '2':
1010 		case '3':
1011 		case '4':
1012 		case '5':
1013 		case '6':
1014 		case '7':
1015 		case '8':
1016 		case '9':
1017 			return ReadValue(c);
1018 
1019 		case '\'':
1020 		case '`':
1021 		case '\"':
1022 			return ReadString(c);
1023 
1024 		case '.':
1025 			c = GetChar();
1026 			if(c == '.') {
1027 				return RANGE;
1028 			}
1029 			if(c >= '0' && c <= '9') {
1030 				UngetChar(c);
1031 				return ReadValue('.');
1032 			}
1033 			UngetChar(c);
1034 			return '.';
1035 
1036 		case '*':
1037 			c = GetChar();
1038 			if(c == '*') {
1039 				return POWER;
1040 			}
1041 			UngetChar(c);
1042 			return '*';
1043 
1044 		case '<':
1045 			c = GetChar();
1046 			if(c == '?') {
1047 				return MIN_OP;
1048 			}
1049 			if(c == '<') {
1050 				return SHIFT_LEFT;
1051 			}
1052 			if(c == '>') {
1053 				return NOT_EQUAL;
1054 			}
1055 			if(c == '=') {
1056 				return LESS_EQUAL;
1057 			}
1058 			UngetChar(c);
1059 			return '<';
1060 
1061 		case '>':
1062 			c = GetChar();
1063 			if(c == '?') {
1064 				return MAX_OP;
1065 			}
1066 			if(c == '>') {
1067 				c = GetChar();
1068 				if(c == '>') {
1069 					return SHIFT_RIGHT_UNSIGNED;
1070 				}
1071 				UngetChar(c);
1072 				return SHIFT_RIGHT;
1073 			}
1074 			if(c == '=') {
1075 				return GREATER_EQUAL;
1076 			}
1077 			UngetChar(c);
1078 			return '>';
1079 
1080 		case '!':
1081 			c = GetChar();
1082 			if(c == '<') {
1083 				return ROTATE_LEFT;
1084 			}
1085 			if(c == '>') {
1086 				return ROTATE_RIGHT;
1087 			}
1088 			if(c == '=') {
1089 				return NOT_EQUAL;
1090 			}
1091 			UngetChar(c);
1092 			return '!';
1093 
1094 		case '=':
1095 			c = GetChar();
1096 			if(c == '=') {
1097 				return EQUAL;
1098 			}
1099 			UngetChar(c);
1100 			return '=';
1101 
1102 		case ':':
1103 			c = GetChar();
1104 			if(c == '=') {
1105 				return c;
1106 			}
1107 			UngetChar(c);
1108 			return ':';
1109 
1110 		case '|':
1111 			c = GetChar();
1112 			if(c == '|') {
1113 				return LOGICAL_OR;
1114 			}
1115 			UngetChar(c);
1116 			return '|';
1117 
1118 		case '^':
1119 			c = GetChar();
1120 			if(c == '^') {
1121 				return LOGICAL_XOR;
1122 			}
1123 			UngetChar(c);
1124 			return '^';
1125 
1126 		case '&':
1127 			c = GetChar();
1128 			if(c == '&') {
1129 				return LOGICAL_AND;
1130 			}
1131 			UngetChar(c);
1132 			return '&';
1133 
1134 		case '/':
1135 			c = GetChar();
1136 			if(c == '*') {
1137 				SkipComment('/');
1138 				continue;
1139 			}
1140 			if(c == '/') {
1141 				// C++ comment, read until '\n'
1142 				do {
1143 					c = GetChar();
1144 				} while(c != '\n' && c != SCRIPTFILE_EOF);
1145 				continue;
1146 			}
1147 			UngetChar(c);
1148 			return '/';
1149 
1150 		case '(':
1151 			c = GetChar();
1152 			if(c == '*') {
1153 				SkipComment(')');
1154 				continue;
1155 			}
1156 			UngetChar(c);
1157 			return '(';
1158 
1159 		// anything else is returned as is
1160 		default:
1161 			return c;
1162 
1163 		}
1164 	}
1165 
1166 	return 0;
1167 }
1168 
1169 
SkipComment(int close)1170 void ScriptFile::SkipComment(int close)
1171 {
1172 	register int	c, p;
1173 
1174 	c = 0;
1175 	do {
1176 		p = c;
1177 		c = GetChar();
1178 	} while(c != SCRIPTFILE_EOF && c != SCRIPTFILE_BAD && (c != close || p != '*'));
1179 }
1180 
1181 
// One entry of the keyword tables searched by ScriptFile::ReadIdentifier().
//
// The entries are created with the KEYWORD() macro (or one of the
// macros built on top of it) which fills the fields in the order in
// which they are declared here; do not reorder the fields.
struct keyword_t {
	size_t		f_size;		// number of chars in the keyword (sizeof() - 1, i.e. without the null terminator)
	const char *	f_name;		// the keyword, in uppercase; words of a multi-word keyword are separated by '_'
	node_type_t	f_type;		// the node type, the unit token or an integer value depending on f_flags
	node_type_t	f_subtype;	// the sub-type
	unsigned int	f_flags;	// what we need to do here (a set of KEYWORD_FLAG_...)
};
// Flags found in keyword_t::f_flags; ScriptFile::ReadIdentifier() tests
// OBJECT, UNIT and INTEGER in that order and falls back to DIRECT
#define	KEYWORD_FLAG_DIRECT		0x00000000	// f_type is returned as the token
#define	KEYWORD_FLAG_OBJECT		0x00000001	// a node of type f_type/f_subtype is allocated, the token is OBJECT
#define	KEYWORD_FLAG_UNIT		0x00000002	// f_type is the corresponding node_unit_t
#define	KEYWORD_FLAG_INTEGER		0x00000004	// f_type is an integer (for FALSE and TRUE)

// the keyword can be written in several words (blanks are viewed as '_')
#define	KEYWORD_FLAG_MULTIWORD		0x80000000


// Initializer of one keyword_t.
// NOTE: 'type' is parenthesized so the cast applies to the whole
//	 argument whenever an expression is passed to the macro
#define	KEYWORD(w, type, subtype, flg)	{ (sizeof(w) - 1), (w), ((node_type_t) (type)), (subtype), (flg) }

// shortcuts: the keyword string is created from the name given as 'w'
#define	OBJECT_KEYWORD(w, flg)		KEYWORD(#w, NODE_TYPE_OBJECT, NODE_SUBTYPE_##w, (flg) | KEYWORD_FLAG_OBJECT)
#define	DIRECT_KEYWORD(w, flg)		KEYWORD(#w, (w), NODE_SUBTYPE_UNKNOWN, (flg))
#define	UNIT_KEYWORD(w, unit, flg)	KEYWORD(#w, UNIT_##unit, NODE_SUBTYPE_##w, (flg) | KEYWORD_FLAG_UNIT)
1202 
1203 
1204 
1205 struct all_keywords_t {
1206 	keyword_t *	f_keywords;
1207 	size_t		f_count;
1208 };
1209 #define	ALL_KEYWORD_ENTRY(x)		{ x##_keywords, (sizeof(x##_keywords) / sizeof(keyword_t)) }
1210 
1211 
1212 
// The keyword tables, one table per first letter (letters without any
// keyword have no table, see all_keywords[]).
//
// ScriptFile::ReadIdentifier() checks the entries of a table from the
// first to the last and stops on the first match, so the order of the
// entries matters: do not sort or reorder them without checking that
// function. Note for instance that COLOR_TRANSFORM, SOUND_INFO and
// TEXT_SETUP appear before COLOR, SOUND and TEXT.
//
// The names must be written in uppercase (asserted in DEBUG mode).
keyword_t a_keywords[] = {
	OBJECT_KEYWORD(ACTION, 0),
	DIRECT_KEYWORD(ACTION_SCRIPT, 0)
};

keyword_t b_keywords[] = {
	UNIT_KEYWORD(BC, COLOR, 0),
	// BLOCK == LIST
	KEYWORD("BLOCK", NODE_TYPE_OBJECT, NODE_SUBTYPE_LIST, KEYWORD_FLAG_OBJECT),
	OBJECT_KEYWORD(BUTTON, 0)
};

keyword_t c_keywords[] = {
	OBJECT_KEYWORD(CATCH, 0),
	UNIT_KEYWORD(CM, SIZE, 0),
	OBJECT_KEYWORD(COLOR_TRANSFORM, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(COLOR, 0)
};

keyword_t d_keywords[] = {
	// DEFINE SHAPE == SHAPE
	KEYWORD("DEFINE_SHAPE", NODE_TYPE_OBJECT, NODE_SUBTYPE_SHAPE, KEYWORD_FLAG_OBJECT | KEYWORD_FLAG_MULTIWORD),
	UNIT_KEYWORD(DEG, ANGLE, 0),
	OBJECT_KEYWORD(DO_ACTION, KEYWORD_FLAG_MULTIWORD)
};

keyword_t e_keywords[] = {
	OBJECT_KEYWORD(EDGES, 0),
	OBJECT_KEYWORD(EDIT_TEXT, KEYWORD_FLAG_MULTIWORD),
	DIRECT_KEYWORD(ELSE, 0),
	OBJECT_KEYWORD(END, 0),
	// ENVELOP == ENVELOPE (both spellings are accepted)
	KEYWORD("ENVELOP", NODE_TYPE_OBJECT, NODE_SUBTYPE_ENVELOPE, KEYWORD_FLAG_OBJECT),
	OBJECT_KEYWORD(ENVELOPE, 0),
	OBJECT_KEYWORD(EXPORT, 0)
};

keyword_t f_keywords[] = {
	// FALSE is returned as the integer 0
	KEYWORD("FALSE", 0, NODE_SUBTYPE_UNKNOWN, KEYWORD_FLAG_INTEGER),
	UNIT_KEYWORD(FC, COLOR, 0),
	OBJECT_KEYWORD(FILL_STYLE, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(FINALLY, 0),
	OBJECT_KEYWORD(FONT, 0),
	DIRECT_KEYWORD(FOR, 0),
	UNIT_KEYWORD(FPF, SPEED, 0),
	UNIT_KEYWORD(FPS, SPEED, 0),
	OBJECT_KEYWORD(FRAME_LABEL, KEYWORD_FLAG_MULTIWORD),
	UNIT_KEYWORD(FRM, TIME, 0),
	OBJECT_KEYWORD(FUNCTION, 0)
};

keyword_t g_keywords[] = {
	OBJECT_KEYWORD(GLYPH, 0),
	UNIT_KEYWORD(GRAD, ANGLE, 0),
	OBJECT_KEYWORD(GRADIENT, 0)
};

keyword_t i_keywords[] = {
	DIRECT_KEYWORD(IF, 0),
	OBJECT_KEYWORD(IMAGE, 0),
	OBJECT_KEYWORD(IMPORT, 0),
	UNIT_KEYWORD(IN, SIZE, 0)
};

keyword_t l_keywords[] = {
	OBJECT_KEYWORD(LABEL, 0),
	OBJECT_KEYWORD(LINE_STYLE, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(LIST, 0)
};

keyword_t m_keywords[] = {
	OBJECT_KEYWORD(MATRIX, 0),
	OBJECT_KEYWORD(METADATA, 0),
	UNIT_KEYWORD(MIN, TIME, 0)
};

keyword_t o_keywords[] = {
	OBJECT_KEYWORD(ON_EVENT, KEYWORD_FLAG_MULTIWORD)
};

keyword_t p_keywords[] = {
	OBJECT_KEYWORD(PLACE_OBJECT, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(POINTS, 0),
	UNIT_KEYWORD(PR, RATIO, 0),
	UNIT_KEYWORD(PX, SIZE, 0)
};

keyword_t r_keywords[] = {
	UNIT_KEYWORD(RAD, ANGLE, 0),
	OBJECT_KEYWORD(RECT, 0),
	// RECTANGLE == RECT
	KEYWORD("RECTANGLE", NODE_TYPE_OBJECT, NODE_SUBTYPE_RECT, KEYWORD_FLAG_OBJECT),
	OBJECT_KEYWORD(REMOVE, 0),
	OBJECT_KEYWORD(REPLACE_OBJECT, KEYWORD_FLAG_MULTIWORD),
	UNIT_KEYWORD(RT, RATIO, 0)
};

keyword_t s_keywords[] = {
	OBJECT_KEYWORD(SCRIPT_LIMITS, KEYWORD_FLAG_MULTIWORD),
	UNIT_KEYWORD(SEC, TIME, 0),
	OBJECT_KEYWORD(SEQUENCE, 0),
	OBJECT_KEYWORD(SET_BACKGROUND_COLOR, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(SET_TAB_INDEX, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(SHAPE, 0),
	OBJECT_KEYWORD(SHOW_FRAME, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(SOUND_INFO, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(SOUND, 0),
	OBJECT_KEYWORD(SPRITE, 0),
	OBJECT_KEYWORD(STATE, 0)
};

keyword_t t_keywords[] = {
	OBJECT_KEYWORD(TEXT_SETUP, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(TEXT, 0),
	// TRUE is returned as the integer 1
	KEYWORD("TRUE", 1, NODE_SUBTYPE_UNKNOWN, KEYWORD_FLAG_INTEGER),
	OBJECT_KEYWORD(TRY, 0),
	UNIT_KEYWORD(TW, SIZE, 0)
};

keyword_t w_keywords[] = {
	OBJECT_KEYWORD(WITH, 0)
};
1332 
1333 
1334 
// The keyword tables indexed by the first letter of the keywords:
// entry 0 is for 'A', entry 25 is for 'Z'.
//
// ScriptFile::ReadIdentifier() uses the uppercase version of the first
// character of a word minus 'A' as the index in this array. The letters
// without keywords use { 0, 0 }: since the count is zero, the null
// table pointer is never dereferenced.
all_keywords_t all_keywords[26] = {
	/* A */ ALL_KEYWORD_ENTRY(a),
	/* B */ ALL_KEYWORD_ENTRY(b),
	/* C */ ALL_KEYWORD_ENTRY(c),
	/* D */ ALL_KEYWORD_ENTRY(d),
	/* E */ ALL_KEYWORD_ENTRY(e),
	/* F */ ALL_KEYWORD_ENTRY(f),
	/* G */ ALL_KEYWORD_ENTRY(g),
	/* H */ { 0, 0 },	// ALL_KEYWORD_ENTRY(h),
	/* I */ ALL_KEYWORD_ENTRY(i),
	/* J */ { 0, 0 },	// ALL_KEYWORD_ENTRY(j),
	/* K */ { 0, 0 },	// ALL_KEYWORD_ENTRY(k),
	/* L */ ALL_KEYWORD_ENTRY(l),
	/* M */ ALL_KEYWORD_ENTRY(m),
	/* N */ { 0, 0 },	// ALL_KEYWORD_ENTRY(n),
	/* O */ ALL_KEYWORD_ENTRY(o),
	/* P */ ALL_KEYWORD_ENTRY(p),
	/* Q */ { 0, 0 },	// ALL_KEYWORD_ENTRY(q),
	/* R */ ALL_KEYWORD_ENTRY(r),
	/* S */ ALL_KEYWORD_ENTRY(s),
	/* T */ ALL_KEYWORD_ENTRY(t),
	/* U */ { 0, 0 },	// ALL_KEYWORD_ENTRY(u),
	/* V */ { 0, 0 },	// ALL_KEYWORD_ENTRY(v),
	/* W */ ALL_KEYWORD_ENTRY(w),
	/* X */ { 0, 0 },	// ALL_KEYWORD_ENTRY(x),
	/* Y */ { 0, 0 },	// ALL_KEYWORD_ENTRY(y),
	/* Z */ { 0, 0 }	// ALL_KEYWORD_ENTRY(z)
};
1363 
1364 
ReadIdentifier(c_t c)1365 int ScriptFile::ReadIdentifier(c_t c)
1366 {
1367 	char		a, identifier[256];	/* by default we expect that identifiers are less than 256 chars */
1368 	char		*id;
1369 	const char	*s1, *s2;
1370 	size_t		size;
1371 	int		pos, max, cnt, idx;
1372 	const keyword_t	*k;
1373 	bool		has_international, found;
1374 	c_t		ex;
1375 
1376 	id = identifier;
1377 	has_international = c >= 0x80;
1378 	if(has_international) {
1379 		size = 6;
1380 		sswf::wctomb(&c, sizeof(c), id, size);
1381 		pos = 6 - size;
1382 	}
1383 	else {
1384 		id[0] = (char) c;
1385 		pos = 1;
1386 	}
1387 	max = sizeof(identifier) - 6 - 1;
1388 
1389 	// read one identifier -- keywords may be composed of
1390 	// multiple identifiers, others will be read if required
1391 	// only
1392 	c = GetChar();
1393 	while((c >= '0' && c <= '9')
1394 	   || (c >= 'A' && c <= 'Z')
1395 	   || (c >= 'a' && c <= 'z')
1396 	   || c == '_'
1397 	   || c >= 0x0C0) {	// we accept any international character too
1398 		if(pos >= max) {
1399 			// need a larger buffer
1400 			max += 256;
1401 			if(id != identifier) {
1402 				id = (char *) sswf_remalloc(id, max + 6 + 1, "ReadIdentifier() -- really large identifier buffer");
1403 			}
1404 			else {
1405 				id = (char *) sswf_malloc(max + 6 + 1, "ReadIdentifier() -- large identifier buffer");
1406 				memcpy(id, identifier, max - 256);
1407 			}
1408 		}
1409 		if(c >= 0x80) {
1410 			size = 6;
1411 			sswf::wctomb(&c, sizeof(c), id + pos, size);
1412 			pos += 6 - size;
1413 			has_international = true;
1414 		}
1415 		else {
1416 			// ASCII is anyway saved as is and this is much
1417 			// faster than a crazy call to another function
1418 			// (it also enables me to set the has_international
1419 			// flag in the first part)
1420 			id[pos] = (char) c;
1421 
1422 //printf("Adding char [%c] %d\n", (char) c, (int) c);
1423 
1424 			pos++;
1425 		}
1426 		c = GetChar();
1427 	}
1428 	UngetChar(c);
1429 	id[pos] = '\0';
1430 
1431 //printf(stderr, "Got word [%s]...\n", id);
1432 
1433 	/*
1434 	 * Check for keywords...
1435 	 * NOTE: keywords only include [A-Z0-9_ \t\r\n]
1436 	 * (blanks are only if the keywords can be composed
1437 	 * of multiple words)
1438 	 *
1439 	 * The test below is valid since (1) we save the international
1440 	 * characters in UTF-8 and (2) the has_international flag will
1441 	 * make sure the function skips the test for keywords at once.
1442 	 */
1443 	a = toupper(id[0]);
1444 	if(!has_international
1445 	&& pos < 20		// no keyword longer than that
1446 	&& a >= 'A'		// all keywords start with a letter
1447 	&& a <= 'Z') {
1448 		if(c == '\t' || c == '\n') {
1449 			c = ' ';
1450 		}
1451 		//
1452 		// NOTE: max was used to know if the identifier string was
1453 		//	 allocated and needed extension; here we know that
1454 		//	 it will fit in the identifier buffer and thus we
1455 		//	 don't have to worry about it
1456 		//
1457 		//	 pos can't be modified unless a multi-word keyword
1458 		//	 is found since all the extra characters read
1459 		//	 because of that multi-word entry need to be put
1460 		//	 back in the input stream
1461 		//
1462 		//	 notice that all spaces (' ', '\t' and '\n') are
1463 		//	 transformed in one underscore ('_') within a
1464 		//	 keyword
1465 		//
1466 		max = pos;
1467 		a -= 'A';
1468 		// NOTE: though a will always be positive, it's
1469 		//	 still safer to have a cast to unsigned char!
1470 		k = all_keywords[(unsigned char) a].f_keywords;
1471 		cnt = all_keywords[(unsigned char) a].f_count;
1472 		found = false;
1473 
1474 //printf("%d keywords to check with [%s]... (%p)\n", cnt, id, k);
1475 
1476 		while(cnt > 0) {
1477 			cnt--;
1478 
1479 #if DEBUG
1480 			// make sure that all the keywords are given in upper case
1481 			// [this was actually fixed in 1.7.3]
1482 			idx = k->f_size;
1483 			while(idx > 0) {
1484 				idx--;
1485 				assert(k->f_name[idx] == '_' || (k->f_name[idx] >= 'A' && k->f_name[idx] <= 'Z'), "ScriptFile::ReadIdentifier(): a keyword must fully be given in uppercase (%s)", k->f_name);
1486 			}
1487 #endif
1488 
1489 			// in case we have a multi-word we can't check the size
1490 			if(k->f_size >= (size_t) pos) {
1491 				// This wouldn't take the multi-word written
1492 				// as one word in account...
1493 				//found = strncasecmp(k->f_name, id, pos) == 0;
1494 				s1 = k->f_name;
1495 				s2 = id;
1496 				while(*s1 != '\0' && (*s2 != '\0' && *s2 != ' ')) {
1497 					if(*s1 == '_') {
1498 						s1++;
1499 						assert(*s1 != '\0' && *s1 != '_', "ScriptFile::ReadIdentifier(): a keyword can't end with an underscore nor have two underscores one after another");
1500 					}
1501 					if(*s1 != toupper(*s2)) {
1502 						// this is not valid
1503 						break;
1504 					}
1505 					s1++;
1506 					s2++;
1507 				}
1508 //printf("Compare [%s]/[%s] with [%s]/[%s] - '%02X' %c\n", id, k->f_name, s2, s1, *s2, *s2);
1509 				if(*s1 == '\0' && (*s2 == '\0' || *s2 == ' ')) {
1510 					found = true;
1511 					break;
1512 				}
1513 			}
1514 			if(c == ' ' && (k->f_flags & KEYWORD_FLAG_MULTIWORD) != 0) {
1515 				// 1. do we have enough characters already?
1516 				idx = k->f_size + 1;
1517 				if(max < idx) {
1518 					// not enough, read more
1519 					ex = GetChar();
1520 					while(max < idx
1521 					   && ((ex >= '0' && ex <= '9')
1522 					    || (ex >= 'A' && ex <= 'Z')
1523 					    || (ex >= 'a' && ex <= 'z')
1524 					    ||  ex == '_' ||  ex == ' ' || ex == '\t' || ex == '\n'
1525 					   /*|| ex >= 0x0C0*/)) {	// we DON'T accept international characters since all keywords are in ASCII
1526 						if(ex == ' ' || ex == '\t' || ex == '\n') {
1527 							// ignore multiple spaces
1528 							if(id[max - 1] != ' ') {
1529 								// ASCII is saved as is
1530 								id[max] = ' ';
1531 								max++;
1532 							}
1533 						}
1534 						else {
1535 							// ASCII is saved as is
1536 							id[max] = (char) ex;
1537 							max++;
1538 						}
1539 						ex = GetChar();
1540 					}
1541 					UngetChar(ex);
1542 				}
1543 				id[max] = '\0';
1544 
1545 
1546 				// 2. make sure the following character is a space (' ')
1547 				//    and that the input had enough characters
1548 				idx--;
1549 				if((size_t) max >= k->f_size && (id[idx] == ' ' || id[idx] == '\0')) {
1550 					// compare the words "by hand" because of the ' ' and '_'
1551 					// which have to be considered as being equal
1552 					//
1553 					// IMPORTANT: the 'break's below would be "wrong" if we
1554 					// didn't already know that the very first character was
1555 					// always to be equal (that's the case because we selected
1556 					// this table specifically because all the keywords to
1557 					// check are starting with that letter)
1558 					while(idx > 0) {
1559 						idx--;
1560 						if(id[idx] == ' ' || id[idx] == '_') {
1561 							if(k->f_name[idx] != '_') {
1562 								break;
1563 							}
1564 						}
1565 						else if(id[idx] >= 'a' && id[idx] <= 'z') {
1566 							if(k->f_name[idx] != (id[idx] & 0x5F)) {
1567 								break;
1568 							}
1569 						}
1570 						else if(k->f_name[idx] != id[idx]) {
1571 							break;
1572 						}
1573 					}
1574 					found = idx == 0;
1575 					if(found) {
1576 						pos = k->f_size;
1577 						break;
1578 					}
1579 				}
1580 			}
1581 			k++;
1582 		}
1583 		while(max > pos) {
1584 			max--;
1585 			UngetChar(id[max]);	// we know id[] is only composed of ASCII
1586 		}
1587 		if(found) {
1588 			// TODO: get rid of the eventual extra chars we read
1589 			//	 we found a match with a keyword, act on it
1590 //printf("Found! (%p) [%s] %08X\n", k, k->f_name, k->f_flags);
1591 			if((k->f_flags & KEYWORD_FLAG_OBJECT) != 0) {
1592 				yylval.node = node_alloc(k->f_type, k->f_subtype, f_first_line);
1593 				return OBJECT;
1594 			}
1595 			if((k->f_flags & KEYWORD_FLAG_UNIT) != 0) {
1596 				yylval.type = k->f_subtype;
1597 				return k->f_type;
1598 			}
1599 			if((k->f_flags & KEYWORD_FLAG_INTEGER) != 0) {
1600 				yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, f_first_line);
1601 				yylval.node->integer = k->f_type;
1602 				return VALUE;
1603 			}
1604 			// KEYWORD_FLAG_DIRECT
1605 			return k->f_type;
1606 		}
1607 		id[pos] = '\0';
1608 	}
1609 
1610 //printf("*** Identifier [%s]\n", id);
1611 
1612 	yylval.node = node_alloc(NODE_TYPE_IDENTIFIER, NODE_SUBTYPE_UNKNOWN, f_first_line);
1613 	if(id != identifier) {
1614 		yylval.node->string = id;
1615 	}
1616 	else {
1617 		yylval.node->string = sswf_strdup(id);
1618 	}
1619 
1620 	return IDENTIFIER;
1621 }
1622 
1623 
ReadString(c_t quote)1624 int ScriptFile::ReadString(c_t quote)
1625 {
1626 	register c_t	c, o;
1627 	register int	pos, max;
1628 	c_t		r;
1629 	int		cnt;
1630 	char		*str;
1631 	size_t		size;
1632 
1633 	// create the node at the start so the f_line is at the start
1634 	yylval.node = node_alloc(NODE_TYPE_STRING, NODE_SUBTYPE_UNKNOWN, f_first_line);
1635 
1636 	str = (char *) sswf_malloc(256, "ReadString() -- small string buffer");
1637 		/*
1638 		 * We allocated 256, but save 1 byte for the null terminator
1639 		 * and up to 6 for the last multi-byte
1640 		 */
1641 	max = 256 - 6 - 1;
1642 	pos = 0;
1643 	for(;;) {
1644 		c = GetChar();
1645 		if(c == quote || c == SCRIPTFILE_EOF) {
1646 			break;
1647 		}
1648 		// skip bad chars.
1649 		if(c == SCRIPTFILE_BAD) {
1650 			continue;
1651 		}
1652 		if(c == '\\') {
1653 			c = GetChar();
1654 			if(c == SCRIPTFILE_EOF) {
1655 				break;
1656 			}
1657 			o = c;
1658 			switch(c) {
1659 			case 'a': c =  7; break;
1660 			case 'b': c =  8; break;
1661 			case 't': c =  9; break;
1662 			case 'n': c = 10; break;
1663 			case 'r': c = 11; break;
1664 			case 'f': c = 12; break;
1665 			case 'v': c = 13; break;
1666 
1667 			case 'U':
1668 			case 'u':
1669 				c = GetChar();
1670 				if(c != '+') {
1671 					UngetChar(c);
1672 					c = o;
1673 					break;
1674 				}
1675 				c = GetChar();
1676 				if(c >= '0' && c <= '9') {
1677 					c -= '0';
1678 				}
1679 				else if(c >= 'a' && c <= 'f') {
1680 					c -= 'a' - 10;
1681 				}
1682 				else if(c >= 'A' && c <= 'F') {
1683 					c -= 'A' - 10;
1684 				}
1685 				else {
1686 					UngetChar('+');
1687 					UngetChar(c);
1688 					c = o;
1689 					break;
1690 				}
1691 				goto readhex;
1692 
1693 			case 'X':
1694 			case 'x':
1695 				c = GetChar();
1696 				if(c >= '0' && c <= '9') {
1697 					c -= '0';
1698 				}
1699 				else if(c >= 'a' && c <= 'f') {
1700 					c -= 'a' - 10;
1701 				}
1702 				else if(c >= 'A' && c <= 'F') {
1703 					c -= 'A' - 10;
1704 				}
1705 				else {
1706 					UngetChar(c);
1707 					c = o;
1708 					break;
1709 				}
1710 readhex:
1711 				cnt = 7;
1712 				while(cnt > 0) {
1713 					cnt--;
1714 					r = GetChar();
1715 					if(r >= '0' && r <= '9') {
1716 						c = c * 16 + r - '0';
1717 					}
1718 					else if(r >= 'a' && r <= 'f') {
1719 						c = c * 16 + r - 'a' + 10;
1720 					}
1721 					else if(r >= 'A' && r <= 'F') {
1722 						c = c * 16 + r - 'A' + 10;
1723 					}
1724 					else {
1725 						UngetChar(r);
1726 						break;
1727 					}
1728 				}
1729 				break;
1730 
1731 			case '0':
1732 				c = GetChar();
1733 				if(c == 'x' || c == 'X') {
1734 					o = c;
1735 					c = GetChar();
1736 					if(c >= '0' && c <= '9') {
1737 						c -= '0';
1738 					}
1739 					else if(c >= 'a' && c <= 'f') {
1740 						c -= 'a' - 10;
1741 					}
1742 					else if(c >= 'A' && c <= 'F') {
1743 						c -= 'A' - 10;
1744 					}
1745 					else {
1746 						UngetChar(c);
1747 						UngetChar(o);
1748 						c = 0;
1749 						break;
1750 					}
1751 					goto readhex;
1752 				}
1753 			case '1':
1754 			case '2':
1755 			case '3':
1756 			case '4':
1757 			case '5':
1758 			case '6':
1759 			case '7':
1760 				c = c - '0';
1761 				cnt = 10;
1762 				while(cnt > 0) {
1763 					cnt--;
1764 					r = GetChar();
1765 					if(r < '0' || r > '7') {
1766 						UngetChar(r);
1767 						break;
1768 					}
1769 					c = c * 8 + r - '0';
1770 				}
1771 				break;
1772 
1773 			}
1774 		}
1775 		if(c > 0) {
1776 			if(pos >= max) {
1777 				max += 256;
1778 				/*
1779 				 * +6 because some multi-bytes take that many bytes
1780 				 * +1 so the null terminator is reserved
1781 				 */
1782 				str = (char *) sswf_remalloc(str, max + 6 + 1, "ReadString() -- large string buffer");
1783 			}
1784 			size = 6;
1785 			// TODO: should we check for errors?
1786 			sswf::wctomb(&c, sizeof(c), str + pos, size);
1787 			pos += 6 - size;
1788 		}
1789 	}
1790 	str[pos] = '\0';
1791 
1792 #if ADJUT_STRINGS
1793 	/* on most systems this is really fast and it can save some memory */
1794 	str = sswf_remalloc(str, pos + 1, "ReadString() -- adjusted to the minimum");
1795 #endif
1796 
1797 	yylval.node->string = str;
1798 
1799 //fprintf(stderr, "Read string [%s]\n", str);
1800 
1801 	return STRING;
1802 }
1803 
1804 
ReadValue(c_t c)1805 int ScriptFile::ReadValue(c_t c)
1806 {
1807 	long		r, p, sign;
1808 	double		fr, div, exp;
1809 
1810 	r = 0;
1811 	fr = 0.0;
1812 
1813 	if(c == '0') {		/* check for hexa, otherwise it's probably octal or floating point? */
1814 		c = GetChar();
1815 		if(c == 'x' || c == 'X') {
1816 			/* hexadecimal */
1817 			p = c;
1818 			c = GetChar();
1819 			if((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) {
1820 				/* this is NOT a valid hex. value (just 0) */
1821 				UngetChar(c);
1822 				UngetChar(p);
1823 				c = '\0';
1824 			}
1825 			else {
1826 				p = 0;
1827 				for(;;) {
1828 					if(c >= '0' && c <= '9') {
1829 						p = r;
1830 						r = r * 16 + c - '0';
1831 					}
1832 					else if(c >= 'a' && c <= 'f') {
1833 						p = r;
1834 						r = r * 16 + c - 'a' + 10;
1835 					}
1836 					else if(c >= 'A' && c <= 'F') {
1837 						p = r;
1838 						r = r * 16 + c - 'A' + 10;
1839 					}
1840 					else {
1841 						if((c == 'm' || c == 'M') && (r & 15) == 0xC) {
1842 							/* this is an hex. followed by 'cm'! */
1843 							UngetChar(c);
1844 							c = 'c';
1845 							r = p;		/* restore saved value (because of possible overflow!) */
1846 fprintf(stderr, "WARNING: hexadecimal followed by the CM unit.\n");
1847 						}
1848 						UngetChar(c);
1849 						c = '\0';
1850 						break;
1851 					}
1852 					c = GetChar();
1853 				}
1854 			}
1855 		}
1856 		else if(c != '.') {
1857 			/* octal */
1858 			while(c >= '0' && c <= '8') {
1859 				r = r * 8 + c - '0';
1860 				c = GetChar();
1861 			}
1862 			if(c == '8' || c == '9') {
1863 				fprintf(stderr, "ERROR: invalid octal number.\n");
1864 				/* skip the rest of the number */
1865 				do {
1866 					c = GetChar();
1867 				} while(c >= '0' && c <= '9');
1868 			}
1869 			UngetChar(c);
1870 			c = '\0';
1871 		}
1872 	}
1873 	if(c != '.') {
1874 		while(c >= '0' && c <= '9') {
1875 			r = r * 10 + c - '0';
1876 			fr = fr * 10 + c - '0';		// avoid overflows we may get in 'r' if the number is followed by a period (.)
1877 			c = GetChar();
1878 		}
1879 	}
1880 	if(c == '.') {
1881 		/* we found a floating point value */
1882 		/* TODO: the following is wrong because the '...e+/-<value>' will change the outcome very much */
1883 		//fr = (double) r;
1884 		div = 0.1;
1885 		c = GetChar();
1886 		while(c >= '0' && c <= '9') {
1887 			fr += (double) (c - '0') * (double) div;
1888 			div /= 10.0;
1889 			c = GetChar();
1890 		}
1891 		if(c == 'e' || c == 'E') {
1892 			r = c;
1893 			c = GetChar();
1894 			exp = 0;
1895 			sign = 1;
1896 			if(c == '+') {
1897 				c = GetChar();
1898 				if(c < '0' || c > '9') {
1899 					UngetChar(c);
1900 					UngetChar('+');
1901 					c = r;
1902 				}
1903 			}
1904 			else if(c == '-') {
1905 				c = GetChar();
1906 				if(c < '0' || c > '9') {
1907 					UngetChar(c);
1908 					UngetChar('-');
1909 					c = r;
1910 				}
1911 				else {
1912 					sign = -1;
1913 				}
1914 			}
1915 			else if(c < '0' || c > '9') {
1916 				UngetChar(c);
1917 				c = r;
1918 			}
1919 			while(c >= '0' && c <= '9') {
1920 				exp = exp * 10 + c - '0';
1921 				c = GetChar();
1922 			}
1923 			if(exp != 0) {
1924 				fr *= pow(10.0, exp * (double) sign);
1925 			}
1926 		}
1927 		yylval.node = node_alloc(NODE_TYPE_FLOAT, NODE_SUBTYPE_UNKNOWN, f_first_line);
1928 		yylval.node->floating_point = fr;
1929 	}
1930 	else {
1931 		/* we have a integer number */
1932 		yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, f_first_line);
1933 		yylval.node->integer = r;
1934 	}
1935 
1936 	if(c != '\0') {
1937 		UngetChar(c);
1938 	}
1939 
1940 	return VALUE;
1941 }
1942 
1943 
1944 
1945 
GetChar(void)1946 ScriptFile::c_t ScriptFile::GetChar(void)
1947 {
1948 	c_t		c;
1949 
1950 	if(f_unget_count > 0) {
1951 		f_unget_count--;
1952 		f_last_char = f_unget[f_unget_count];
1953 	}
1954 	else {
1955 		f_last_char = ReadChar();
1956 	}
1957 
1958 	// the following test simplifies the line counting
1959 	if(f_last_char == '\r') {
1960 		f_last_char = '\n';	// \n, \r or \r\n -> \n
1961 		// skip the \n in a \r\n sequence
1962 		c = ReadChar();
1963 		if(c != '\n') {
1964 			UngetChar(c);
1965 		}
1966 	}
1967 
1968 	if(f_last_char == '\n') {
1969 		f_line++;
1970 	}
1971 
1972 	return f_last_char;
1973 }
1974 
1975 
UngetChar(c_t c)1976 void ScriptFile::UngetChar(c_t c)
1977 {
1978 	// don't record the end of file or an error!
1979 	//if(c == SCRIPTFILE_EOF || c == SCRIPTFILE_BAD)
1980 	if(c < 0) {
1981 		return;
1982 	}
1983 
1984 	if(c == '\n') {
1985 		f_line--;
1986 	}
1987 
1988 	assert(f_unget_count < UNGET_COUNT_MAX, "too many ScriptFile::UngetChar()");
1989 	f_unget[f_unget_count] = c;
1990 	f_unget_count++;
1991 }
1992 
1993 
ReadChar(void)1994 ScriptFile::c_t ScriptFile::ReadChar(void)
1995 {
1996 	int		a, b, cnt, min;
1997 	const char	*input;
1998 	char		*output;
1999 	size_t		out;
2000 	c_t		outchar;
2001 
2002 	// the following is to avoid warnings -- a would always be
2003 	// properly initialized without it
2004 	a = 0;
2005 
2006 	if(f_type == SCRIPTFILE_TYPE_MULTIBYTES) {
2007 		if(f_file != 0) {
2008 			while(f_mb_count < MULTIBYTE_MAX) {
2009 				a = ReadByte();
2010 				if(a == -1) {
2011 					CloseFile();
2012 					break;
2013 				}
2014 				f_multibytes[f_mb_count] = a;
2015 				f_mb_count++;
2016 			}
2017 		}
2018 		// anything in the input stream?
2019 		if(f_mb_count == 0) {
2020 			return SCRIPTFILE_EOF;
2021 		}
2022 		// 8 bits files need to have each character
2023 		// converted according to the encoding we've
2024 		// got reading the script starting comment
2025 		input = f_multibytes;
2026 		out = sizeof(outchar);
2027 		output = (char *) &outchar;
2028 		b = (int) f_mb_count;
2029 		a = (int) iconv(f_iconvertor, ICONV_INPUT_CAST &input, &f_mb_count, &output, &out);
2030 		// the output buffer will usually be full before the
2031 		// input is fully emptied!
2032 		if(a < 0 && errno == E2BIG && out == 0 && b != (int) f_mb_count) {
2033 			a = 1;
2034 		}
2035 		if(a < 0) {
2036 			f_last_errno = errno;
2037 			fprintf(stderr, "ERROR: can't convert the bytes: ");
2038 			for(a = 0; a < (int) f_mb_count; a++) {
2039 				fprintf(stderr, " 0x%02X", f_multibytes[a]);
2040 			}
2041 			fprintf(stderr, ", to a character (errno: %d)\n", f_last_errno);
2042 			outchar = SCRIPTFILE_BAD;
2043 			f_mb_count--;	// we need to do this if we don't want to loop forever
2044 		}
2045 		// the characters used need to be removed from the input buffer
2046 		memmove(f_multibytes, input, f_mb_count);
2047 		return outchar;
2048 	}
2049 
2050 	if(f_file == 0) {
2051 		return SCRIPTFILE_EOF;
2052 	}
2053 
2054 	a = ReadByte();
2055 	if(a == -1) {
2056 		CloseFile();
2057 		return SCRIPTFILE_EOF;
2058 	}
2059 
2060 	for(;;) {
2061 		switch(f_type) {
2062 		case SCRIPTFILE_TYPE_UTF16LE:
2063 			b = ReadByte();
2064 			if(b == -1) {
2065 				fprintf(stderr, "ERROR: invalid UTF16LE end (odd size file)\n");
2066 				CloseFile();
2067 				return SCRIPTFILE_EOF;
2068 			}
2069 			outchar = a + b * 256;
2070 			if(outchar >= 0xD800 && outchar <= 0xDBFF) {
2071 				outchar = (outchar & 0x3FF) << 10;
2072 				b = ReadByte();
2073 				if(b == -1) {
2074 					fprintf(stderr, "ERROR: invalid UTF16LE end (missing 0xDC00-0xDFFF character)\n");
2075 					CloseFile();
2076 					return SCRIPTFILE_EOF;
2077 				}
2078 				b = ReadByte();
2079 				if(b == -1) {
2080 					fprintf(stderr, "ERROR: invalid UTF16LE end (missing 0xDC00-0xDFFF character)\n");
2081 					CloseFile();
2082 					return SCRIPTFILE_EOF;
2083 				}
2084 				outchar |= b;
2085 				if(b < 0xDC || b > 0xDF) {
2086 					fprintf(stderr, "ERROR: invalid UTF16LE bad 0xD800/0xDC00 sequence\n");
2087 					CloseFile();
2088 					return SCRIPTFILE_EOF;
2089 				}
2090 				outchar |= (b & 0x03) << 8;
2091 			}
2092 			else if(outchar >= 0xDC00 && outchar <= 0xDFFF) {
2093 				// ouch, the 0xD800-0xDBFF is missing
2094 				return SCRIPTFILE_BAD;
2095 			}
2096 			else if(outchar == 0xFFFE) {
2097 				// change endian!
2098 				f_type = SCRIPTFILE_TYPE_UTF16BE;
2099 				return 0xFEFF;
2100 			}
2101 			return outchar;
2102 
2103 		case SCRIPTFILE_TYPE_UTF16BE:
2104 			b = ReadByte();
2105 			if(b == -1) {
2106 				fprintf(stderr, "ERROR: invalid UTF16BE end (odd size file)\n");
2107 				CloseFile();
2108 				return SCRIPTFILE_EOF;
2109 			}
2110 			outchar = a * 256 + b;
2111 			if(outchar >= 0xD800 && outchar <= 0xDBFF) {
2112 				outchar = (outchar & 0x3FF) << 10;
2113 				b = ReadByte();
2114 				if(b == -1) {
2115 					fprintf(stderr, "ERROR: invalid UTF16BE end (missing 0xDC00-0xDFFF character)\n");
2116 					CloseFile();
2117 					return SCRIPTFILE_EOF;
2118 				}
2119 				if(b < 0xDC || b > 0xDF) {
2120 					fprintf(stderr, "ERROR: invalid UTF16BE bad 0xD800/0xDC00 sequence\n");
2121 					CloseFile();
2122 					return SCRIPTFILE_EOF;
2123 				}
2124 				outchar |= (b & 0x03) << 8;
2125 				b = ReadByte();
2126 				if(b == -1) {
2127 					fprintf(stderr, "ERROR: invalid UTF16BE end (missing 0xDC00-0xDFFF character)\n");
2128 					CloseFile();
2129 					return SCRIPTFILE_EOF;
2130 				}
2131 				outchar |= b;
2132 			}
2133 			else if(outchar >= 0xDC00 && outchar <= 0xDFFF) {
2134 				// ouch, the 0xD800-0xDBFF is missing
2135 				return SCRIPTFILE_BAD;
2136 			}
2137 			else if(outchar == 0xFFFE) {
2138 				// change endian!
2139 				f_type = SCRIPTFILE_TYPE_UTF16LE;
2140 				return 0xFEFF;
2141 			}
2142 			return outchar;
2143 
2144 		case SCRIPTFILE_TYPE_UCS2LE:
2145 			b = ReadByte();
2146 			if(b == -1) {
2147 				fprintf(stderr, "ERROR: invalid UCS2LE end (odd size file)\n");
2148 				CloseFile();
2149 				return SCRIPTFILE_EOF;
2150 			}
2151 			outchar = a + b * 256;
2152 			if(outchar == 0xFFFE) {
2153 				f_type = SCRIPTFILE_TYPE_UCS2BE;
2154 				return 0xFEFF;
2155 			}
2156 			return outchar;
2157 
2158 		case SCRIPTFILE_TYPE_UCS2BE:
2159 			b = ReadByte();
2160 			if(b == -1) {
2161 				fprintf(stderr, "ERROR: invalid UCS2BE end (odd size file)\n");
2162 				CloseFile();
2163 				return SCRIPTFILE_EOF;
2164 			}
2165 			outchar = a * 256 + b;
2166 			if(outchar == 0xFFFE) {
2167 				f_type = SCRIPTFILE_TYPE_UCS2LE;
2168 				return 0xFEFF;
2169 			}
2170 			return outchar;
2171 
2172 		case SCRIPTFILE_TYPE_UCS4LE:
2173 			b = ReadByte();
2174 			if(b != -1) {
2175 				outchar = a | (b << 8);
2176 				b = ReadByte();
2177 				if(b != -1) {
2178 					outchar |= (c_t) b << 16;
2179 					b = ReadByte();
2180 					if(b != -1) {
2181 						outchar |= (c_t) b << 24;
2182 						if(outchar == (c_t) 0xFFFE0000) {
2183 							f_type = SCRIPTFILE_TYPE_UCS4BE;
2184 							return 0xFEFF;
2185 						}
2186 						if(b < 0x80) {
2187 							return outchar;
2188 						}
2189 					}
2190 				}
2191 			}
2192 			fprintf(stderr, "ERROR: invalid UCS4LE end or bit 32 set\n");
2193 			CloseFile();
2194 			return SCRIPTFILE_EOF;
2195 
2196 		case SCRIPTFILE_TYPE_UCS4BE:
2197 			b = ReadByte();
2198 			if(b != -1) {
2199 				outchar = ((c_t) a << 24) | ((c_t) b << 16);
2200 				b = ReadByte();
2201 				if(b != -1) {
2202 					outchar |= b << 8;
2203 					b = ReadByte();
2204 					if(b != -1) {
2205 						outchar |= b;
2206 						if(outchar == (c_t) 0xFFFE0000) {
2207 							f_type = SCRIPTFILE_TYPE_UCS4LE;
2208 							return 0xFEFF;
2209 						}
2210 						if(outchar >= 0) {
2211 							return outchar;
2212 						}
2213 					}
2214 				}
2215 			}
2216 			fprintf(stderr, "ERROR: invalid USC4BE end or bit 32 set\n");
2217 			CloseFile();
2218 			return SCRIPTFILE_EOF;
2219 
2220 		case SCRIPTFILE_TYPE_ISO88591:
2221 			return a;
2222 
2223 		case SCRIPTFILE_TYPE_ASCII:
2224 			if(a >= 0x80) {
2225 				return SCRIPTFILE_BAD;
2226 			}
2227 			return a;
2228 
2229 		case SCRIPTFILE_TYPE_UTF8:
2230 			// U-00000000 - U-0000007F: 0xxxxxxx
2231 			// U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
2232 			// U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
2233 			// U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
2234 			// U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2235 			// U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2236 			// if necessary, we resync. our self
2237 			while(a >= 0x80 && a <= 0xBF) {
2238 				a = ReadByte();
2239 			}
2240 			if(a == -1) {
2241 				CloseFile();
2242 				return SCRIPTFILE_EOF;
2243 			}
2244 			if(a < 0x80) {
2245 				return a;
2246 			}
2247 			if(a >= 0xFE) {
2248 				// bad entry here!
2249 				// TODO: error or skip silently?
2250 				CloseFile();
2251 				return SCRIPTFILE_EOF;
2252 			}
2253 			// multi-byte character -- read all the 10xxxxxx...
2254 			if(a >= 0xFC) {
2255 				a &= 0x01;
2256 				min = 1 << (2 + 6 * 4);
2257 				cnt = 5;
2258 			}
2259 			else if(a >= 0xF8) {
2260 				a &= 0x03;
2261 				min = 1 << (3 + 6 * 3);
2262 				cnt = 4;
2263 			}
2264 			else if(a >= 0xF0) {
2265 				a &= 0x07;
2266 				min = 1 << (4 + 6 * 2);
2267 				cnt = 3;
2268 			}
2269 			else if(a >= 0xE0) {
2270 				a &= 0x0F;
2271 				min = 1 << (5 + 6);
2272 				cnt = 2;
2273 			}
2274 			else /*if(a >= 0xC0)*/ {
2275 				a &= 0x1F;
2276 				min = 1 << 7;
2277 				cnt = 1;
2278 			}
2279 			outchar = a;
2280 			while(cnt > 0) {
2281 				cnt--;
2282 				b = ReadByte();
2283 				if(b == -1) {
2284 					// bad entry here!
2285 					// TODO: error or skip silently?
2286 					CloseFile();
2287 					return SCRIPTFILE_EOF;
2288 				}
2289 				if((b & 0xC0) != 0x80) {
2290 					// save this byte for better error recovery
2291 					UnreadByte(b);
2292 					// refuse long encodings!
2293 					// TODO: error or skip silently?
2294 					return SCRIPTFILE_BAD;
2295 				}
2296 				outchar = outchar * 64 + (b & 0x3F);
2297 			}
2298 			if(outchar < min) {
2299 				// refuse long encodings!
2300 				// TODO: error or skip silently?
2301 				return SCRIPTFILE_BAD;
2302 			}
2303 			return outchar;
2304 
2305 		default:
2306 			assert(0, "unknown encoding type when reading a character");
2307 			/*NOTREACHED*/
2308 
2309 		}
2310 		a = ReadByte();
2311 		if(a == -1) {
2312 			CloseFile();
2313 			return SCRIPTFILE_EOF;
2314 		}
2315 	}
2316 	/*NOTREACHED*/
2317 	return SCRIPTFILE_EOF;
2318 }
2319 
2320 
ReadByte(void)2321 int ScriptFile::ReadByte(void)
2322 {
2323 	unsigned char	c;
2324 
2325 	errno = 0;
2326 
2327 	if(f_unread_count > 0) {
2328 		f_unread_count--;
2329 		return f_unread[f_unread_count];
2330 	}
2331 
2332 	if(fread(&c, 1, 1, f_file) != 1) {
2333 		if(errno != 0) {
2334 			f_last_errno = errno;
2335 			perror("fread()");
2336 			fprintf(stderr, "%s:%d:%d: i/o error", f_filename, f_line, f_last_errno);
2337 		}
2338 		return -1;
2339 	}
2340 
2341 	return c;
2342 }
2343 
2344 
UnreadByte(unsigned char c)2345 void ScriptFile::UnreadByte(unsigned char c)
2346 {
2347 	assert(f_unread_count < UNREAD_COUNT_MAX, "too many UnreadByte() calls (max = %d)", UNREAD_COUNT_MAX);
2348 
2349 	f_unread[f_unread_count] = c;
2350 	f_unread_count++;
2351 }
2352 
2353 
2354 
2355 
2356 extern "C"
2357 {
2358 
2359 
2360 
sswf_read_actionscript(int yes)2361 void sswf_read_actionscript(int yes)
2362 {
2363 //printf("Called sswf_read_actionscript (%d)\n", yes);
2364 //fflush(stdout);
2365 	if(sf != 0) {
2366 		sf->SetReadActionscript(yes);
2367 	}
2368 }
2369 
2370 
sswf_add_include(const char * path)2371 void sswf_add_include(const char *path)
2372 {
2373 	ScriptFile::string_t	*str;
2374 
2375 	str = new ScriptFile::string_t();
2376 	include_paths.MemAttach(str, sizeof(ScriptFile::string_t), "sswf_add_include(): user include path");
2377 	str->f_string = include_paths.StrDup(path);
2378 	include_paths.Set(-1, str);
2379 }
2380 
2381 
sswf_set_default_include(int def)2382 void sswf_set_default_include(int def)
2383 {
2384 	no_default_include = def;
2385 }
2386 
sswf_open_script(const char * filename)2387 int sswf_open_script(const char *filename)
2388 {
2389 	ScriptFile	*n;
2390 	int		ec;
2391 
2392 	n = new ScriptFile(sf);
2393 	if(n == 0) {
2394 		fprintf(stderr, "FATAL ERROR: out of memory.\n");
2395 		exit(1);
2396 	}
2397 	sf = n;
2398 
2399 	ec = sf->OpenFile(filename, include_paths, no_default_include == 0);
2400 	if(ec == 0) {
2401 		lex_filename = sf->Filename();
2402 	}
2403 
2404 	return ec;
2405 }
2406 
2407 
sswf_close_script(void)2408 void sswf_close_script(void)
2409 {
2410 	ScriptFile	*p;
2411 
2412 	if(sf != 0) {
2413 		p = sf->Parent();
2414 		delete sf;
2415 		sf = p;
2416 		if(p != 0) {
2417 			lex_filename = p->Filename();
2418 		}
2419 	}
2420 }
2421 
2422 
yylex()2423 int yylex()
2424 {
2425 	int	c;
2426 
2427 	c = sf->GetToken();
2428 
2429 #if 0
2430 printf("%s: %d: Read token [%d] '%c'\n",
2431 			sf->Filename(), sf->Line(),
2432 			c, c >= ' ' && c <= 0x7E ? c : '?');
2433 #endif
2434 
2435 	return c;
2436 }
2437 
2438 
2439 }
2440 
2441 
2442 
2443 #if 0
2444 
2445 #define	RETURN_TOKEN(type, subtype)	ylval.node = node_alloc(NODE_TYPE_##type, NODE_SUBTYPE_##subtype, yylloc.first_line); return type;
2446 #define	RETURN_UNIT(name, unit)		yylval.type = NODE_SUBTYPE_##unit; return UNIT_##name;
2447 
2448 static	void			skip_comment(int close);
2449 static	struct node_t *		read_identifier(void);
2450 static	struct node_t *		read_string(void);
2451 static	struct node_t *		read_value(void);
2452 
2453 /*
2454 some unused rules...
2455 "SHOW"[ \t_]?"FRAME"			{ yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_SHOW_FRAME, yylloc.first_line); return DIRECT_REFERENCE; }
2456 "REMOVE"[ \t_]?"ALL"			{ yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_REMOVE_ALL, yylloc.first_line); return DIRECT_REFERENCE; }
2457 "END"					{ yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_END, yylloc.first_line); return DIRECT_REFERENCE; }
2458 */
2459 
2460 %}
2461 
2462 %option noyywrap
2463 
2464 %%
2465 
2466 "ACTION"				{ RETURN_TOKEN(OBJECT, ACTION); }
2467 "ACTIONSCRIPT"				{ return ACTIONSCRIPT; }
2468 "BC"					{ RETURN_UNIT(COLOR, BC); }
2469 "BUTTON"				{ RETURN_TOKEN(OBJECT, BUTTON); }
2470 "CATCH"					{ RETURN_TOKEN(OBJECT, CATCH); }
2471 "CM"					{ RETURN_UNIT(SIZE, CM); }
2472 "COLOR"[ \t_]?"TRANSFORM"		{ RETURN_TOKEN(OBJECT, COLOR_TRANSFORM); }
2473 "COLOR"					{ RETURN_TOKEN(OBJECT, COLOR); }
2474 "DEG"					{ RETURN_UNIT(ANGLE, DEG); }
2475 "DO"[ \t_]?"ACTION"			{ RETURN_TOKEN(OBJECT, DO_ACTION); }
2476 "EDGES"					{ RETURN_TOKEN(OBJECT, EDGES); }
2477 "EDIT"[ \t_]?"TEXT"			{ RETURN_TOKEN(OBJECT, EDIT_TEXT); }
2478 "ELSE"					{ return ELSE; }
2479 "END"					{ RETURN_TOKEN(OBJECT, END); }
2480 "ENVELOPE?"				{ RETURN_TOKEN(OBJECT, ENVELOPE); }
2481 "EXPORT"				{ RETURN_TOKEN(OBJECT, EXPORT); }
2482 "FALSE"					{ yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line); yylval.node->integer = 0; return VALUE; }
2483 "FC"					{ RETURN_UNIT(COLOR, FC); }
2484 "FILL"[ \t_]?"STYLE"			{ RETURN_TOKEN(OBJECT, FILL_STYLE); }
2485 "FINALLY"				{ RETURN_TOKEN(OBJECT, FINALLY); }
2486 "FONT"					{ RETURN_TOKEN(OBJECT, FONT); }
2487 "FOR"					{ return FOR; }
2488 "FPF"					{ RETURN_UNIT(SPEED, FPF); }
2489 "FPS"					{ RETURN_UNIT(SPEED, FPS); }
2490 "FRAME"[ \t_]?"LABEL"			{ RETURN_TOKEN(OBJECT, FRAME_LABEL); }
2491 "FRM"					{ RETURN_UNIT(TIME, FRM); }
2492 "FUNCTION"				{ RETURN_TOKEN(OBJECT, FUNCTION); }
2493 "GLYPH"					{ RETURN_TOKEN(OBJECT, GLYPH); }
2494 "GRAD"					{ RETURN_UNIT(ANGLE, GRAD); }
2495 "GRADIENT"				{ RETURN_TOKEN(OBJECT, GRADIENT); }
2496 "IF"					{ return IF; }
2497 "IMAGE"					{ RETURN_TOKEN(OBJECT, IMAGE); }
2498 "IMPORT"				{ RETURN_TOKEN(OBJECT, IMPORT); }
2499 "IN"					{ RETURN_UNIT(SIZE, IN); }
2500 "LABEL"					{ RETURN_TOKEN(OBJECT, LABEL); }
2501 "LINE"[ \t_]?"STYLE"			{ RETURN_TOKEN(OBJECT, LINE_STYLE); }
2502 "LIST"|"BLOCK"				{ RETURN_TOKEN(OBJECT, LIST); }
2503 "MATRIX"				{ RETURN_TOKEN(OBJECT, MATRIX); }
2504 "MIN"					{ RETURN_UNIT(TIME, MIN); }
2505 "ON"[ \t_]?"EVENT"			{ RETURN_TOKEN(OBJECT, ON_EVENT); }
2506 "PLACE"[ \t_]?"OBJECT"			{ RETURN_TOKEN(OBJECT, PLACE_OBJECT); }
2507 "POINTS"				{ RETURN_TOKEN(OBJECT, POINTS); }
2508 "PR"					{ RETURN_UNIT(RATIO, PR); }
2509 "PX"					{ RETURN_UNIT(SIZE, PX); }
2510 "RAD"					{ RETURN_UNIT(ANGLE, RAD); }
2511 "RECT"("ANGLE")?			{ RETURN_TOKEN(OBJECT, RECT); }
2512 "REMOVE"				{ RETURN_TOKEN(OBJECT, REMOVE); }
2513 "REPLACE"[ \t_]?"OBJECT"		{ RETURN_TOKEN(OBJECT, REPLACE_OBJECT); }
2514 "RT"					{ RETURN_UNIT(RATIO, RT); }
2515 "SCRIPT"[ \t_]?"LIMITS"			{ RETURN_TOKEN(OBJECT, SCRIPT_LIMITS); }
2516 "SEC"					{ RETURN_UNIT(TIME, SEC); }
2517 "SEQUENCE"				{ RETURN_TOKEN(OBJECT, SEQUENCE); }
2518 "SET"[ \t_]?"BACKGROUND"[ \t_]?"COLOR"	{ RETURN_TOKEN(OBJECT, SET_BACKGROUND_COLOR); }
2519 "SET"[ \t_]?"TAB"[ \t_]?"INDEX"		{ RETURN_TOKEN(OBJECT, SET_TAB_INDEX); }
2520 ("DEFINE"[ \t_]?)?"SHAPE"		{ RETURN_TOKEN(OBJECT, SHAPE); }
2521 "SHOW"[ \t_]?"FRAME"			{ RETURN_TOKEN(OBJECT, SHOW_FRAME); }
2522 "SOUND"					{ RETURN_TOKEN(OBJECT, SOUND); }
2523 "SOUND"[ \t_]?"INFO"			{ RETURN_TOKEN(OBJECT, SOUND_INFO); }
2524 "SPRITE"				{ RETURN_TOKEN(OBJECT, SPRITE); }
2525 "STATE"					{ RETURN_TOKEN(OBJECT, STATE); }
2526 "TEXT"					{ RETURN_TOKEN(OBJECT, TEXT); }
2527 "TEXT"[ \t_]?"SETUP"			{ RETURN_TOKEN(OBJECT, TEXT_SETUP); }
2528 "TRUE"					{ yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line); yylval.node->integer = 1; return VALUE; }
2529 "TRY"					{ RETURN_TOKEN(OBJECT, TRY); }
2530 "TW"					{ RETURN_UNIT(SIZE, TW); }
2531 "WITH"					{ RETURN_TOKEN(OBJECT, WITH); }
2532 
2533 
2534 
2535 
2536 
2537 
2538 
2539 static struct node_t *read_identifier(void)
2540 {
2541 	struct node_t *n;
2542 
2543 	n = node_alloc(NODE_TYPE_IDENTIFIER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);
2544 	n->string = sswf_strdup(yytext);
2545 
2546 	return n;
2547 }
2548 
2549 
2550 static struct node_t *read_value(void)
2551 {
2552 	register int	c;
2553 	long		r, p, sign;
2554 	double		fr, div, exp;
2555 	struct node_t	*n;
2556 
2557 	r = 0;
2558 
2559 	c = *yytext;
2560 	if(c == '.') {
2561 		/* we can have a decimal point only if a second digit was found
2562 		 * make sure we restore it first!
2563 		 */
2564 		unput(yytext[1]);
2565 	}
2566 
2567 	if(c == '0') {		/* check for hexa, otherwise it's probably octal or floating point? */
2568 		c = input();
2569 		if(c == 'x' || c == 'X') {
2570 			/* hexadecimal */
2571 			p = c;
2572 			c = input();
2573 			if((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) {
2574 				/* this is NOT a valid hex. value */
2575 				unput(c);
2576 				unput(p);
2577 				c = '\0';
2578 			}
2579 			else {
2580 				p = 0;
2581 				for(;;) {
2582 					if(c >= '0' && c <= '9') {
2583 						p = r;
2584 						r = r * 16 + c - '0';
2585 					}
2586 					else if(c >= 'a' && c <= 'f') {
2587 						p = r;
2588 						r = r * 16 + c - 'a' + 10;
2589 					}
2590 					else if(c >= 'A' && c <= 'F') {
2591 						p = r;
2592 						r = r * 16 + c - 'A' + 10;
2593 					}
2594 					else {
2595 						if((c == 'm' || c == 'M') && (r & 15) == 0xC) {
2596 							/* this is an hex. followed by 'cm'! */
2597 							unput(c);
2598 							c = 'c';
2599 							r = p;		/* restore saved value (because of overflow!) */
2600 fprintf(stderr, "WARNING: hexadecimal followed by the CM unit.\n");
2601 						}
2602 						unput(c);
2603 						c = '\0';
2604 						break;
2605 					}
2606 					c = input();
2607 				}
2608 			}
2609 		}
2610 		else if(c != '.') {
2611 			/* octal */
2612 			while(c >= '0' && c <= '8') {
2613 				r = r * 8 + c - '0';
2614 				c = input();
2615 			}
2616 			if(c == '8' || c == '9') {
2617 				fprintf(stderr, "ERROR: invalid octal number.\n");
2618 				/* skip the rest of the number */
2619 				do {
2620 					c = input();
2621 				} while(c >= '0' && c <= '9');
2622 			}
2623 			unput(c);
2624 			c = '\0';
2625 		}
2626 	}
2627 	if(c != '.') {
2628 		while(c >= '0' && c <= '9') {
2629 			r = r * 10 + c - '0';
2630 			c = input();
2631 		}
2632 	}
2633 	if(c == '.') {
2634 		/* we found a floating point value */
2635 		/* TODO: the following is wrong because the '...e+/-<value>' will change the outcome very much */
2636 		fr = (double) r;
2637 		div = 0.1;
2638 		c = input();
2639 		while(c >= '0' && c <= '9') {
2640 			fr += (double) (c - '0') * (double) div;
2641 			div /= 10.0;
2642 			c = input();
2643 		}
2644 		if(c == 'e' || c == 'E') {
2645 			r = c;
2646 			c = input();
2647 			exp = 0;
2648 			sign = 1;
2649 			if(c == '+') {
2650 				c = input();
2651 				if(c < '0' || c > '9') {
2652 					unput(c);
2653 					unput('+');
2654 					c = r;
2655 				}
2656 			}
2657 			else if(c == '-') {
2658 				c = input();
2659 				if(c < '0' || c > '9') {
2660 					unput(c);
2661 					unput('-');
2662 					c = r;
2663 				}
2664 				else {
2665 					sign = -1;
2666 				}
2667 			}
2668 			else if(c < '0' || c > '9') {
2669 				unput(c);
2670 				c = r;
2671 			}
2672 			while(c >= '0' && c <= '9') {
2673 				exp = exp * 10 + c - '0';
2674 				c = input();
2675 			}
2676 			if(exp != 0) {
2677 				fr *= pow(10, exp * (double) sign);
2678 			}
2679 		}
2680 		n = node_alloc(NODE_TYPE_FLOAT, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);
2681 		n->floating_point = fr;
2682 	}
2683 	else {
2684 		/* we have a integer number */
2685 		n = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);
2686 		n->integer = r;
2687 	}
2688 
2689 	if(c != '\0') {
2690 		unput(c);
2691 	}
2692 
2693 	return n;
2694 }
2695 
2696 #endif
2697 
2698