// sswf_lexical.c++ - OpenGrok cross reference for /dports/graphics/sswf/sswf-1.8.4/src/sswf/sswf_lexical.c++

/* sswf_lexical.c++ -- written by Alexis WILKE for Made to Order Software Corp. (c) 2002-2009 */

/*

Copyright (c) 2002-2009 Made to Order Software Corp.

Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and
associated documentation files (the "Software"), to
deal in the Software without restriction, including
without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the
following conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

*/


#define	SSWF_NEED_ASSERT
#include	"sswf.h"

extern "C" {
#include	"sswf_grammar.h"
};


#include	"sswf/libsswf.h"


// Token location record defined elsewhere; ScriptFile::GetToken() stores the
// line on which each token starts in yylloc.first_line.
extern	YYLTYPE		yylloc;

// Capacities of the ScriptFile push-back and conversion buffers
#define	UNREAD_COUNT_MAX	4	// size of ScriptFile::f_unread (bytes pushed back by UnreadByte())
#define	UNGET_COUNT_MAX		16	// size of ScriptFile::f_unget (characters pushed back by UngetChar())
#define	MULTIBYTE_MAX		16	// size of ScriptFile::f_multibytes

// One input script being tokenized.
//
// A ScriptFile locates and opens a script (OpenFile()), determines its
// character encoding from the first bytes and from the encoding="..." entry
// of the leading comment, and then hands out tokens to the parser through
// GetToken(). Each object keeps a pointer to the ScriptFile given at
// construction time (see Parent()).
class ScriptFile
{
public:
	struct string_t : public sswf::ItemBase {
		char *		f_string;
	};
	typedef sswf::sswf_ucs4_t	c_t;		// UCS chars are 31 bits, negative values are used for errors

	// NOTES:
	//
	// Glossary
	//	ASCII	American Standard Code for Information Interchange
	//	BE	Big-endian (most significant byte first)
	//	LE	Little-endian (least significant byte first)
	//	UCS	Universal Character Set
	//	UTF	Universal Transformation Format
	//
	// Note that UCS2, UTF16, UCS4 and UTF32 don't have an endianness
	// specified. This is correct since we can infer the endianness by
	// checking the first few bytes of input (which MUST represent a comment)
	//
	// The numeric value of each entry is also used as a bit position
	// (1 << type) to build the f_input masks of sf_type_t.
	//
	enum scriptfile_type_t {
		// totally unknown
		SCRIPTFILE_TYPE_UNKNOWN = 0,	// still unknown

		// unknown but valid for iconv()
		SCRIPTFILE_TYPE_MULTIBYTES,	// a specified encoding (using iconv() to convert the characters)

		// 8 bits
		SCRIPTFILE_TYPE_ASCII,		// accept characters up to 127 as is; others are viewed as erroneous
		SCRIPTFILE_TYPE_ISO88591,	// use input as is (like Unicode page 0)
		SCRIPTFILE_TYPE_UTF8,		// 1 to 6 bytes to encode any character
		SCRIPTFILE_TYPE_CESU8,		// 1 to 4 bytes to encode 0x110000 characters, 0xD800 to 0xDFFF is interpreted

		// 16 bits
		SCRIPTFILE_TYPE_UCS2,		// UCS-2 chars (limited to 0x10000 characters)
		SCRIPTFILE_TYPE_UCS2BE,		// same as UCS2 in big endian
		SCRIPTFILE_TYPE_UCS2LE,		// same as UCS2 in little endian
		SCRIPTFILE_TYPE_UCS2SAME,	// UCS-2 in processor endian
		SCRIPTFILE_TYPE_UCS2SWAP,	// UCS-2 in opposite processor endian
		SCRIPTFILE_TYPE_UTF16,		// UTF-16 (0xD800 to 0xDFFF are escapes to represent 20 bits)
		SCRIPTFILE_TYPE_UTF16BE,	// same as UTF-16 in big endian
		SCRIPTFILE_TYPE_UTF16LE,	// same as UTF-16 in little endian

		// 32 bits
		SCRIPTFILE_TYPE_UCS4,		// UCS-4 (unlimited character set, except negative values)
		SCRIPTFILE_TYPE_UCS4BE,		// same as UCS-4 big endian
		SCRIPTFILE_TYPE_UCS4LE,		// same as UCS-4 little endian
		SCRIPTFILE_TYPE_UCS4SAME,	// UCS-4 in processor endian
		SCRIPTFILE_TYPE_UCS4SWAP,	// UCS-4 in opposite processor endian
		SCRIPTFILE_TYPE_UTF32,		// UTF-32 (limited to 0x110000 chars)
		SCRIPTFILE_TYPE_UTF32BE,	// UTF-32 big endian
		SCRIPTFILE_TYPE_UTF32LE,	// UTF-32 little endian

		SCRIPTFILE_TYPE_SAME,		// keep input type

		SCRIPTFILE_TYPE_max
	};
	// One entry of the table mapping a user specified encoding name to an
	// internally supported type.
	struct sf_type_t {
		scriptfile_type_t	f_type;		// corresponding type (internally supported)
		const char *		f_name;		// official encoding name (as in iconv)
		unsigned long		f_input;	// accepted input encodings: a set of (1 << scriptfile_type_t) bits as determined by our internal detection algorithm
	};

	// Special values returned instead of a character
#define	SCRIPTFILE_EOF		((c_t) -1)	// UCS chars are 31 bits max.
#define	SCRIPTFILE_BAD		((c_t) -2)	// UCS chars are 31 bits max.

				ScriptFile(ScriptFile *parent);
				~ScriptFile();

	// Locate and open a script, then detect its encoding; returns 0 on success
	int			OpenFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths);
	// Close the current file, if any (the standard input is never closed)
	void			CloseFile(void);
	// Read the next token; the returned value is the token identifier
	int			GetToken(void);
	// Current line number
	unsigned int		Line(void) const;
	// The ScriptFile passed to the constructor
	ScriptFile *		Parent(void);
	// Name of the file found by OpenFile()
	const char *		Filename(void);
	// Read a whole action script (everything up to the matching '}') as one STRING token
	int			ReadActionscript(void);
	// When set, GetToken() calls ReadActionscript() instead of tokenizing
	void			SetReadActionscript(bool yes);

private:
	void			Reset(void);
	int			FindFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths);
	c_t			GetChar(void);
	void			UngetChar(c_t c);
	c_t			ReadChar(void);
	int			ReadByte(void);
	void			UnreadByte(unsigned char c);
	void			SkipComment(int close);
	int			ReadIdentifier(c_t c);
	int			ReadString(c_t c);
	int			ReadValue(c_t c);

	ScriptFile *		f_parent;		// as passed to the constructor
	const char *		f_filename;		// name of the opened file (set by FindFile())
	unsigned int		f_line;			// current line, set to 1 when a file gets opened
	unsigned int		f_first_line;		// line on which the current token started
	scriptfile_type_t	f_type;			// encoding used to read characters
	FILE *			f_file;			// input stream or 0 when closed
	int			f_last_errno;		// errno value saved when an error was detected
	c_t			f_last_char;
	unsigned int		f_unread_count;		// number of valid entries in f_unread
	unsigned char		f_unread[UNREAD_COUNT_MAX];
	unsigned int		f_unget_count;		// number of valid entries in f_unget
	c_t			f_unget[UNGET_COUNT_MAX];
	bool			f_iconvertor_open;	// whether f_iconvertor needs to be closed
	iconv_t			f_iconvertor;		// iconv() descriptor used for SCRIPTFILE_TYPE_MULTIBYTES
	size_t			f_mb_count;		// number of valid bytes in f_multibytes
	char			f_multibytes[MULTIBYTE_MAX];
	bool			f_read_actionscript;	// see SetReadActionscript()
};


// Global state of the lexer (external linkage, may be used by other files)
ScriptFile		*sf;		// the current script file
sswf::Vectors		include_paths;	// an array of strings where files are being searched
int			no_default_include;	// NOTE(review): presumably non-zero disables the built-in include directories -- confirm against the code using it


// SF_TYPE_TO_FLAG<n>(a, ...) expands to the bitwise OR of one bit per listed
// type name, each bit being (1 << ScriptFile::SCRIPTFILE_TYPE_<name>).
// NOTE: the expansions of the 2..6 variants are not parenthesized; they are
// meant to be used as a complete initializer expression.
#define	SF_TYPE_TO_FLAG1(a)		(1<<ScriptFile::SCRIPTFILE_TYPE_##a)
#define	SF_TYPE_TO_FLAG2(a,b)		SF_TYPE_TO_FLAG1(a)|SF_TYPE_TO_FLAG1(b)
#define	SF_TYPE_TO_FLAG3(a,b,c)		SF_TYPE_TO_FLAG2(a,b)|SF_TYPE_TO_FLAG1(c)
#define	SF_TYPE_TO_FLAG4(a,b,c,d)	SF_TYPE_TO_FLAG3(a,b,c)|SF_TYPE_TO_FLAG1(d)
#define	SF_TYPE_TO_FLAG5(a,b,c,d,e)	SF_TYPE_TO_FLAG4(a,b,c,d)|SF_TYPE_TO_FLAG1(e)
#define	SF_TYPE_TO_FLAG6(a,b,c,d,e,f)	SF_TYPE_TO_FLAG5(a,b,c,d,e)|SF_TYPE_TO_FLAG1(f)

// An "array" is written as a parenthesized pair: (size, (elem1, elem2, ...)).
// SF_GET_ELEMS and SF_GET_SIZE are applied directly to such a pair to extract
// the parenthesized element list and the element count respectively.
#define	SF_GET_ELEMS(size, elems)	elems
//#define	SF_ELEMS(array)			SF_GET_ELEMS array

#define	SF_GET_SIZE(size, elems)	size
//#define	SF_SIZE(array)			SF_GET_SIZE array

// Two levels are required so that 'count' gets fully expanded before it is
// pasted to the macro name.
#define	SF_CALL1(macro, count, elems)	macro##count elems
#define	SF_CALL(macro, count, elems)	SF_CALL1(macro, count, elems)

// SF_FOREACH(M, (n, (a, b, ...))) expands to M<n>(a, b, ...)
#define	SF_FOREACH(macro, array)	SF_CALL(macro, SF_GET_SIZE array, SF_GET_ELEMS array)


// Define one ScriptFile::sf_type_t initializer (with its trailing comma):
//	name		suffix of the SCRIPTFILE_TYPE_... value to use for the file
//	alias		encoding name as it may appear in encoding="..."
//	accepted_array	(count, (types...)): the detected input types for which
//			this entry is acceptable
#define	SCRIPTFILE_TYPE(name, alias, accepted_array)			\
		{							\
			ScriptFile::SCRIPTFILE_TYPE_##name,				\
			alias,						\
			SF_FOREACH(SF_TYPE_TO_FLAG, accepted_array)	\
		},

// Encodings understood without the help of iconv().
//
// Each entry associates an encoding name, as the user may write it in the
// encoding="..." entry of the first comment (dashes and underscores removed,
// compared case insensitively), with the type used to read the file and with
// the set of auto-detected input types for which the name is acceptable.
// An entry of type SAME means "keep the auto-detected type".
//
// The same name may appear several times with different accepted input
// types (i.e. "UTF16" selects UTF16BE or UTF16LE depending on the detected
// endianness).
//
// The table ends with an entry of type SCRIPTFILE_TYPE_UNKNOWN which is what
// ScriptFile::OpenFile() looks for to stop its search; that terminator has a
// null name and must never be compared against.
static const ScriptFile::sf_type_t internal_types[] =
{
	// ASCII
	SCRIPTFILE_TYPE(ASCII, "ASCII",			(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "USASCII",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "CSASCII",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "US",			(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "ISO646US",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "ISO646.IRV",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "ISO646.IRV:1991",	(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "ISO646.1991IRV",	(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "ISOIR6",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "ANSIX3.4-1968",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "ANSIX3.4-1986",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "CP367",			(1, (ISO88591)))
	SCRIPTFILE_TYPE(ASCII, "IBM367",		(1, (ISO88591)))

	// ISO8859-1
	SCRIPTFILE_TYPE(SAME, "88591",			(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "88591:1987",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "ISO88591",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "ISO88591:1987",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "ISOIR100",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "CSISOLATIN1",		(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "LATIN1",			(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "L1",			(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "CP819",			(1, (ISO88591)))
	SCRIPTFILE_TYPE(SAME, "IBM819",			(1, (ISO88591)))

	// UTF-8
	SCRIPTFILE_TYPE(UTF8, "UTF8",			(2, (ISO88591, UTF8)))

	// CESU-8
	SCRIPTFILE_TYPE(CESU8, "CESU8",			(2, (ISO88591, UTF8)))

	// UCS-2
	SCRIPTFILE_TYPE(SAME, "UCS2",			(2, (UCS2LE, UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "CSUNICODE",		(2, (UCS2LE, UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "ISO10646:1993/UCS2",	(2, (UCS2LE, UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "10646/UCS2",		(2, (UCS2LE, UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "106461/UCS2",		(2, (UCS2LE, UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "ISO10646UCS2",		(2, (UCS2LE, UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "10646UCS2",		(2, (UCS2LE, UCS2BE)))

	SCRIPTFILE_TYPE(SAME, "UCS2INTERNAL",		(2, (UCS2LE, UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "UCS2SWAPPED",		(2, (UCS2LE, UCS2BE)))

	SCRIPTFILE_TYPE(SAME, "UCS2BE",			(1, (UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "UNICODEBIG",		(1, (UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "UNICODE11",		(1, (UCS2BE)))
	SCRIPTFILE_TYPE(SAME, "CSUNICODE11",		(1, (UCS2BE)))

	SCRIPTFILE_TYPE(SAME, "UCS2LE",			(1, (UCS2LE)))
	// the iconv alias is UNICODELITTLE (it used to be misspelled here)
	SCRIPTFILE_TYPE(SAME, "UNICODELITTLE",		(1, (UCS2LE)))

	// UTF-16
	SCRIPTFILE_TYPE(UTF16BE, "UTF16",		(1, (UCS2BE)))
	SCRIPTFILE_TYPE(UTF16BE, "UTF16BE",		(1, (UCS2BE)))

	SCRIPTFILE_TYPE(UTF16LE, "UTF16",		(1, (UCS2LE)))
	SCRIPTFILE_TYPE(UTF16LE, "UTF16LE",		(1, (UCS2LE)))

	// UCS-4
	SCRIPTFILE_TYPE(SAME, "UCS4",			(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "CSUCS4",			(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "ISO10646",		(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "ISO10646:1993",		(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "ISO10646:1993/UCS4",	(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "10646",			(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "10646/UCS4",		(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "106461",			(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "106461/UCS4",		(2, (UCS4LE, UCS4BE)))

	SCRIPTFILE_TYPE(SAME, "UCS4INTERNAL",		(2, (UCS4LE, UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "UCS4SWAPPED",		(2, (UCS4LE, UCS4BE)))

	SCRIPTFILE_TYPE(SAME, "UCS4BE",			(1, (UCS4BE)))
	SCRIPTFILE_TYPE(SAME, "UCS4LE",			(1, (UCS4LE)))

	// UTF-32
	SCRIPTFILE_TYPE(UTF32BE, "UTF32",		(1, (UCS4BE)))
	SCRIPTFILE_TYPE(UTF32BE, "UTF32BE",		(1, (UCS4BE)))

	SCRIPTFILE_TYPE(UTF32LE, "UTF32",		(1, (UCS4LE)))
	SCRIPTFILE_TYPE(UTF32LE, "UTF32LE",		(1, (UCS4LE)))

	// end of table marker (required by the search loop in OpenFile())
	{ ScriptFile::SCRIPTFILE_TYPE_UNKNOWN, 0, 0 }
};


// Create a script file object which is not attached to any file yet.
//
//	parent		the ScriptFile to be returned by Parent(); stored as is
ScriptFile::ScriptFile(ScriptFile *parent)
	: f_parent(parent),
	  f_filename(0),
	  f_line(0),
	  f_first_line(0),
	  f_type(SCRIPTFILE_TYPE_UNKNOWN),
	  f_file(0),
	  f_last_errno(0),
	  f_last_char('\0'),
	  f_unread_count(0),
	  f_unget_count(0),
	  f_iconvertor_open(false),
	  f_mb_count(0),
	  f_read_actionscript(false)
{
	// The f_unread, f_unget and f_multibytes buffers are not initialized
	// since their respective counters are zero; similarly f_iconvertor is
	// only meaningful once f_iconvertor_open is true.
}


// Release what this object still holds: Reset() closes the input file and
// the iconv() descriptor when these are open.
ScriptFile::~ScriptFile()
{
	this->Reset();
}


// Bring the object back to its "no file opened" state: the input file and
// the iconv() descriptor are closed and the reading state is cleared.
//
// f_filename is left alone on purpose: the string is referenced here and
// there and must not be deleted.
void ScriptFile::Reset(void)
{
	CloseFile();

	// release the character set convertor, if one was opened
	if(f_iconvertor_open) {
		iconv_close(f_iconvertor);
		f_iconvertor_open = false;
	}

	// forget the encoding and everything which was pushed back
	f_type = SCRIPTFILE_TYPE_UNKNOWN;
	f_unread_count = 0;
	f_unget_count = 0;
	f_mb_count = 0;
	f_last_char = 0;
	f_line = 0;
}


unsigned int ScriptFile::Line(void) const
{
	return f_line;
}


// Retrieve the script file which was specified when constructing this one.
ScriptFile *ScriptFile::Parent(void)
{
	return this->f_parent;
}


const char *ScriptFile::Filename(void)
{
	return f_filename;
}


void ScriptFile::SetReadActionscript(bool yes)
{
	f_read_actionscript = yes;
}


// Search for a script and open it for reading (binary mode).
//
// The name "-" selects the standard input. Otherwise the file is tried as
// is and then, unless the name starts with a directory separator or includes
// a colon, within each user include path and, when use_internal_paths is
// true, within each of the built-in default directories.
//
// The search stops at once whenever fopen() fails with an error other than
// ENOENT. The last fopen() error is saved in f_last_errno.
//
//	filename		name of the file to search for
//	user_include_paths	directories to search (strings)
//	use_internal_paths	whether the built-in directories are searched too
//
// Returns 0 when a file was opened (f_file and f_filename are then set)
// and -1 otherwise.
int ScriptFile::FindFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths)
{
	// TODO: this shouldn't be hard coded; instead some deep well hidden
	// configuration file should specify these default directories (and
	// the path to that setup file, where do we get it?!)
	static const char *	default_include_paths[] = {
		// Alexis' suggested install dir.
		"/usr/include/sswf/scripts",
		// Linux
		"/usr/share/sswf/scripts",
		"/usr/share/sswf/include/scripts",
		"/usr/local/share/sswf/scripts",
		"/usr/local/share/sswf/include/scripts",
		// MAC OS X with Fink
		"/sw/local/share/sswf/scripts",
		"/sw/local/share/sswf/include/scripts",
		// IRIX
		"/opt/sswf/scripts",
		"/opt/sswf/include/scripts",
		0
	};

	if(show_input_search) {
		printf(" %% File \"%s\" exists?\n", filename);
	}

	// a lone dash stands for the standard input
	if(strcmp(filename, "-") == 0) {
		f_filename = "*standard input*";
		f_file = stdin;
		if(show_input_filenames) {
			printf(" -> Input File: \"%s\".\n", f_filename);
		}
		return 0;
	}

	// first attempt: the name exactly as specified
	f_file = fopen(filename, "rb");
	if(f_file != NULL) {
		f_filename = sswf_strdup(filename);
		if(show_input_filenames) {
			printf(" -> Input File: \"%s\".\n", f_filename);
		}
		return 0;
	}
	f_last_errno = errno;
	if(f_last_errno != ENOENT) {
		return -1;
	}

	// a name starting with a separator, or one in which a colon is the
	// first separator found, is a full path which can't be searched
	const char *separator = strpbrk(filename, "/\\:");
	if(separator != 0 && (separator == filename || *separator == ':')) {
		return -1;
	}

	// second attempt: the directories specified by the user
	const int user_count = user_include_paths.Count();
	for(int i = 0; i < user_count; ++i) {
		const char *directory = (const char *) user_include_paths.Get(i);
		char *candidate = sswf_strchild(directory, filename);
		if(show_input_search) {
			printf(" %% File \"%s\" exists?\n", candidate);
		}
		f_file = fopen(candidate, "rb");
		if(f_file != 0) {
			// the candidate string is kept as the filename
			f_filename = candidate;
			if(show_input_filenames) {
				printf(" -> Input File: \"%s\".\n", f_filename);
			}
			return 0;
		}
		f_last_errno = errno;
		sswf_free(candidate);
		if(f_last_errno != ENOENT) {
			return -1;
		}
	}

	// last attempt: the built-in directories, unless turned off
	if(!use_internal_paths) {
		return -1;
	}
	for(const char **directory = default_include_paths; *directory != 0; ++directory) {
		char *candidate = sswf_strchild(*directory, filename);
		if(show_input_search) {
			printf(" %% File \"%s\" exists?\n", candidate);
		}
		f_file = fopen(candidate, "rb");
		if(f_file != 0) {
			f_filename = candidate;
			if(show_input_filenames) {
				printf(" -> Input File: \"%s\".\n", f_filename);
			}
			return 0;
		}
		f_last_errno = errno;
		sswf_free(candidate);
		if(f_last_errno != ENOENT) {
			return -1;
		}
	}

	// file not found...
	return -1;
}


int ScriptFile::OpenFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths)
{
	int				a, b, c, d;
	unsigned long			input;
	char				encoding[256];
	const ScriptFile::sf_type_t	*types;

	Reset();

	if(FindFile(filename, user_include_paths, use_internal_paths) != 0) {
		fprintf(stderr, "ERROR: can't open file \"%s\" (errno: %d).\n", filename, errno);
		return 1;
	}
	f_line = 1;

/* at the very start we need to check for U16/U32 files */
/*
 * The following are the tests which will be conducted
 * on the input to try to determine the type of the file.
 * Note that we need at least 4 characters in any SSFW
 * file. Don't forget also that the file needs to start
 * with a comment.
 *
 * File Starts with		Default Encoding
 *	0xEF 0xBB 0xBF 0x?? ... UTF8
 *	0x?? 0x00 0x?? 0x00 ... USC2LE
 *	0x00 0x?? 0x00 0x?? ... USC2BE
 *	0xFF 0xFE 0x?? 0x00 ... USC2LE
 *	0xFE 0xFF 0x00 0x?? ... USC2BE
 *	0x?? 0x00 0x00 0x00 ... USC4LE
 *	0x00 0x00 0x00 0x?? ... USC4BE
 *	0xFF 0xFE 0x00 0x00 ... USC4LE
 *	0x00 0x00 0xFE 0xFF ... USC4BE
 *
 * The rest will force UNKNOWN and a comment must be present
 * on the first line. This comment must include the encoding.
 * For instance:
 *
 * 	encoding="utf-16"
 *
 * Note that even auto-detected formats can include an
 * encoding. In that case, the user specified encoding
 * needs to match what we have detected (we can't
 * switch from UCS-2LE to UTF-32BE).
 *
 * IMPORTANT NOTE:
 *	It is to be noted that the parser will only
 *	accept a few characters at the beginning of a
 *	file and this is why this algorithm works this
 *	way.
 *
 *	The possible characters are as defined here:
 *
 *	. spaces (U+0009 '\t', U+000A '\n', U+000C '\f',
 *		  U+000D '\r', U+0020 ' ', U+FEFF)
 *	. comment (U+0028 '(', U+002F '/')
 *	. identifier (U+0041 'A' to U+005A 'Z', U+005F '_'
 *		      U+0061 'a' to U+007A 'z')
 *
 *	The identifier can be either an object name
 *	(such as "sequence", "text", "button"...)
 *	or the name of a variable (as in "a = 56")
 */
	a = ReadByte();
	b = ReadByte();
	c = ReadByte();
	d = ReadByte();

	if(a == -1 || b == -1 || c == -1 || d == -1) {
		/*
		 * this is not good, a script can't be
		 * less than 4 bytes?!?
		 */
		CloseFile();
		fprintf(stderr, "ERROR: file \"%s\" seems empty or too small a file for a ScriptSWF.\n", filename);
		f_last_errno = EBADF;
		return -1;	// return EOF
	}
	UnreadByte(d);
	UnreadByte(c);
	UnreadByte(b);
	UnreadByte(a);

	if(a == 0xEF && b == 0xBB && c == 0xBF) {
		// UTF-8 starting with 0xFEFF is represented by 0xEF, 0xBB and 0xBF
		// NOTE:
		// This sequence represent i with trema, the double closing quotes '>>'
		// and an upside down question mark (for Spanish); that's really
		// unlikely not UTF-8!
		f_type = SCRIPTFILE_TYPE_UTF8;
	}
	if(a != 0 && b == 0 && c != 0 && d == 0) {
		f_type = SCRIPTFILE_TYPE_UCS2LE;
	}
	else if(a == 0 && b != 0 && c == 0 && d != 0) {
		f_type = SCRIPTFILE_TYPE_UCS2BE;
	}
	else if(a == 0xFF && b == 0xFE && c != 0 && d == 0) {
		f_type = SCRIPTFILE_TYPE_UCS2LE;
	}
	else if(a == 0xFE && b == 0xFF && c == 0 && d != 0) {
		f_type = SCRIPTFILE_TYPE_UCS2BE;
	}
	else if(a == 0xFF && b == 0xFE && c == 0 && d == 0) {
		f_type = SCRIPTFILE_TYPE_UCS4LE;
	}
	else if(a == 0 && b == 0 && c == 0xFE && d == 0xFF) {
		f_type = SCRIPTFILE_TYPE_UCS4BE;
	}
	else if(a != 0 && b == 0 && c == 0 && d == 0) {
		f_type = SCRIPTFILE_TYPE_UCS4LE;
	}
	else if(a == 0 && b == 0 && c == 0 && d != 0) {
		f_type = SCRIPTFILE_TYPE_UCS4BE;
	}
	else {
		/*
		 * In this case we assume ISO-8859-1
		 * this is useful to read the starting
		 * comment as if it were read with
		 * ReadByte() calls!
		 */
		f_type = SCRIPTFILE_TYPE_ISO88591;
	}

	/*
	 * We expect (want) a comment with the name of
	 * an encoding; as we read the comment check for
	 * the following: 'encoding=\"<name>\"'; only the
	 * first encoding entry is used
	 */
retry:
	do {
		a = GetChar();
	} while(a == ' ' || a == '\t' || a == '\n');
	b = '\0';
	if(a == '/') {
		a = GetChar();
		if(a == '/') {	// C++ comment
			b = '\n';
		}
		else if(a == '*') {	/* standard C comment */
			b = '/';
		}
	}
	else if(a == '(') {
		a = GetChar();
		if(a == '*') {		// standard Pascal comment
			b = ')';
		}
	}
	if(b == '\0') {
		/*
		 * This is wrong, we must have a comment at the
		 * start of the file!
		 */
		CloseFile();
		fprintf(stderr, "ERROR: can't determine the encoding of \"%s\", no proper comment found at the beginning of the file.\n", filename);
		f_last_errno = EINVAL;
		return 1;
	}
	c = 0;
	a = GetChar();
	for(;;) {
		if(a == -1) {
			CloseFile();
			fprintf(stderr, "ERROR: end of file \"%s\" found before the end of the starting comment.\n", filename);
			f_last_errno = EBADF;
			return -1;
		}
		if(a == '*') {			// C or Pascal comment ends
			a = GetChar();
			if(a == b) {
				// Ooops no encoding="..." in this comment!
				// Just try again
				goto retry;
			}
			c = 0;	// new word after an asterisk...
			continue;
		}
		if((a < 'A' || a > 'Z') && (a < 'a' || a > 'z') && a != '\"' && a != '=') {
			// new word
			if(b == '\n' && a == '\n') {		// C++ comment ends
				// Ooops no encoding="..." in this comment!
				// Just try again
				goto retry;
			}
			c = 0;
		}
		else if(c < 10) {
			// searching: encoding="...
			encoding[c] = a;
			c++;
			if(c == 10 && strncasecmp(encoding, "encoding=\"", 10) == 0) {
				c = 0;
				a = GetChar();
				while(a != '"' && a != '\n' && a != -1 && c < (int) (sizeof(encoding) - 1)) {
					if(a != '-' && a != '_') {
						encoding[c] = a;
						c++;
					}
					a = GetChar();
				}
				while(c > 0 && encoding[c - 1] == '/') {
					c--;
				}
				encoding[c] = '\0';
				break;
			}
		}
		a = GetChar();
	}
	// we found an encoding="..." entry
	// let's skip the rest of the comment first
	if(b == '\n') {
		do {
			a = GetChar();
			if(a == -1) {
				fprintf(stderr, "ERROR: end of file \"%s\" found before the end of a comment.\n", filename);
				CloseFile();
				f_last_errno = EBADF;
				return -1;
			}
		} while(a != '\n');
	}
	else {
		do {
			a = GetChar();
			while(a == '*') {
				a = GetChar();
				if(a == -1) {
					fprintf(stderr, "ERROR: end of file \"%s\" found before the end of a comment.\n", filename);
					CloseFile();
					f_last_errno = EBADF;
					return -1;
				}
				if(a == b) {
					a = -1;
					break;
				}
			}
		} while(a != -1);
	}

	input = 1 << f_type;
	types = internal_types;
	while(types->f_type != SCRIPTFILE_TYPE_UNKNOWN) {
		if(strcasecmp(encoding, types->f_name) == 0 && (types->f_input & input) != 0) {
			if(types->f_type != SCRIPTFILE_TYPE_SAME) {
				f_type = types->f_type;
			}
			// we found the proper type, we're done here.
			return 0;
		}
		types++;
	}
	// didn't find anything compatible, check out for an iconv(3C) convertion
	if(f_type != SCRIPTFILE_TYPE_ISO88591) {
		// the encoding doesn't match and it should!
		f_type = SCRIPTFILE_TYPE_UNKNOWN;
		fprintf(stderr, "ERROR: unacceptable encoding \"%s\" for this file.\n", encoding);
		return 1;
	}

	// the input encoding needs to be an 8 bits encoding!
	f_iconvertor = iconv_open("UCS-4-INTERNAL", encoding);
	if(f_iconvertor == (iconv_t) -1) {
		f_last_errno = errno;
		CloseFile();
		fprintf(stderr, "ERROR: encoding \"%s\" not understood. Please, check your iconv_open() manual page for a complete list of possible convertions.\n", encoding);
		return 1;
	}

	f_type = SCRIPTFILE_TYPE_MULTIBYTES;

	return 0;

#if 0
// old stuff...
	switch(f_type) {
	case SCRIPTFILE_TYPE_UTF16LE:
		if(strcasecmp(encoding, "UCS-2")   == 0
		&& strcasecmp(encoding, "UCS-2LE") == 0) {
			f_type = SCRIPTFILE_TYPE_UCS2LE;
		}
		else if(strcasecmp(encoding, "UTF-16")   != 0
		     && strcasecmp(encoding, "UTF-16LE") != 0) {
			// we've got a problem here!
			f_type = SCRIPTFILE_TYPE_UNKNOWN;
		}
		break;

	case SCRIPTFILE_TYPE_UTF16BE:
		if(strcasecmp(encoding, "UCS-2")   == 0
		|| strcasecmp(encoding, "UCS-2BE") == 0) {
			f_type = SCRIPTFILE_TYPE_UCS2BE;
		}
		else if(strcasecmp(encoding, "UTF-16")   != 0
		     && strcasecmp(encoding, "UTF-16BE") != 0) {
			// we've got a problem here!
			f_type = SCRIPTFILE_TYPE_UNKNOWN;
		}
		break;

	case SCRIPTFILE_TYPE_UCS2LE:
		if(strcasecmp(encoding, "UTF-16")   != 0
		&& strcasecmp(encoding, "UTF-16LE") != 0) {
			f_type = SCRIPTFILE_TYPE_UTF16LE;
		}
		else if(strcasecmp(encoding, "UCS-2")   != 0
		     && strcasecmp(encoding, "UCS-2LE") != 0) {
			// we've got a problem here!
			f_type = SCRIPTFILE_TYPE_UNKNOWN;
		}
		break;

	case SCRIPTFILE_TYPE_UCS2BE:
		if(strcasecmp(encoding, "UTF-16")    == 0
		|| strcasecmp(encoding, "UTF-16BE")  == 0) {
			f_type = SCRIPTFILE_TYPE_UTF16BE;
		}
		else if(strcasecmp(encoding, "UCS-2")   != 0
		     && strcasecmp(encoding, "UCS-2BE") != 0) {
			// we've got a problem here!
			f_type = SCRIPTFILE_TYPE_UNKNOWN;
		}
		break;

	case SCRIPTFILE_TYPE_UCS4LE:
		if(strcasecmp(encoding, "UTF-32")   == 0
		&& strcasecmp(encoding, "UTF-32LE") == 0) {
			f_type = SCRIPTFILE_TYPE_UTF32LE;
		}
		else if(strcasecmp(encoding, "UCS-4")   != 0
		     && strcasecmp(encoding, "UCS-4LE") != 0) {
			// we've got a problem here!
			f_type = SCRIPTFILE_TYPE_UNKNOWN;
		}
		break;

	case SCRIPTFILE_TYPE_UCS4BE:
		if(strcasecmp(encoding, "UTF-32")    == 0
		|| strcasecmp(encoding, "UTF-32BE")  == 0) {
			f_type = SCRIPTFILE_TYPE_UTF32BE;
		}
		else if(strcasecmp(encoding, "UCS-4")   != 0
		     && strcasecmp(encoding, "UCS-4BE") != 0) {
			// we've got a problem here!
			f_type = SCRIPTFILE_TYPE_UNKNOWN;
		}
		break;

	case SCRIPTFILE_TYPE_ISO8859_1:
		// now we have a name, check for what we understand internally...
		if(strcasecmp(encoding, "UTF-8") == 0) {
			f_type = SCRIPTFILE_TYPE_UTF8;
		}
		else if(strcasecmp(encoding, "iso-8859-1") == 0 || strcasecmp(encoding, "iso_8859-1") == 0 || strcasecmp(encoding, "iso8859-1") == 0) {
			f_type = SCRIPTFILE_TYPE_ISO8859_1;
		}
		else if(strcasecmp(encoding, "ascii") == 0) {
			f_type = SCRIPTFILE_TYPE_ASCII;
		}
		else {
			// otherwise, use iconv() facility
			// the input encoding needs to be an 8 bits encoding!
			f_iconvertor = iconv_open("UCS-4-INTERNAL", encoding);
			if(f_iconvertor == (iconv_t) -1) {
				f_last_errno = errno;
				CloseFile();
				fprintf(stderr, "ERROR: encoding \"%s\" not understood. Please, check your iconv_open() manual page for a complete list of possible convertions.\n", encoding);
				return 1;
			}
			f_type = SCRIPTFILE_TYPE_MULTIBYTES;
		}
		break;

#if DEBUG
	default:
		assert(0, "INTERNAL ERROR: f_type seems to be set to a value we didn't have control over (%d).", f_type);
#endif

	}
#endif
}


// Close the input file if there is one. Calling this function when no file
// is open has no effect.
void ScriptFile::CloseFile(void)
{
	if(f_file == 0) {
		// nothing to close
		return;
	}

	// The standard input is not closed here since this is usually
	// done by the system at exit()
	if(f_file != stdin) {
		fclose(f_file);
	}
	f_file = 0;
}


// Read a complete action script as one string.
//
// The opening '{' was already read by the caller; characters are read and
// saved (as multi-byte characters) up to, and not including, the matching
// '}'. Curly brackets found within strings (delimited by ", ' or `) are not
// counted. The closing '}' is pushed back so it is the next character read.
// When the end of the file or an invalid character is found first, the
// reading stops and a null character is pushed back instead.
//
// The result is saved in a new string node in yylval.node; the node is
// created with the line on which the token started (f_first_line).
//
// NOTE(review): comments are not recognized here, so a quote or a curly
// bracket appearing within an action script comment is interpreted.
//
// Returns the STRING token.
int ScriptFile::ReadActionscript(void)
{
	char		*str;
	int		max, pos, count;
	size_t		size;
	bool		in_string, escaped, was_escaped;
	c_t		c, quote;

	// create the node at the start so the f_line is at the start
	// (we need to pass that to the parser so it err at the
	// right line!)
	yylval.node = node_alloc(NODE_TYPE_STRING, NODE_SUBTYPE_UNKNOWN, f_first_line);

	str = (char *) sswf_malloc(256, "ReadActionscript() -- small string buffer");
		/*
		 * We allocated 256, but save 1 byte for the null terminator
		 * and up to 6 for the last multi-byte
		 */
	max = 256 - 6 - 1;
	pos = 0;

	// we read everything up to a closing '}' since an action
	// script is always written between '{' and '}'
	count = 1;		// number of '{' not yet closed
	in_string = false;
	escaped = false;	// true when the last character was an active backslash
	quote = '\0';
	do {
		c = GetChar();

		// A backslash within a string escapes the next character,
		// unless that backslash is itself escaped. Testing only the
		// previous character is not enough: in "a\\" the closing
		// quote follows a backslash yet it does end the string.
		was_escaped = escaped;
		escaped = in_string && !was_escaped && c == '\\';

		switch(c) {
		case SCRIPTFILE_EOF:
		case SCRIPTFILE_BAD:
			count = 0;
			c = '\0';
			break;

		case '{':
			if(!in_string) {
				count++;
			}
			break;

		case '}':
			if(!in_string) {
				count--;
			}
			break;

		case '"':
		case '\'':
		case '`':
			if(in_string) {
				// only the same, not escaped, quote closes a string
				if(quote == c && !was_escaped) {
					in_string = false;
				}
			}
			else {
				quote = c;
				in_string = true;
			}
			break;

		// other characters kept as is
		}
		if(count > 0) {
			if(pos >= max) {
				max += 256;
				/*
				 * +6 because some multi-bytes take that many bytes
				 * +1 so the null terminator is reserved
				 */
				str = (char *) sswf_remalloc(str, max + 6 + 1, "StrAppend() -- large string buffer");
			}
			// size is the space available on input and what is
			// left of it on output
			size = 6;
			// TODO: should we check for errors?
			sswf::wctomb(&c, sizeof(c), str + pos, size);
			pos += 6 - size;
		}
	} while(count > 0);
	str[pos] = '\0';

	// the '}' character needs to be restored
	UngetChar(c);

#if ADJUT_STRINGS
	/* on most systems this is really fast and it can save some memory */
	str = sswf_remalloc(str, pos + 1, "ReadActionscript() -- adjusted to the minimum");
#endif

	yylval.node->string = str;

//fprintf(stderr, "Read actionscript [%s]\n", str);

	return STRING;
}


int ScriptFile::GetToken(void)
{
	c_t		c;

	yylloc.first_line = f_first_line = f_line;

	if(f_read_actionscript) {
		return ReadActionscript();
	}

	for(;;) {
		do {
			c = GetChar();
		} while(c == ' ' || c == '\t' || c == '\f' || c == '\n');

		if((c >= 'A' && c <= 'Z')
		|| (c >= 'a' && c <= 'z')
		|| c == '_'
		|| c >= 0x0C0) {	// international character
			/* an identifier or keyword */
			return ReadIdentifier(c);
		}

		switch(c) {
		case SCRIPTFILE_EOF:
			return EOF;

		case SCRIPTFILE_BAD:
			// TODO: ???
			return EOF;

		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			return ReadValue(c);

		case '\'':
		case '`':
		case '\"':
			return ReadString(c);

		case '.':
			c = GetChar();
			if(c == '.') {
				return RANGE;
			}
			if(c >= '0' && c <= '9') {
				UngetChar(c);
				return ReadValue('.');
			}
			UngetChar(c);
			return '.';

		case '*':
			c = GetChar();
			if(c == '*') {
				return POWER;
			}
			UngetChar(c);
			return '*';

		case '<':
			c = GetChar();
			if(c == '?') {
				return MIN_OP;
			}
			if(c == '<') {
				return SHIFT_LEFT;
			}
			if(c == '>') {
				return NOT_EQUAL;
			}
			if(c == '=') {
				return LESS_EQUAL;
			}
			UngetChar(c);
			return '<';

		case '>':
			c = GetChar();
			if(c == '?') {
				return MAX_OP;
			}
			if(c == '>') {
				c = GetChar();
				if(c == '>') {
					return SHIFT_RIGHT_UNSIGNED;
				}
				UngetChar(c);
				return SHIFT_RIGHT;
			}
			if(c == '=') {
				return GREATER_EQUAL;
			}
			UngetChar(c);
			return '>';

		case '!':
			c = GetChar();
			if(c == '<') {
				return ROTATE_LEFT;
			}
			if(c == '>') {
				return ROTATE_RIGHT;
			}
			if(c == '=') {
				return NOT_EQUAL;
			}
			UngetChar(c);
			return '!';

		case '=':
			c = GetChar();
			if(c == '=') {
				return EQUAL;
			}
			UngetChar(c);
			return '=';

		case ':':
			c = GetChar();
			if(c == '=') {
				return c;
			}
			UngetChar(c);
			return ':';

		case '|':
			c = GetChar();
			if(c == '|') {
				return LOGICAL_OR;
			}
			UngetChar(c);
			return '|';

		case '^':
			c = GetChar();
			if(c == '^') {
				return LOGICAL_XOR;
			}
			UngetChar(c);
			return '^';

		case '&':
			c = GetChar();
			if(c == '&') {
				return LOGICAL_AND;
			}
			UngetChar(c);
			return '&';

		case '/':
			c = GetChar();
			if(c == '*') {
				SkipComment('/');
				continue;
			}
			if(c == '/') {
				// C++ comment, read until '\n'
				do {
					c = GetChar();
				} while(c != '\n' && c != SCRIPTFILE_EOF);
				continue;
			}
			UngetChar(c);
			return '/';

		case '(':
			c = GetChar();
			if(c == '*') {
				SkipComment(')');
				continue;
			}
			UngetChar(c);
			return '(';

		// anything else is returned as is
		default:
			return c;

		}
	}

	return 0;
}


// Skip a C or Pascal like comment of which the introducer was already read.
//
// Characters are read and dropped until an asterisk immediately followed by
// the 'close' character is found ('/' for a C comment and ')' for a Pascal
// comment). The function also returns when the end of the file or an
// invalid character is reached.
//
//	close		the character ending the comment when found right
//			after an asterisk
void ScriptFile::SkipComment(int close)
{
	// NOTE: the 'register' storage class was dropped; it had no effect
	//	 and is not valid anymore as of C++17
	int	c, p;

	c = 0;
	do {
		p = c;		// p is the previous character
		c = GetChar();
	} while(c != SCRIPTFILE_EOF && c != SCRIPTFILE_BAD && (c != close || p != '*'));
}


// Description of one keyword recognized in scripts.
struct keyword_t {
	size_t		f_size;		// number of chars in the keyword
	const char *	f_name;		// the keyword
	node_type_t	f_type;		// the type or unit (how to interpret it depends on f_flags)
	node_type_t	f_subtype;	// the sub-type
	unsigned int	f_flags;	// what we need to do here (a set of KEYWORD_FLAG_...)
};
#define	KEYWORD_FLAG_DIRECT		0x00000000
#define	KEYWORD_FLAG_OBJECT		0x00000001
#define	KEYWORD_FLAG_UNIT		0x00000002	// f_type is the corresponding node_unit_t
#define	KEYWORD_FLAG_INTEGER		0x00000004	// f_type is an integer (for FALSE and TRUE)

#define	KEYWORD_FLAG_MULTIWORD		0x80000000


// Generic keyword_t initializer; the size is computed from the string
// literal 'w' (without its null terminator)
#define	KEYWORD(w, type, subtype, flg)	{ (sizeof(w) - 1), (w), ((node_type_t) type), (subtype), (flg) }

// An object keyword: the name is the identifier 'w' turned into a string and
// the sub-type is NODE_SUBTYPE_<w>
#define	OBJECT_KEYWORD(w, flg)		KEYWORD(#w, NODE_TYPE_OBJECT, NODE_SUBTYPE_##w, (flg) | KEYWORD_FLAG_OBJECT)
// A keyword whose type is the value of the identifier 'w' itself
#define	DIRECT_KEYWORD(w, flg)		KEYWORD(#w, (w), NODE_SUBTYPE_UNKNOWN, (flg))
// A unit keyword: the type is UNIT_<unit> and the sub-type NODE_SUBTYPE_<w>
#define	UNIT_KEYWORD(w, unit, flg)	KEYWORD(#w, UNIT_##unit, NODE_SUBTYPE_##w, (flg) | KEYWORD_FLAG_UNIT)


// Reference to one of the <letter>_keywords tables with its size
struct all_keywords_t {
	keyword_t *	f_keywords;	// first keyword of the table
	size_t		f_count;	// number of keywords in the table
};
// Initializer for the table named <x>_keywords; the count is computed from
// the size of the array
#define	ALL_KEYWORD_ENTRY(x)		{ x##_keywords, (sizeof(x##_keywords) / sizeof(keyword_t)) }


// Keyword tables, one per initial letter.
//
// ScriptFile::ReadIdentifier() walks a table from its first entry and
// stops on the first match, so the order of the entries matters: a
// multi-word keyword must appear before the single word keyword which
// is its first word (i.e. COLOR_TRANSFORM before COLOR, SOUND_INFO
// before SOUND, TEXT_SETUP before TEXT).
//
// All the names must be written in uppercase; an underscore stands for
// the separation between two words.
keyword_t a_keywords[] = {
	OBJECT_KEYWORD(ACTION, 0),
	DIRECT_KEYWORD(ACTION_SCRIPT, 0)
};

keyword_t b_keywords[] = {
	UNIT_KEYWORD(BC, COLOR, 0),
	// BLOCK == LIST
	KEYWORD("BLOCK", NODE_TYPE_OBJECT, NODE_SUBTYPE_LIST, KEYWORD_FLAG_OBJECT),
	OBJECT_KEYWORD(BUTTON, 0)
};

keyword_t c_keywords[] = {
	OBJECT_KEYWORD(CATCH, 0),
	UNIT_KEYWORD(CM, SIZE, 0),
	OBJECT_KEYWORD(COLOR_TRANSFORM, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(COLOR, 0)
};

keyword_t d_keywords[] = {
	// DEFINE SHAPE == SHAPE
	KEYWORD("DEFINE_SHAPE", NODE_TYPE_OBJECT, NODE_SUBTYPE_SHAPE, KEYWORD_FLAG_OBJECT | KEYWORD_FLAG_MULTIWORD),
	UNIT_KEYWORD(DEG, ANGLE, 0),
	OBJECT_KEYWORD(DO_ACTION, KEYWORD_FLAG_MULTIWORD)
};

keyword_t e_keywords[] = {
	OBJECT_KEYWORD(EDGES, 0),
	OBJECT_KEYWORD(EDIT_TEXT, KEYWORD_FLAG_MULTIWORD),
	DIRECT_KEYWORD(ELSE, 0),
	OBJECT_KEYWORD(END, 0),
	// ENVELOP == ENVELOPE (alternate spelling)
	KEYWORD("ENVELOP", NODE_TYPE_OBJECT, NODE_SUBTYPE_ENVELOPE, KEYWORD_FLAG_OBJECT),
	OBJECT_KEYWORD(ENVELOPE, 0),
	OBJECT_KEYWORD(EXPORT, 0)
};

keyword_t f_keywords[] = {
	// FALSE is the integer value 0
	KEYWORD("FALSE", 0, NODE_SUBTYPE_UNKNOWN, KEYWORD_FLAG_INTEGER),
	UNIT_KEYWORD(FC, COLOR, 0),
	OBJECT_KEYWORD(FILL_STYLE, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(FINALLY, 0),
	OBJECT_KEYWORD(FONT, 0),
	DIRECT_KEYWORD(FOR, 0),
	UNIT_KEYWORD(FPF, SPEED, 0),
	UNIT_KEYWORD(FPS, SPEED, 0),
	OBJECT_KEYWORD(FRAME_LABEL, KEYWORD_FLAG_MULTIWORD),
	UNIT_KEYWORD(FRM, TIME, 0),
	OBJECT_KEYWORD(FUNCTION, 0)
};

keyword_t g_keywords[] = {
	OBJECT_KEYWORD(GLYPH, 0),
	UNIT_KEYWORD(GRAD, ANGLE, 0),
	OBJECT_KEYWORD(GRADIENT, 0)
};

keyword_t i_keywords[] = {
	DIRECT_KEYWORD(IF, 0),
	OBJECT_KEYWORD(IMAGE, 0),
	OBJECT_KEYWORD(IMPORT, 0),
	UNIT_KEYWORD(IN, SIZE, 0)
};

keyword_t l_keywords[] = {
	OBJECT_KEYWORD(LABEL, 0),
	OBJECT_KEYWORD(LINE_STYLE, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(LIST, 0)
};

keyword_t m_keywords[] = {
	OBJECT_KEYWORD(MATRIX, 0),
	OBJECT_KEYWORD(METADATA, 0),
	UNIT_KEYWORD(MIN, TIME, 0)
};

keyword_t o_keywords[] = {
	OBJECT_KEYWORD(ON_EVENT, KEYWORD_FLAG_MULTIWORD)
};

keyword_t p_keywords[] = {
	OBJECT_KEYWORD(PLACE_OBJECT, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(POINTS, 0),
	UNIT_KEYWORD(PR, RATIO, 0),
	UNIT_KEYWORD(PX, SIZE, 0)
};

keyword_t r_keywords[] = {
	UNIT_KEYWORD(RAD, ANGLE, 0),
	OBJECT_KEYWORD(RECT, 0),
	// RECTANGLE == RECT
	KEYWORD("RECTANGLE", NODE_TYPE_OBJECT, NODE_SUBTYPE_RECT, KEYWORD_FLAG_OBJECT),
	OBJECT_KEYWORD(REMOVE, 0),
	OBJECT_KEYWORD(REPLACE_OBJECT, KEYWORD_FLAG_MULTIWORD),
	UNIT_KEYWORD(RT, RATIO, 0)
};

keyword_t s_keywords[] = {
	OBJECT_KEYWORD(SCRIPT_LIMITS, KEYWORD_FLAG_MULTIWORD),
	UNIT_KEYWORD(SEC, TIME, 0),
	OBJECT_KEYWORD(SEQUENCE, 0),
	OBJECT_KEYWORD(SET_BACKGROUND_COLOR, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(SET_TAB_INDEX, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(SHAPE, 0),
	OBJECT_KEYWORD(SHOW_FRAME, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(SOUND_INFO, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(SOUND, 0),
	OBJECT_KEYWORD(SPRITE, 0),
	OBJECT_KEYWORD(STATE, 0)
};

keyword_t t_keywords[] = {
	OBJECT_KEYWORD(TEXT_SETUP, KEYWORD_FLAG_MULTIWORD),
	OBJECT_KEYWORD(TEXT, 0),
	// TRUE is the integer value 1
	KEYWORD("TRUE", 1, NODE_SUBTYPE_UNKNOWN, KEYWORD_FLAG_INTEGER),
	OBJECT_KEYWORD(TRY, 0),
	UNIT_KEYWORD(TW, SIZE, 0)
};

keyword_t w_keywords[] = {
	OBJECT_KEYWORD(WITH, 0)
};


// Index of the tables above; ScriptFile::ReadIdentifier() indexes it
// with the uppercased first letter of the word minus 'A', so it must
// have exactly one slot per letter, in alphabetical order.
all_keywords_t all_keywords[26] = {
	/* A */ ALL_KEYWORD_ENTRY(a),
	/* B */ ALL_KEYWORD_ENTRY(b),
	/* C */ ALL_KEYWORD_ENTRY(c),
	/* D */ ALL_KEYWORD_ENTRY(d),
	/* E */ ALL_KEYWORD_ENTRY(e),
	/* F */ ALL_KEYWORD_ENTRY(f),
	/* G */ ALL_KEYWORD_ENTRY(g),
	/* H */ { 0, 0 },	// ALL_KEYWORD_ENTRY(h),
	/* I */ ALL_KEYWORD_ENTRY(i),
	/* J */ { 0, 0 },	// ALL_KEYWORD_ENTRY(j),
	/* K */ { 0, 0 },	// ALL_KEYWORD_ENTRY(k),
	/* L */ ALL_KEYWORD_ENTRY(l),
	/* M */ ALL_KEYWORD_ENTRY(m),
	/* N */ { 0, 0 },	// ALL_KEYWORD_ENTRY(n),
	/* O */ ALL_KEYWORD_ENTRY(o),
	/* P */ ALL_KEYWORD_ENTRY(p),
	/* Q */ { 0, 0 },	// ALL_KEYWORD_ENTRY(q),
	/* R */ ALL_KEYWORD_ENTRY(r),
	/* S */ ALL_KEYWORD_ENTRY(s),
	/* T */ ALL_KEYWORD_ENTRY(t),
	/* U */ { 0, 0 },	// ALL_KEYWORD_ENTRY(u),
	/* V */ { 0, 0 },	// ALL_KEYWORD_ENTRY(v),
	/* W */ ALL_KEYWORD_ENTRY(w),
	/* X */ { 0, 0 },	// ALL_KEYWORD_ENTRY(x),
	/* Y */ { 0, 0 },	// ALL_KEYWORD_ENTRY(y),
	/* Z */ { 0, 0 }	// ALL_KEYWORD_ENTRY(z)
};


// Read an identifier and check whether it is a keyword.
//
// 'c' is the first character of the word, already read by the caller.
// The function reads all the following letters, digits, underscores
// and characters >= 0xC0. Characters >= 0x80 are stored through
// sswf::wctomb() (UTF-8 according to the notes below), the others are
// stored as is.
//
// When the word is pure ASCII, shorter than 20 bytes and starts with a
// letter, it is compared (case insensitively) against the keywords of
// the all_keywords[] table for that letter:
//
//	. a keyword written without separation (i.e. "COLORTRANSFORM")
//	  matches since the underscores of the keyword name are skipped;
//
//	. a keyword flagged KEYWORD_FLAG_MULTIWORD also matches when its
//	  words are separated by blanks or underscores, provided the first
//	  word is followed by a blank; the extra characters read ahead to
//	  test these are pushed back with UngetChar() when not used.
//
// The function returns:
//
//	OBJECT		keyword with KEYWORD_FLAG_OBJECT; yylval.node is a new
//			node of the keyword type and sub-type
//	<f_type>	keyword with KEYWORD_FLAG_UNIT (yylval.type is set to
//			the keyword sub-type) or with no flag (direct token)
//	VALUE		keyword with KEYWORD_FLAG_INTEGER; yylval.node is an
//			integer node holding f_type (FALSE and TRUE)
//	IDENTIFIER	anything else; yylval.node->string is a heap copy of
//			the word
int ScriptFile::ReadIdentifier(c_t c)
{
	char		a, identifier[256];	/* by default we expect that identifiers are less than 256 chars */
	char		*id;
	const char	*s1, *s2;
	size_t		size;
	int		pos, max, cnt, idx;
	const keyword_t	*k;
	bool		has_international, found;
	c_t		ex;

	// save the first character
	id = identifier;
	has_international = c >= 0x80;
	if(has_international) {
		size = 6;
		sswf::wctomb(&c, sizeof(c), id, size);
		pos = 6 - size;
	}
	else {
		id[0] = (char) c;
		pos = 1;
	}
	// keep room for one multi-byte character (up to 6 bytes) and
	// the null terminator
	max = sizeof(identifier) - 6 - 1;

	// read one identifier -- keywords may be composed of
	// multiple identifiers, others will be read if required
	// only
	c = GetChar();
	while((c >= '0' && c <= '9')
	   || (c >= 'A' && c <= 'Z')
	   || (c >= 'a' && c <= 'z')
	   || c == '_'
	   || c >= 0x0C0) {	// we accept any international character too
		if(pos >= max) {
			// need a larger buffer
			max += 256;
			if(id != identifier) {
				id = (char *) sswf_remalloc(id, max + 6 + 1, "ReadIdentifier() -- really large identifier buffer");
			}
			else {
				id = (char *) sswf_malloc(max + 6 + 1, "ReadIdentifier() -- large identifier buffer");
				// copy everything read so far; pos can be up to
				// 5 bytes past the previous limit when the last
				// character saved was a multi-byte character
				memcpy(id, identifier, (size_t) pos);
			}
		}
		if(c >= 0x80) {
			size = 6;
			sswf::wctomb(&c, sizeof(c), id + pos, size);
			pos += 6 - size;
			has_international = true;
		}
		else {
			// ASCII is anyway saved as is and this is much
			// faster than a crazy call to another function
			// (it also enables me to set the has_international
			// flag in the first part)
			id[pos] = (char) c;
			pos++;
		}
		c = GetChar();
	}
	// the character ending the word is not part of it
	UngetChar(c);
	id[pos] = '\0';

	/*
	 * Check for keywords...
	 * NOTE: keywords only include [A-Z0-9_ \t\r\n]
	 * (blanks are only if the keywords can be composed
	 * of multiple words)
	 *
	 * The test below is valid since (1) we save the international
	 * characters in UTF-8 and (2) the has_international flag will
	 * make sure the function skips the test for keywords at once.
	 */
	// id[0] may be a negative char when it is the first byte of a
	// multi-byte character, toupper() requires an unsigned char value
	a = (char) toupper((unsigned char) id[0]);
	if(!has_international
	&& pos < 20		// no keyword longer than that
	&& a >= 'A'		// all keywords start with a letter
	&& a <= 'Z') {
		// from here on, 'c' only tells us whether the word was
		// followed by a blank (GetChar() already changed '\r' in '\n')
		if(c == '\t' || c == '\n') {
			c = ' ';
		}
		//
		// NOTE: max was used to know if the identifier string was
		//	 allocated and needed extension; here we know that
		//	 it will fit in the identifier buffer and thus we
		//	 don't have to worry about it
		//
		//	 pos can't be modified unless a multi-word keyword
		//	 is found since all the extra characters read
		//	 because of that multi-word entry need to be put
		//	 back in the input stream
		//
		//	 notice that all spaces (' ', '\t' and '\n') are
		//	 transformed in one underscore ('_') within a
		//	 keyword
		//
		// NOTE(review): an underscore typed in the input is only
		//	 accepted by the multi-word test below, which requires
		//	 the first word to be followed by a blank -- confirm
		//	 that this is the expected behavior.
		//
		max = pos;	// max is now the number of valid chars in id[]
		a -= 'A';
		// NOTE: though a will always be positive, it's
		//	 still safer to have a cast to unsigned char!
		k = all_keywords[(unsigned char) a].f_keywords;
		cnt = all_keywords[(unsigned char) a].f_count;
		found = false;

		while(cnt > 0) {
			cnt--;

#if DEBUG
			// make sure that all the keywords are given in upper case
			// [this was actually fixed in 1.7.3]
			idx = k->f_size;
			while(idx > 0) {
				idx--;
				assert(k->f_name[idx] == '_' || (k->f_name[idx] >= 'A' && k->f_name[idx] <= 'Z'), "ScriptFile::ReadIdentifier(): a keyword must fully be given in uppercase (%s)", k->f_name);
			}
#endif

			// in case we have a multi-word we can't check the size
			if(k->f_size >= (size_t) pos) {
				// compare the first word of id[] (id[] may hold
				// more words read ahead by the multi-word test)
				// with the keyword name less its underscores
				//
				// This wouldn't take the multi-word written
				// as one word in account...
				//found = strncasecmp(k->f_name, id, pos) == 0;
				s1 = k->f_name;
				s2 = id;
				while(*s1 != '\0' && (*s2 != '\0' && *s2 != ' ')) {
					if(*s1 == '_') {
						s1++;
						assert(*s1 != '\0' && *s1 != '_', "ScriptFile::ReadIdentifier(): a keyword can't end with an underscore nor have two underscores one after another");
					}
					if(*s1 != toupper((unsigned char) *s2)) {
						// this is not valid
						break;
					}
					s1++;
					s2++;
				}
				if(*s1 == '\0' && (*s2 == '\0' || *s2 == ' ')) {
					found = true;
					break;
				}
			}
			if(c == ' ' && (k->f_flags & KEYWORD_FLAG_MULTIWORD) != 0) {
				// 1. do we have enough characters already?
				idx = k->f_size + 1;
				if(max < idx) {
					// not enough, read more
					ex = GetChar();
					while(max < idx
					   && ((ex >= '0' && ex <= '9')
					    || (ex >= 'A' && ex <= 'Z')
					    || (ex >= 'a' && ex <= 'z')
					    ||  ex == '_' ||  ex == ' ' || ex == '\t' || ex == '\n'
					   /*|| ex >= 0x0C0*/)) {	// we DON'T accept international characters since all keywords are in ASCII
						if(ex == ' ' || ex == '\t' || ex == '\n') {
							// ignore multiple spaces
							if(id[max - 1] != ' ') {
								// ASCII is saved as is
								id[max] = ' ';
								max++;
							}
						}
						else {
							// ASCII is saved as is
							id[max] = (char) ex;
							max++;
						}
						ex = GetChar();
					}
					UngetChar(ex);
				}
				id[max] = '\0';


				// 2. make sure the following character is a space (' ')
				//    and that the input had enough characters
				idx--;
				if((size_t) max >= k->f_size && (id[idx] == ' ' || id[idx] == '\0')) {
					// compare the words "by hand" because of the ' ' and '_'
					// which have to be considered as being equal
					//
					// IMPORTANT: the 'break's below would be "wrong" if we
					// didn't already know that the very first character was
					// always to be equal (that's the case because we selected
					// this table specifically because all the keywords to
					// check are starting with that letter)
					while(idx > 0) {
						idx--;
						if(id[idx] == ' ' || id[idx] == '_') {
							if(k->f_name[idx] != '_') {
								break;
							}
						}
						else if(id[idx] >= 'a' && id[idx] <= 'z') {
							if(k->f_name[idx] != (id[idx] & 0x5F)) {
								break;
							}
						}
						else if(k->f_name[idx] != id[idx]) {
							break;
						}
					}
					found = idx == 0;
					if(found) {
						// all the words of the keyword are consumed
						pos = k->f_size;
						break;
					}
				}
			}
			k++;
		}
		// put back whatever was read ahead and isn't part of the result
		//
		// NOTE(review): with a short first word up to 21 - pos
		//	 characters (plus the one pushed back above) may be
		//	 ungotten here, which looks like it can exceed
		//	 UNGET_COUNT_MAX (16) -- confirm.
		while(max > pos) {
			max--;
			UngetChar(id[max]);	// we know id[] is only composed of ASCII
		}
		if(found) {
			// we found a match with a keyword, act on it
			if((k->f_flags & KEYWORD_FLAG_OBJECT) != 0) {
				yylval.node = node_alloc(k->f_type, k->f_subtype, f_first_line);
				return OBJECT;
			}
			if((k->f_flags & KEYWORD_FLAG_UNIT) != 0) {
				yylval.type = k->f_subtype;
				return k->f_type;
			}
			if((k->f_flags & KEYWORD_FLAG_INTEGER) != 0) {
				yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, f_first_line);
				yylval.node->integer = k->f_type;
				return VALUE;
			}
			// KEYWORD_FLAG_DIRECT
			return k->f_type;
		}
		// not a keyword, restore the end of the first word
		id[pos] = '\0';
	}

	yylval.node = node_alloc(NODE_TYPE_IDENTIFIER, NODE_SUBTYPE_UNKNOWN, f_first_line);
	if(id != identifier) {
		// the buffer was allocated, give it to the node
		yylval.node->string = id;
	}
	else {
		yylval.node->string = sswf_strdup(id);
	}

	return IDENTIFIER;
}


// Read a string literal.
//
// 'quote' is the opening quote, already read by the caller; characters
// are read up to the next unescaped 'quote' character or the end of
// the input. The result is saved in a new NODE_TYPE_STRING node
// (yylval.node, string allocated with sswf_malloc()) and the function
// always returns STRING.
//
// The following backslash sequences are understood:
//
//	\a \b \t \n \v \f \r	the usual C control characters (7 to 13)
//	\u+<hex> \U+<hex>	a character code in hexadecimal
//	\x<hex> \X<hex>		idem
//	\0x<hex> \0X<hex>	idem
//	\<octal>		a character code in octal
//	\<any other>		that character (i.e. \" and \\)
//
// Hexadecimal codes are composed of 1 to 8 digits. Characters with a
// code of zero or less (including SCRIPTFILE_BAD) are silently dropped.
// Each character is stored through sswf::wctomb().
int ScriptFile::ReadString(c_t quote)
{
	c_t		c, o;
	int		pos, max;
	c_t		r;
	int		cnt;
	char		*str;
	size_t		size;

	// create the node at the start so the f_line is at the start
	yylval.node = node_alloc(NODE_TYPE_STRING, NODE_SUBTYPE_UNKNOWN, f_first_line);

	str = (char *) sswf_malloc(256, "ReadString() -- small string buffer");
		/*
		 * We allocated 256, but save 1 byte for the null terminator
		 * and up to 6 for the last multi-byte
		 */
	max = 256 - 6 - 1;
	pos = 0;
	for(;;) {
		c = GetChar();
		if(c == quote || c == SCRIPTFILE_EOF) {
			break;
		}
		// skip bad chars.
		if(c == SCRIPTFILE_BAD) {
			continue;
		}
		if(c == '\\') {
			c = GetChar();
			if(c == SCRIPTFILE_EOF) {
				break;
			}
			// keep the escaped character in case the sequence is
			// not valid and needs to be taken literally
			o = c;
			switch(c) {
			case 'a': c =  7; break;	// BEL
			case 'b': c =  8; break;	// BS
			case 't': c =  9; break;	// HT
			case 'n': c = 10; break;	// LF
			case 'v': c = 11; break;	// VT
			case 'f': c = 12; break;	// FF
			case 'r': c = 13; break;	// CR

			case 'U':
			case 'u':
				c = GetChar();
				if(c != '+') {
					UngetChar(c);
					c = o;
					break;
				}
				c = GetChar();
				if(c >= '0' && c <= '9') {
					c -= '0';
				}
				else if(c >= 'a' && c <= 'f') {
					c -= 'a' - 10;
				}
				else if(c >= 'A' && c <= 'F') {
					c -= 'A' - 10;
				}
				else {
					// not a code: keep the 'u' and restore what
					// follows; the unget buffer is a stack so the
					// last character read is put back first
					UngetChar(c);
					UngetChar('+');
					c = o;
					break;
				}
				goto readhex;

			case 'X':
			case 'x':
				c = GetChar();
				if(c >= '0' && c <= '9') {
					c -= '0';
				}
				else if(c >= 'a' && c <= 'f') {
					c -= 'a' - 10;
				}
				else if(c >= 'A' && c <= 'F') {
					c -= 'A' - 10;
				}
				else {
					UngetChar(c);
					c = o;
					break;
				}
readhex:
				// c is the value of the first digit, read up
				// to 7 more digits
				cnt = 7;
				while(cnt > 0) {
					cnt--;
					r = GetChar();
					if(r >= '0' && r <= '9') {
						c = c * 16 + r - '0';
					}
					else if(r >= 'a' && r <= 'f') {
						c = c * 16 + r - 'a' + 10;
					}
					else if(r >= 'A' && r <= 'F') {
						c = c * 16 + r - 'A' + 10;
					}
					else {
						UngetChar(r);
						break;
					}
				}
				break;

			case '0':
				c = GetChar();
				if(c == 'x' || c == 'X') {
					o = c;
					c = GetChar();
					if(c >= '0' && c <= '9') {
						c -= '0';
					}
					else if(c >= 'a' && c <= 'f') {
						c -= 'a' - 10;
					}
					else if(c >= 'A' && c <= 'F') {
						c -= 'A' - 10;
					}
					else {
						// this is just \0 followed by an 'x'
						UngetChar(c);
						UngetChar(o);
						c = 0;
						break;
					}
					goto readhex;
				}
				// an octal code starting with 0: the character we
				// just read is not ours yet (it may very well be
				// the closing quote!), the loop below reads it
				// again if it is an octal digit
				UngetChar(c);
				c = '0';
				/*FALLTHROUGH*/
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				c = c - '0';
				cnt = 10;
				while(cnt > 0) {
					cnt--;
					r = GetChar();
					if(r < '0' || r > '7') {
						UngetChar(r);
						break;
					}
					c = c * 8 + r - '0';
				}
				break;

			}
		}
		if(c > 0) {
			if(pos >= max) {
				max += 256;
				/*
				 * +6 because some multi-bytes take that many bytes
				 * +1 so the null terminator is reserved
				 */
				str = (char *) sswf_remalloc(str, max + 6 + 1, "ReadString() -- large string buffer");
			}
			size = 6;
			// TODO: should we check for errors?
			sswf::wctomb(&c, sizeof(c), str + pos, size);
			pos += 6 - size;
		}
	}
	str[pos] = '\0';

	// NOTE(review): ADJUT_STRINGS is presumably a misspelling of
	//		 ADJUST_STRINGS; the name is kept since it may be
	//		 defined by the build environment -- confirm.
#if ADJUT_STRINGS
	/* on most systems this is really fast and it can save some memory */
	str = (char *) sswf_remalloc(str, pos + 1, "ReadString() -- adjusted to the minimum");
#endif

	yylval.node->string = str;

	return STRING;
}


// Read a number.
//
// 'c' is the first character of the number, already read by the
// caller. The function understands:
//
//	0x<hex> 0X<hex>		hexadecimal integers
//	0<octal>		octal integers
//	<decimal>		decimal integers
//	[<decimal>].<decimal>[e[+|-]<decimal>]
//				floating points (the exponent is only
//				accepted after a decimal point)
//
// The result is saved in a new NODE_TYPE_INTEGER or NODE_TYPE_FLOAT
// node (yylval.node) and the function always returns VALUE. The first
// character which is not part of the number is pushed back in the
// input.
int ScriptFile::ReadValue(c_t c)
{
	long		r, p, sign;
	double		fr, div, exp;

	r = 0;
	fr = 0.0;

	if(c == '0') {		/* check for hexa, otherwise it's probably octal or floating point? */
		c = GetChar();
		if(c == 'x' || c == 'X') {
			/* hexadecimal */
			p = c;
			c = GetChar();
			if((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) {
				/* this is NOT a valid hex. value (just 0) */
				UngetChar(c);
				UngetChar(p);
				c = '\0';
			}
			else {
				/* p always holds the value before the last digit was added */
				p = 0;
				for(;;) {
					if(c >= '0' && c <= '9') {
						p = r;
						r = r * 16 + c - '0';
					}
					else if(c >= 'a' && c <= 'f') {
						p = r;
						r = r * 16 + c - 'a' + 10;
					}
					else if(c >= 'A' && c <= 'F') {
						p = r;
						r = r * 16 + c - 'A' + 10;
					}
					else {
						if((c == 'm' || c == 'M') && (r & 15) == 0xC) {
							/* this is an hex. followed by 'cm'! */
							/* the 'c' was taken as a digit: remove it from */
							/* the value and put "cm" back in the input */
							UngetChar(c);
							c = 'c';
							r = p;		/* restore saved value (because of possible overflow!) */
							fprintf(stderr, "WARNING: hexadecimal followed by the CM unit.\n");
						}
						UngetChar(c);
						c = '\0';
						break;
					}
					c = GetChar();
				}
			}
		}
		else if(c != '.') {
			/* octal -- only digits 0 to 7 are valid */
			while(c >= '0' && c <= '7') {
				r = r * 8 + c - '0';
				c = GetChar();
			}
			if(c == '8' || c == '9') {
				fprintf(stderr, "ERROR: invalid octal number.\n");
				/* skip the rest of the number */
				do {
					c = GetChar();
				} while(c >= '0' && c <= '9');
			}
			UngetChar(c);
			c = '\0';
		}
	}
	if(c != '.') {
		/* decimal (skipped with c == '\0' when we already got a number) */
		while(c >= '0' && c <= '9') {
			r = r * 10 + c - '0';
			fr = fr * 10 + c - '0';		// avoid overflows we may get in 'r' if the number is followed by a period (.)
			c = GetChar();
		}
	}
	if(c == '.') {
		/* we found a floating point value */
		/* TODO: the following is wrong because the '...e+/-<value>' will change the outcome very much */
		//fr = (double) r;
		div = 0.1;
		c = GetChar();
		while(c >= '0' && c <= '9') {
			fr += (double) (c - '0') * (double) div;
			div /= 10.0;
			c = GetChar();
		}
		if(c == 'e' || c == 'E') {
			/* r is not necessary for a float, use it to save the 'e' */
			/* which is restored if no exponent follows (it is then */
			/* pushed back in the input at the end of the function) */
			r = c;
			c = GetChar();
			exp = 0;
			sign = 1;
			if(c == '+') {
				c = GetChar();
				if(c < '0' || c > '9') {
					UngetChar(c);
					UngetChar('+');
					c = r;
				}
			}
			else if(c == '-') {
				c = GetChar();
				if(c < '0' || c > '9') {
					UngetChar(c);
					UngetChar('-');
					c = r;
				}
				else {
					sign = -1;
				}
			}
			else if(c < '0' || c > '9') {
				UngetChar(c);
				c = r;
			}
			while(c >= '0' && c <= '9') {
				exp = exp * 10 + c - '0';
				c = GetChar();
			}
			if(exp != 0) {
				fr *= pow(10.0, exp * (double) sign);
			}
		}
		yylval.node = node_alloc(NODE_TYPE_FLOAT, NODE_SUBTYPE_UNKNOWN, f_first_line);
		yylval.node->floating_point = fr;
	}
	else {
		/* we have a integer number */
		yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, f_first_line);
		yylval.node->integer = r;
	}

	/* '\0' means that the last character was already pushed back */
	if(c != '\0') {
		UngetChar(c);
	}

	return VALUE;
}


// Get the next character of the script.
//
// Characters pushed back with UngetChar() are returned first (last
// pushed, first returned), then the characters decoded by ReadChar().
// The different new lines (\n, \r and \r\n) are all returned as one
// '\n' and each one of them increments the current line number.
// The character returned is also saved in f_last_char.
ScriptFile::c_t ScriptFile::GetChar(void)
{
	if(f_unget_count <= 0) {
		// nothing pending, decode one more character from the file
		f_last_char = ReadChar();
	}
	else {
		f_unget_count--;
		f_last_char = f_unget[f_unget_count];
	}

	if(f_last_char == '\r') {
		// canonicalize the new line; in a \r\n sequence the \n
		// is dropped, any other character is kept for next time
		f_last_char = '\n';
		const c_t	follower = ReadChar();
		if(follower != '\n') {
			UngetChar(follower);
		}
	}

	// keep the line counter in sync with what the caller sees
	if(f_last_char == '\n') {
		f_line++;
	}

	return f_last_char;
}


// Push a character back in the input so the next GetChar() returns it.
//
// Negative values (SCRIPTFILE_EOF, SCRIPTFILE_BAD) are ignored.
// Pushing back a '\n' decrements the line counter since GetChar() will
// count that line again. At most UNGET_COUNT_MAX characters can be
// pending at any one time.
void ScriptFile::UngetChar(c_t c)
{
	if(c >= 0) {
		if(c == '\n') {
			f_line--;
		}

		assert(f_unget_count < UNGET_COUNT_MAX, "too many ScriptFile::UngetChar()");
		f_unget[f_unget_count++] = c;
	}
	// else -- the end of file and errors are not recorded
}


// Decode the next character of the input file.
//
// The bytes are read with ReadByte() and decoded according to the
// current encoding (f_type). The function returns the character code,
// SCRIPTFILE_EOF once the end of the file is reached (or after an error
// which forces the file closed) or SCRIPTFILE_BAD when the bytes read
// do not represent a valid character.
//
//	MULTIBYTES	up to MULTIBYTE_MAX bytes are buffered and given to
//			iconv() which converts one character at a time
//	UTF16LE/BE	16 bits units; surrogate pairs are combined; a
//			reversed BOM (0xFFFE) switches the endianness
//	UCS2LE/BE	16 bits units; a reversed BOM switches the endianness
//	UCS4LE/BE	32 bits units, the top bit must be clear; a reversed
//			BOM switches the endianness
//	ISO88591	the byte is the character
//	ASCII		idem, bytes over 0x7F are bad characters
//	UTF8		1 to 6 bytes, over long encodings are refused
ScriptFile::c_t ScriptFile::ReadChar(void)
{
	int		a, b, cnt, min;
	const char	*input;
	char		*output;
	size_t		out;
	c_t		outchar;

	// the following is to avoid warnings -- a would always be
	// properly initialized without it
	a = 0;

	if(f_type == SCRIPTFILE_TYPE_MULTIBYTES) {
		// keep the buffer as full as possible
		if(f_file != 0) {
			while(f_mb_count < MULTIBYTE_MAX) {
				a = ReadByte();
				if(a == -1) {
					CloseFile();
					break;
				}
				f_multibytes[f_mb_count] = a;
				f_mb_count++;
			}
		}
		// anything in the input stream?
		if(f_mb_count == 0) {
			return SCRIPTFILE_EOF;
		}
		// 8 bits files need to have each character
		// converted according to the encoding we've
		// got reading the script starting comment
		input = f_multibytes;
		out = sizeof(outchar);
		output = (char *) &outchar;
		// never return garbage if iconv() succeeds without
		// generating a character
		outchar = SCRIPTFILE_BAD;
		b = (int) f_mb_count;
		a = (int) iconv(f_iconvertor, ICONV_INPUT_CAST &input, &f_mb_count, &output, &out);
		// the output buffer will usually be full before the
		// input is fully emptied!
		if(a < 0 && errno == E2BIG && out == 0 && b != (int) f_mb_count) {
			a = 1;
		}
		if(a < 0) {
			f_last_errno = errno;
			// on return input points to the first byte which
			// wasn't converted and f_mb_count is the number of
			// bytes left from there
			fprintf(stderr, "ERROR: can't convert the bytes: ");
			for(a = 0; a < (int) f_mb_count; a++) {
				fprintf(stderr, " 0x%02X", (unsigned char) input[a]);
			}
			fprintf(stderr, ", to a character (errno: %d)\n", f_last_errno);
			outchar = SCRIPTFILE_BAD;
			// skip the offensive byte, we need to do this if
			// we don't want to loop forever
			if(f_mb_count > 0) {
				input++;
				f_mb_count--;
			}
		}
		// the characters used need to be removed from the input buffer
		memmove(f_multibytes, input, f_mb_count);
		return outchar;
	}

	if(f_file == 0) {
		return SCRIPTFILE_EOF;
	}

	a = ReadByte();
	if(a == -1) {
		CloseFile();
		return SCRIPTFILE_EOF;
	}

	for(;;) {
		switch(f_type) {
		case SCRIPTFILE_TYPE_UTF16LE:
			b = ReadByte();
			if(b == -1) {
				fprintf(stderr, "ERROR: invalid UTF16LE end (odd size file)\n");
				CloseFile();
				return SCRIPTFILE_EOF;
			}
			outchar = a + b * 256;
			if(outchar >= 0xD800 && outchar <= 0xDBFF) {
				// high surrogate, the low surrogate must follow
				outchar = (outchar & 0x3FF) << 10;
				// least significant byte first
				a = ReadByte();
				if(a == -1) {
					fprintf(stderr, "ERROR: invalid UTF16LE end (missing 0xDC00-0xDFFF character)\n");
					CloseFile();
					return SCRIPTFILE_EOF;
				}
				b = ReadByte();
				if(b == -1) {
					fprintf(stderr, "ERROR: invalid UTF16LE end (missing 0xDC00-0xDFFF character)\n");
					CloseFile();
					return SCRIPTFILE_EOF;
				}
				if(b < 0xDC || b > 0xDF) {
					fprintf(stderr, "ERROR: invalid UTF16LE bad 0xD800/0xDC00 sequence\n");
					CloseFile();
					return SCRIPTFILE_EOF;
				}
				outchar |= a | ((b & 0x03) << 8);
				// surrogates encode characters from U+10000
				outchar += 0x10000;
			}
			else if(outchar >= 0xDC00 && outchar <= 0xDFFF) {
				// ouch, the 0xD800-0xDBFF is missing
				return SCRIPTFILE_BAD;
			}
			else if(outchar == 0xFFFE) {
				// change endian!
				f_type = SCRIPTFILE_TYPE_UTF16BE;
				return 0xFEFF;
			}
			return outchar;

		case SCRIPTFILE_TYPE_UTF16BE:
			b = ReadByte();
			if(b == -1) {
				fprintf(stderr, "ERROR: invalid UTF16BE end (odd size file)\n");
				CloseFile();
				return SCRIPTFILE_EOF;
			}
			outchar = a * 256 + b;
			if(outchar >= 0xD800 && outchar <= 0xDBFF) {
				// high surrogate, the low surrogate must follow
				outchar = (outchar & 0x3FF) << 10;
				// most significant byte first
				b = ReadByte();
				if(b == -1) {
					fprintf(stderr, "ERROR: invalid UTF16BE end (missing 0xDC00-0xDFFF character)\n");
					CloseFile();
					return SCRIPTFILE_EOF;
				}
				if(b < 0xDC || b > 0xDF) {
					fprintf(stderr, "ERROR: invalid UTF16BE bad 0xD800/0xDC00 sequence\n");
					CloseFile();
					return SCRIPTFILE_EOF;
				}
				outchar |= (b & 0x03) << 8;
				b = ReadByte();
				if(b == -1) {
					fprintf(stderr, "ERROR: invalid UTF16BE end (missing 0xDC00-0xDFFF character)\n");
					CloseFile();
					return SCRIPTFILE_EOF;
				}
				outchar |= b;
				// surrogates encode characters from U+10000
				outchar += 0x10000;
			}
			else if(outchar >= 0xDC00 && outchar <= 0xDFFF) {
				// ouch, the 0xD800-0xDBFF is missing
				return SCRIPTFILE_BAD;
			}
			else if(outchar == 0xFFFE) {
				// change endian!
				f_type = SCRIPTFILE_TYPE_UTF16LE;
				return 0xFEFF;
			}
			return outchar;

		case SCRIPTFILE_TYPE_UCS2LE:
			b = ReadByte();
			if(b == -1) {
				fprintf(stderr, "ERROR: invalid UCS2LE end (odd size file)\n");
				CloseFile();
				return SCRIPTFILE_EOF;
			}
			outchar = a + b * 256;
			if(outchar == 0xFFFE) {
				// change endian!
				f_type = SCRIPTFILE_TYPE_UCS2BE;
				return 0xFEFF;
			}
			return outchar;

		case SCRIPTFILE_TYPE_UCS2BE:
			b = ReadByte();
			if(b == -1) {
				fprintf(stderr, "ERROR: invalid UCS2BE end (odd size file)\n");
				CloseFile();
				return SCRIPTFILE_EOF;
			}
			outchar = a * 256 + b;
			if(outchar == 0xFFFE) {
				// change endian!
				f_type = SCRIPTFILE_TYPE_UCS2LE;
				return 0xFEFF;
			}
			return outchar;

		case SCRIPTFILE_TYPE_UCS4LE:
			// any missing byte falls through to the error below
			b = ReadByte();
			if(b != -1) {
				outchar = a | (b << 8);
				b = ReadByte();
				if(b != -1) {
					outchar |= (c_t) b << 16;
					b = ReadByte();
					if(b != -1) {
						outchar |= (c_t) b << 24;
						if(outchar == (c_t) 0xFFFE0000) {
							// change endian!
							f_type = SCRIPTFILE_TYPE_UCS4BE;
							return 0xFEFF;
						}
						// b is the most significant byte
						if(b < 0x80) {
							return outchar;
						}
					}
				}
			}
			fprintf(stderr, "ERROR: invalid UCS4LE end or bit 32 set\n");
			CloseFile();
			return SCRIPTFILE_EOF;

		case SCRIPTFILE_TYPE_UCS4BE:
			// any missing byte falls through to the error below
			b = ReadByte();
			if(b != -1) {
				outchar = ((c_t) a << 24) | ((c_t) b << 16);
				b = ReadByte();
				if(b != -1) {
					outchar |= b << 8;
					b = ReadByte();
					if(b != -1) {
						outchar |= b;
						if(outchar == (c_t) 0xFFFE0000) {
							// change endian!
							f_type = SCRIPTFILE_TYPE_UCS4LE;
							return 0xFEFF;
						}
						if(outchar >= 0) {
							return outchar;
						}
					}
				}
			}
			fprintf(stderr, "ERROR: invalid UCS4BE end or bit 32 set\n");
			CloseFile();
			return SCRIPTFILE_EOF;

		case SCRIPTFILE_TYPE_ISO88591:
			return a;

		case SCRIPTFILE_TYPE_ASCII:
			if(a >= 0x80) {
				return SCRIPTFILE_BAD;
			}
			return a;

		case SCRIPTFILE_TYPE_UTF8:
			// U-00000000 - U-0000007F: 0xxxxxxx
			// U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
			// U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
			// U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			// U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
			// U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
			// if necessary, we resync. our self (skip stray 10xxxxxx)
			while(a >= 0x80 && a <= 0xBF) {
				a = ReadByte();
			}
			if(a == -1) {
				CloseFile();
				return SCRIPTFILE_EOF;
			}
			if(a < 0x80) {
				return a;
			}
			if(a >= 0xFE) {
				// bad entry here!
				// TODO: error or skip silently?
				CloseFile();
				return SCRIPTFILE_EOF;
			}
			// multi-byte character -- read all the 10xxxxxx...
			// (cnt is the number of bytes to follow, min is the
			// smallest code valid for a sequence of that length)
			if(a >= 0xFC) {
				a &= 0x01;
				min = 1 << (2 + 6 * 4);
				cnt = 5;
			}
			else if(a >= 0xF8) {
				a &= 0x03;
				min = 1 << (3 + 6 * 3);
				cnt = 4;
			}
			else if(a >= 0xF0) {
				a &= 0x07;
				min = 1 << (4 + 6 * 2);
				cnt = 3;
			}
			else if(a >= 0xE0) {
				a &= 0x0F;
				min = 1 << (5 + 6);
				cnt = 2;
			}
			else /*if(a >= 0xC0)*/ {
				a &= 0x1F;
				min = 1 << 7;
				cnt = 1;
			}
			outchar = a;
			while(cnt > 0) {
				cnt--;
				b = ReadByte();
				if(b == -1) {
					// bad entry here!
					// TODO: error or skip silently?
					CloseFile();
					return SCRIPTFILE_EOF;
				}
				if((b & 0xC0) != 0x80) {
					// save this byte for better error recovery
					UnreadByte(b);
					// the sequence is too short!
					// TODO: error or skip silently?
					return SCRIPTFILE_BAD;
				}
				outchar = outchar * 64 + (b & 0x3F);
			}
			if(outchar < min) {
				// refuse long encodings!
				// TODO: error or skip silently?
				return SCRIPTFILE_BAD;
			}
			return outchar;

		default:
			assert(0, "unknown encoding type when reading a character");
			/*NOTREACHED*/

		}
		a = ReadByte();
		if(a == -1) {
			CloseFile();
			return SCRIPTFILE_EOF;
		}
	}
	/*NOTREACHED*/
	return SCRIPTFILE_EOF;
}


// Read one byte of the input file.
//
// Bytes given back with UnreadByte() are returned first. The function
// returns the byte (0 to 255 when read from the file) or -1 when
// fread() can't return one more byte; in that last case, if errno was
// set, it is saved in f_last_errno and an error is printed in stderr.
// errno is always reset on entry.
int ScriptFile::ReadByte(void)
{
	errno = 0;

	if(f_unread_count > 0) {
		return f_unread[--f_unread_count];
	}

	unsigned char	byte;
	const size_t	got = fread(&byte, 1, 1, f_file);
	if(got == 1) {
		return byte;
	}

	// end of file (errno still zero) or read error
	if(errno != 0) {
		f_last_errno = errno;
		perror("fread()");
		fprintf(stderr, "%s:%d:%d: i/o error", f_filename, f_line, f_last_errno);
	}

	return -1;
}


// Give one byte back to the input so the next ReadByte() returns it
// (last given back, first returned). No more than UNREAD_COUNT_MAX
// bytes can be pending.
void ScriptFile::UnreadByte(unsigned char c)
{
	assert(f_unread_count < UNREAD_COUNT_MAX, "too many UnreadByte() calls (max = %d)", UNREAD_COUNT_MAX);

	f_unread[f_unread_count++] = c;
}


extern "C"
{


// Forward the 'yes' flag to SetReadActionscript() of the script being
// read; does nothing when no script is currently opened.
void sswf_read_actionscript(int yes)
{
	if(sf == 0) {
		return;
	}

	sf->SetReadActionscript(yes);
}


void sswf_add_include(const char *path)
{
	ScriptFile::string_t	*str;

	str = new ScriptFile::string_t();
	include_paths.MemAttach(str, sizeof(ScriptFile::string_t), "sswf_add_include(): user include path");
	str->f_string = include_paths.StrDup(path);
	include_paths.Set(-1, str);
}


// Save 'def' in the no_default_include flag.
//
// sswf_open_script() passes (no_default_include == 0) to OpenFile(), so
// calling this function with a value other than zero turns that
// OpenFile() parameter off.
// NOTE(review): the function name suggests the opposite polarity
//		 (non-zero enabling the default include paths) -- confirm
//		 against the callers.
void sswf_set_default_include(int def)
{
	no_default_include = def;
}

int sswf_open_script(const char *filename)
{
	ScriptFile	*n;
	int		ec;

	n = new ScriptFile(sf);
	if(n == 0) {
		fprintf(stderr, "FATAL ERROR: out of memory.\n");
		exit(1);
	}
	sf = n;

	ec = sf->OpenFile(filename, include_paths, no_default_include == 0);
	if(ec == 0) {
		lex_filename = sf->Filename();
	}

	return ec;
}


void sswf_close_script(void)
{
	ScriptFile	*p;

	if(sf != 0) {
		p = sf->Parent();
		delete sf;
		sf = p;
		if(p != 0) {
			lex_filename = p->Filename();
		}
	}
}


// Entry point used by the parser: return the next token of the current
// script as read by ScriptFile::GetToken(). A script must be opened
// (sf is used without being checked).
int yylex()
{
	const int token = sf->GetToken();

#if 0
printf("%s: %d: Read token [%d] '%c'\n",
			sf->Filename(), sf->Line(),
			token, token >= ' ' && token <= 0x7E ? token : '?');
#endif

	return token;
}


}


#if 0

#define	RETURN_TOKEN(type, subtype)	ylval.node = node_alloc(NODE_TYPE_##type, NODE_SUBTYPE_##subtype, yylloc.first_line); return type;
#define	RETURN_UNIT(name, unit)		yylval.type = NODE_SUBTYPE_##unit; return UNIT_##name;

static	void			skip_comment(int close);
static	struct node_t *		read_identifier(void);
static	struct node_t *		read_string(void);
static	struct node_t *		read_value(void);

/*
some unused rules...
"SHOW"[ \t_]?"FRAME"			{ yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_SHOW_FRAME, yylloc.first_line); return DIRECT_REFERENCE; }
"REMOVE"[ \t_]?"ALL"			{ yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_REMOVE_ALL, yylloc.first_line); return DIRECT_REFERENCE; }
"END"					{ yylval.node = node_alloc(NODE_TYPE_OBJECT, NODE_SUBTYPE_END, yylloc.first_line); return DIRECT_REFERENCE; }
*/

%}

%option noyywrap

%%

"ACTION"				{ RETURN_TOKEN(OBJECT, ACTION); }
"ACTIONSCRIPT"				{ return ACTIONSCRIPT; }
"BC"					{ RETURN_UNIT(COLOR, BC); }
"BUTTON"				{ RETURN_TOKEN(OBJECT, BUTTON); }
"CATCH"					{ RETURN_TOKEN(OBJECT, CATCH); }
"CM"					{ RETURN_UNIT(SIZE, CM); }
"COLOR"[ \t_]?"TRANSFORM"		{ RETURN_TOKEN(OBJECT, COLOR_TRANSFORM); }
"COLOR"					{ RETURN_TOKEN(OBJECT, COLOR); }
"DEG"					{ RETURN_UNIT(ANGLE, DEG); }
"DO"[ \t_]?"ACTION"			{ RETURN_TOKEN(OBJECT, DO_ACTION); }
"EDGES"					{ RETURN_TOKEN(OBJECT, EDGES); }
"EDIT"[ \t_]?"TEXT"			{ RETURN_TOKEN(OBJECT, EDIT_TEXT); }
"ELSE"					{ return ELSE; }
"END"					{ RETURN_TOKEN(OBJECT, END); }
"ENVELOPE?"				{ RETURN_TOKEN(OBJECT, ENVELOPE); }
"EXPORT"				{ RETURN_TOKEN(OBJECT, EXPORT); }
"FALSE"					{ yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line); yylval.node->integer = 0; return VALUE; }
"FC"					{ RETURN_UNIT(COLOR, FC); }
"FILL"[ \t_]?"STYLE"			{ RETURN_TOKEN(OBJECT, FILL_STYLE); }
"FINALLY"				{ RETURN_TOKEN(OBJECT, FINALLY); }
"FONT"					{ RETURN_TOKEN(OBJECT, FONT); }
"FOR"					{ return FOR; }
"FPF"					{ RETURN_UNIT(SPEED, FPF); }
"FPS"					{ RETURN_UNIT(SPEED, FPS); }
"FRAME"[ \t_]?"LABEL"			{ RETURN_TOKEN(OBJECT, FRAME_LABEL); }
"FRM"					{ RETURN_UNIT(TIME, FRM); }
"FUNCTION"				{ RETURN_TOKEN(OBJECT, FUNCTION); }
"GLYPH"					{ RETURN_TOKEN(OBJECT, GLYPH); }
"GRAD"					{ RETURN_UNIT(ANGLE, GRAD); }
"GRADIENT"				{ RETURN_TOKEN(OBJECT, GRADIENT); }
"IF"					{ return IF; }
"IMAGE"					{ RETURN_TOKEN(OBJECT, IMAGE); }
"IMPORT"				{ RETURN_TOKEN(OBJECT, IMPORT); }
"IN"					{ RETURN_UNIT(SIZE, IN); }
"LABEL"					{ RETURN_TOKEN(OBJECT, LABEL); }
"LINE"[ \t_]?"STYLE"			{ RETURN_TOKEN(OBJECT, LINE_STYLE); }
"LIST"|"BLOCK"				{ RETURN_TOKEN(OBJECT, LIST); }
"MATRIX"				{ RETURN_TOKEN(OBJECT, MATRIX); }
"MIN"					{ RETURN_UNIT(TIME, MIN); }
"ON"[ \t_]?"EVENT"			{ RETURN_TOKEN(OBJECT, ON_EVENT); }
"PLACE"[ \t_]?"OBJECT"			{ RETURN_TOKEN(OBJECT, PLACE_OBJECT); }
"POINTS"				{ RETURN_TOKEN(OBJECT, POINTS); }
"PR"					{ RETURN_UNIT(RATIO, PR); }
"PX"					{ RETURN_UNIT(SIZE, PX); }
"RAD"					{ RETURN_UNIT(ANGLE, RAD); }
"RECT"("ANGLE")?			{ RETURN_TOKEN(OBJECT, RECT); }
"REMOVE"				{ RETURN_TOKEN(OBJECT, REMOVE); }
"REPLACE"[ \t_]?"OBJECT"		{ RETURN_TOKEN(OBJECT, REPLACE_OBJECT); }
"RT"					{ RETURN_UNIT(RATIO, RT); }
"SCRIPT"[ \t_]?"LIMITS"			{ RETURN_TOKEN(OBJECT, SCRIPT_LIMITS); }
"SEC"					{ RETURN_UNIT(TIME, SEC); }
"SEQUENCE"				{ RETURN_TOKEN(OBJECT, SEQUENCE); }
"SET"[ \t_]?"BACKGROUND"[ \t_]?"COLOR"	{ RETURN_TOKEN(OBJECT, SET_BACKGROUND_COLOR); }
"SET"[ \t_]?"TAB"[ \t_]?"INDEX"		{ RETURN_TOKEN(OBJECT, SET_TAB_INDEX); }
("DEFINE"[ \t_]?)?"SHAPE"		{ RETURN_TOKEN(OBJECT, SHAPE); }
"SHOW"[ \t_]?"FRAME"			{ RETURN_TOKEN(OBJECT, SHOW_FRAME); }
"SOUND"					{ RETURN_TOKEN(OBJECT, SOUND); }
"SOUND"[ \t_]?"INFO"			{ RETURN_TOKEN(OBJECT, SOUND_INFO); }
"SPRITE"				{ RETURN_TOKEN(OBJECT, SPRITE); }
"STATE"					{ RETURN_TOKEN(OBJECT, STATE); }
"TEXT"					{ RETURN_TOKEN(OBJECT, TEXT); }
"TEXT"[ \t_]?"SETUP"			{ RETURN_TOKEN(OBJECT, TEXT_SETUP); }
"TRUE"					{ yylval.node = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line); yylval.node->integer = 1; return VALUE; }
"TRY"					{ RETURN_TOKEN(OBJECT, TRY); }
"TW"					{ RETURN_UNIT(SIZE, TW); }
"WITH"					{ RETURN_TOKEN(OBJECT, WITH); }


/*
 * Create an identifier node for the token currently held in yytext.
 *
 * The node is allocated with type NODE_TYPE_IDENTIFIER and tagged with the
 * line found in yylloc.first_line; its string member receives the result
 * of sswf_strdup(yytext).
 */
static struct node_t *read_identifier(void)
{
	struct node_t * const ident = node_alloc(NODE_TYPE_IDENTIFIER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);

	ident->string = sswf_strdup(yytext);
	return ident;
}


/*
 * Convert the numeric literal found at the current scanner position into
 * a node.
 *
 * The first character is taken from yytext; every following character is
 * read with input() and the first character which does not belong to the
 * number is handed back with unput().
 *
 * Forms understood:
 *	0x or 0X followed by hex digits		hexadecimal integer
 *	0 followed by digits 0 to 7		octal integer
 *	decimal digits				decimal integer
 *	digits '.' digits, optionally followed
 *	by e/E, an optional sign and digits	floating point
 *
 * Returns a newly allocated NODE_TYPE_FLOAT node (floating_point member set)
 * when a decimal point was read, and a NODE_TYPE_INTEGER node (integer
 * member set) otherwise. Problems are reported on stderr and a node is
 * returned in every case.
 */
static struct node_t *read_value(void)
{
	int		c;
	long		r, p, sign;
	double		fr, div, exponent;
	struct node_t	*n;

	r = 0;

	c = *yytext;
	if(c == '.') {
		/* we can have a decimal point only if a second digit was found;
		 * give that digit back so the fraction loop below reads it again
		 */
		unput(yytext[1]);
	}

	if(c == '0') {		/* check for hexa, otherwise it's probably octal or floating point? */
		c = input();
		if(c == 'x' || c == 'X') {
			/* hexadecimal */
			p = c;		/* remember the 'x' in case we have to give it back */
			c = input();
			if((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) {
				/* this is NOT a valid hex. value: the result is the
				 * integer 0 and the "x<c>" is returned to the input
				 */
				unput(c);
				unput(p);
				c = '\0';
			}
			else {
				/* from here on p holds the value as it was before
				 * the last digit was added
				 */
				p = 0;
				for(;;) {
					if(c >= '0' && c <= '9') {
						p = r;
						r = r * 16 + c - '0';
					}
					else if(c >= 'a' && c <= 'f') {
						p = r;
						r = r * 16 + c - 'a' + 10;
					}
					else if(c >= 'A' && c <= 'F') {
						p = r;
						r = r * 16 + c - 'A' + 10;
					}
					else {
						if((c == 'm' || c == 'M') && (r & 15) == 0xC) {
							/* this is an hex. followed by 'cm'!
							 * the trailing C digit was in fact
							 * the start of the unit: give back
							 * the 'm' now and the 'c' just below
							 */
							unput(c);
							c = 'c';
							r = p;		/* restore saved value (because of overflow!) */
							fprintf(stderr, "WARNING: hexadecimal followed by the CM unit.\n");
						}
						unput(c);
						c = '\0';
						break;
					}
					c = input();
				}
			}
		}
		else if(c != '.') {
			/* octal: only the digits 0 to 7 are valid */
			while(c >= '0' && c <= '7') {
				r = r * 8 + c - '0';
				c = input();
			}
			if(c == '8' || c == '9') {
				fprintf(stderr, "ERROR: invalid octal number.\n");
				/* skip the rest of the number */
				do {
					c = input();
				} while(c >= '0' && c <= '9');
			}
			unput(c);
			c = '\0';
		}
	}
	if(c != '.') {
		/* decimal digits (nothing happens here after an hexadecimal
		 * or octal number since c was reset to '\0')
		 */
		while(c >= '0' && c <= '9') {
			r = r * 10 + c - '0';
			c = input();
		}
	}
	if(c == '.') {
		/* we found a floating point value */
		/* TODO: the following is wrong because the '...e+/-<value>' will change the outcome very much */
		fr = (double) r;
		div = 0.1;
		c = input();
		while(c >= '0' && c <= '9') {
			fr += (double) (c - '0') * div;
			div /= 10.0;
			c = input();
		}
		if(c == 'e' || c == 'E') {
			/* r is reused to remember the 'e' so that it can be
			 * given back when no exponent digits follow
			 */
			r = c;
			c = input();
			exponent = 0;
			sign = 1;
			if(c == '+') {
				c = input();
				if(c < '0' || c > '9') {
					unput(c);
					unput('+');
					c = r;
				}
			}
			else if(c == '-') {
				c = input();
				if(c < '0' || c > '9') {
					unput(c);
					unput('-');
					c = r;
				}
				else {
					sign = -1;
				}
			}
			else if(c < '0' || c > '9') {
				unput(c);
				c = r;
			}
			while(c >= '0' && c <= '9') {
				exponent = exponent * 10 + c - '0';
				c = input();
			}
			if(exponent != 0) {
				fr *= pow(10, exponent * (double) sign);
			}
		}
		n = node_alloc(NODE_TYPE_FLOAT, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);
		n->floating_point = fr;
	}
	else {
		/* we have a integer number */
		n = node_alloc(NODE_TYPE_INTEGER, NODE_SUBTYPE_UNKNOWN, yylloc.first_line);
		n->integer = r;
	}

	/* give back the character which ended the number (if not done yet) */
	if(c != '\0') {
		unput(c);
	}

	return n;
}

#endif