/* sswf_lexical.c++ -- written by Alexis WILKE for Made to Order Software Corp. (c) 2002-2009 */ /* Copyright (c) 2002-2009 Made to Order Software Corp. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define SSWF_NEED_ASSERT #include "sswf.h" extern "C" { #include "sswf_grammar.h" }; #include "sswf/libsswf.h" extern YYLTYPE yylloc; #define UNREAD_COUNT_MAX 4 #define UNGET_COUNT_MAX 16 #define MULTIBYTE_MAX 16 class ScriptFile { public: struct string_t : public sswf::ItemBase { char * f_string; }; typedef sswf::sswf_ucs4_t c_t; // UCS chars are 31 bits, negative values are used for errors // NOTES: // // Glossary // ASCII American Standard Code for Information Interchange // BE Big-endian (most significant byte first) // LE Little-endian (least significant byte first) // UCS Universal Character Set // UTF Universal Transformation Format // // Note that UCS2, UTF16, UCS4 and UTF32 don't have endian specified. // This is correct since we can infer the endian by checking the // few first bytes of input (which MUST represent a comment) // enum scriptfile_type_t { // totally unknown SCRIPTFILE_TYPE_UNKNOWN = 0, // still unknown // unknown but valid for iconv() SCRIPTFILE_TYPE_MULTIBYTES, // a specified encoding (using iconv() to convert the characters) // 8 bits SCRIPTFILE_TYPE_ASCII, // accept characters upto 127 as is; others are viewed as erroneous SCRIPTFILE_TYPE_ISO88591, // use input as is (like Unicode page 0) SCRIPTFILE_TYPE_UTF8, // 1 to 6 bytes to encode any character SCRIPTFILE_TYPE_CESU8, // 1 to 4 bytes to encode 0x110000 characters, 0xD800 to 0xDFFF is interpreted // 16 bits SCRIPTFILE_TYPE_UCS2, // UCS-2 chars (limited to 0x10000 characters) SCRIPTFILE_TYPE_UCS2BE, // same as UCS2 in big endian SCRIPTFILE_TYPE_UCS2LE, // same as UCS2 in little endian SCRIPTFILE_TYPE_UCS2SAME, // UCS-2 in processor endian SCRIPTFILE_TYPE_UCS2SWAP, // UCS-2 in opposite process endian SCRIPTFILE_TYPE_UTF16, // UTF-16 (0xD800 to 0xDFFF are escapes to represent 20 bits) SCRIPTFILE_TYPE_UTF16BE, // same as UTF-16 in big endian SCRIPTFILE_TYPE_UTF16LE, // same as UTF-16 in little endian // 32 bits SCRIPTFILE_TYPE_UCS4, // UCS-4 (unlimited character set, except negative values) SCRIPTFILE_TYPE_UCS4BE, // same as UCS-4 big endian SCRIPTFILE_TYPE_UCS4LE, // same as UCS-4 little endian SCRIPTFILE_TYPE_UCS4SAME, // UCS-4 in processor endian SCRIPTFILE_TYPE_UCS4SWAP, // UCS-4 in opposite process endian SCRIPTFILE_TYPE_UTF32, // UTF-32 (limited to 0x110000 chars) SCRIPTFILE_TYPE_UTF32BE, // UTF-32 big endian SCRIPTFILE_TYPE_UTF32LE, // UTF-32 little endian SCRIPTFILE_TYPE_SAME, // keep input type SCRIPTFILE_TYPE_max }; struct sf_type_t { scriptfile_type_t f_type; // corresponding type (internally supported) const char * f_name; // official encoding name (as in iconv) unsigned long f_input; // accepted input encoding (the one we determine we our internal algorithm) }; #define SCRIPTFILE_EOF ((c_t) -1) // UCS chars are 31 bits max. #define SCRIPTFILE_BAD ((c_t) -2) // UCS chars are 31 bits max. ScriptFile(ScriptFile *parent); ~ScriptFile(); int OpenFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths); void CloseFile(void); int GetToken(void); unsigned int Line(void) const; ScriptFile * Parent(void); const char * Filename(void); int ReadActionscript(void); void SetReadActionscript(bool yes); private: void Reset(void); int FindFile(const char *filename, sswf::Vectors& user_include_paths, bool use_internal_paths); c_t GetChar(void); void UngetChar(c_t c); c_t ReadChar(void); int ReadByte(void); void UnreadByte(unsigned char c); void SkipComment(int close); int ReadIdentifier(c_t c); int ReadString(c_t c); int ReadValue(c_t c); ScriptFile * f_parent; const char * f_filename; unsigned int f_line; unsigned int f_first_line; scriptfile_type_t f_type; FILE * f_file; int f_last_errno; c_t f_last_char; unsigned int f_unread_count; unsigned char f_unread[UNREAD_COUNT_MAX]; unsigned int f_unget_count; c_t f_unget[UNGET_COUNT_MAX]; bool f_iconvertor_open; iconv_t f_iconvertor; size_t f_mb_count; char f_multibytes[MULTIBYTE_MAX]; bool f_read_actionscript; }; ScriptFile *sf; // the current script file sswf::Vectors include_paths; // an array of strings where files are being searched int no_default_include; #define SF_TYPE_TO_FLAG1(a) (1<