1 /*
2 Open Asset Import Library (assimp)
3 ----------------------------------------------------------------------
4
5 Copyright (c) 2006-2015, assimp team
6 All rights reserved.
7
8 Redistribution and use of this software in source and binary forms,
9 with or without modification, are permitted provided that the
10 following conditions are met:
11
12 * Redistributions of source code must retain the above
13 copyright notice, this list of conditions and the
14 following disclaimer.
15
16 * Redistributions in binary form must reproduce the above
17 copyright notice, this list of conditions and the
18 following disclaimer in the documentation and/or other
19 materials provided with the distribution.
20
21 * Neither the name of the assimp team, nor the names of its
22 contributors may be used to endorse or promote products
23 derived from this software without specific prior
24 written permission of the assimp team.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
38 ----------------------------------------------------------------------
39 */
40
41 /** @file FBXTokenizer.cpp
42 * @brief Implementation of the FBX broadphase lexer
43 */
44
45 #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
46
47 // tab width for logging columns
48 #define ASSIMP_FBX_TAB_WIDTH 4
49
50 #include "ParsingUtils.h"
51
52 #include "FBXTokenizer.h"
53 #include "FBXUtil.h"
54 #include "Exceptional.h"
55
56 namespace Assimp {
57 namespace FBX {
58
// ------------------------------------------------------------------------------------------------
// Construct a token covering the half-open input range [sbegin, send)
// (ProcessDataToken passes end+1, i.e. one past the last character).
// 'line' and 'column' are the one-based source position of the token start.
// The token does not copy the text; both pointers must stay valid for the
// token's lifetime.
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column)
    :
#ifdef DEBUG
    // debug builds keep a std::string copy of the token text for inspection
    contents(sbegin, static_cast<size_t>(send-sbegin)),
#endif
    sbegin(sbegin)
    , send(send)
    , type(type)
    , line(line)
    , column(column)
{
    ai_assert(sbegin);
    ai_assert(send);

    // tokens must be of non-zero length
    ai_assert(static_cast<size_t>(send-sbegin) > 0);
}
77
78
79 // ------------------------------------------------------------------------------------------------
~Token()80 Token::~Token()
81 {
82 }
83
84
85 namespace {
86
87 // ------------------------------------------------------------------------------------------------
88 // signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
89 AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column) AI_WONT_RETURN_SUFFIX;
TokenizeError(const std::string & message,unsigned int line,unsigned int column)90 AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
91 {
92 throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column));
93 }
94
95
96 // process a potential data token up to 'cur', adding it to 'output_tokens'.
97 // ------------------------------------------------------------------------------------------------
ProcessDataToken(TokenList & output_tokens,const char * & start,const char * & end,unsigned int line,unsigned int column,TokenType type=TokenType_DATA,bool must_have_token=false)98 void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end,
99 unsigned int line,
100 unsigned int column,
101 TokenType type = TokenType_DATA,
102 bool must_have_token = false)
103 {
104 if (start && end) {
105 // sanity check:
106 // tokens should have no whitespace outside quoted text and [start,end] should
107 // properly delimit the valid range.
108 bool in_double_quotes = false;
109 for (const char* c = start; c != end + 1; ++c) {
110 if (*c == '\"') {
111 in_double_quotes = !in_double_quotes;
112 }
113
114 if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
115 TokenizeError("unexpected whitespace in token", line, column);
116 }
117 }
118
119 if (in_double_quotes) {
120 TokenizeError("non-terminated double quotes", line, column);
121 }
122
123 output_tokens.push_back(new_Token(start,end + 1,type,line,column));
124 }
125 else if (must_have_token) {
126 TokenizeError("unexpected character, expected data token", line, column);
127 }
128
129 start = end = NULL;
130 }
131
132 }
133
134 // ------------------------------------------------------------------------------------------------
Tokenize(TokenList & output_tokens,const char * input)135 void Tokenize(TokenList& output_tokens, const char* input)
136 {
137 ai_assert(input);
138
139 // line and column numbers numbers are one-based
140 unsigned int line = 1;
141 unsigned int column = 1;
142
143 bool comment = false;
144 bool in_double_quotes = false;
145 bool pending_data_token = false;
146
147 const char* token_begin = NULL, *token_end = NULL;
148 for (const char* cur = input;*cur;column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) {
149 const char c = *cur;
150
151 if (IsLineEnd(c)) {
152 comment = false;
153
154 column = 0;
155 ++line;
156 }
157
158 if(comment) {
159 continue;
160 }
161
162 if(in_double_quotes) {
163 if (c == '\"') {
164 in_double_quotes = false;
165 token_end = cur;
166
167 ProcessDataToken(output_tokens,token_begin,token_end,line,column);
168 pending_data_token = false;
169 }
170 continue;
171 }
172
173 switch(c)
174 {
175 case '\"':
176 if (token_begin) {
177 TokenizeError("unexpected double-quote", line, column);
178 }
179 token_begin = cur;
180 in_double_quotes = true;
181 continue;
182
183 case ';':
184 ProcessDataToken(output_tokens,token_begin,token_end,line,column);
185 comment = true;
186 continue;
187
188 case '{':
189 ProcessDataToken(output_tokens,token_begin,token_end, line, column);
190 output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
191 continue;
192
193 case '}':
194 ProcessDataToken(output_tokens,token_begin,token_end,line,column);
195 output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
196 continue;
197
198 case ',':
199 if (pending_data_token) {
200 ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true);
201 }
202 output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
203 continue;
204
205 case ':':
206 if (pending_data_token) {
207 ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true);
208 }
209 else {
210 TokenizeError("unexpected colon", line, column);
211 }
212 continue;
213 }
214
215 if (IsSpaceOrNewLine(c)) {
216
217 if (token_begin) {
218 // peek ahead and check if the next token is a colon in which
219 // case this counts as KEY token.
220 TokenType type = TokenType_DATA;
221 for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) {
222 if (*peek == ':') {
223 type = TokenType_KEY;
224 cur = peek;
225 break;
226 }
227 }
228
229 ProcessDataToken(output_tokens,token_begin,token_end,line,column,type);
230 }
231
232 pending_data_token = false;
233 }
234 else {
235 token_end = cur;
236 if (!token_begin) {
237 token_begin = cur;
238 }
239
240 pending_data_token = true;
241 }
242 }
243 }
244
245 } // !FBX
246 } // !Assimp
247
248 #endif
249