1 /*
2 Open Asset Import Library (assimp)
3 ----------------------------------------------------------------------
4 
5 Copyright (c) 2006-2015, assimp team
6 All rights reserved.
7 
8 Redistribution and use of this software in source and binary forms,
9 with or without modification, are permitted provided that the
10 following conditions are met:
11 
12 * Redistributions of source code must retain the above
13   copyright notice, this list of conditions and the
14   following disclaimer.
15 
16 * Redistributions in binary form must reproduce the above
17   copyright notice, this list of conditions and the
18   following disclaimer in the documentation and/or other
19   materials provided with the distribution.
20 
21 * Neither the name of the assimp team, nor the names of its
22   contributors may be used to endorse or promote products
23   derived from this software without specific prior
24   written permission of the assimp team.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 
38 ----------------------------------------------------------------------
39 */
40 
41 /** @file  FBXTokenizer.cpp
42  *  @brief Implementation of the FBX broadphase lexer
43  */
44 
45 #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
46 
47 // tab width for logging columns
48 #define ASSIMP_FBX_TAB_WIDTH 4
49 
50 #include "ParsingUtils.h"
51 
52 #include "FBXTokenizer.h"
53 #include "FBXUtil.h"
54 #include "Exceptional.h"
55 
56 namespace Assimp {
57 namespace FBX {
58 
59 // ------------------------------------------------------------------------------------------------
Token(const char * sbegin,const char * send,TokenType type,unsigned int line,unsigned int column)60 Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column)
61     :
62 #ifdef DEBUG
63     contents(sbegin, static_cast<size_t>(send-sbegin)),
64 #endif
65     sbegin(sbegin)
66     , send(send)
67     , type(type)
68     , line(line)
69     , column(column)
70 {
71     ai_assert(sbegin);
72     ai_assert(send);
73 
74     // tokens must be of non-zero length
75     ai_assert(static_cast<size_t>(send-sbegin) > 0);
76 }
77 
78 
79 // ------------------------------------------------------------------------------------------------
~Token()80 Token::~Token()
81 {
82 }
83 
84 
85 namespace {
86 
87 // ------------------------------------------------------------------------------------------------
88 // signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
89 AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column) AI_WONT_RETURN_SUFFIX;
TokenizeError(const std::string & message,unsigned int line,unsigned int column)90 AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
91 {
92     throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column));
93 }
94 
95 
96 // process a potential data token up to 'cur', adding it to 'output_tokens'.
97 // ------------------------------------------------------------------------------------------------
ProcessDataToken(TokenList & output_tokens,const char * & start,const char * & end,unsigned int line,unsigned int column,TokenType type=TokenType_DATA,bool must_have_token=false)98 void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end,
99                       unsigned int line,
100                       unsigned int column,
101                       TokenType type = TokenType_DATA,
102                       bool must_have_token = false)
103 {
104     if (start && end) {
105         // sanity check:
106         // tokens should have no whitespace outside quoted text and [start,end] should
107         // properly delimit the valid range.
108         bool in_double_quotes = false;
109         for (const char* c = start; c != end + 1; ++c) {
110             if (*c == '\"') {
111                 in_double_quotes = !in_double_quotes;
112             }
113 
114             if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
115                 TokenizeError("unexpected whitespace in token", line, column);
116             }
117         }
118 
119         if (in_double_quotes) {
120             TokenizeError("non-terminated double quotes", line, column);
121         }
122 
123         output_tokens.push_back(new_Token(start,end + 1,type,line,column));
124     }
125     else if (must_have_token) {
126         TokenizeError("unexpected character, expected data token", line, column);
127     }
128 
129     start = end = NULL;
130 }
131 
132 }
133 
134 // ------------------------------------------------------------------------------------------------
Tokenize(TokenList & output_tokens,const char * input)135 void Tokenize(TokenList& output_tokens, const char* input)
136 {
137     ai_assert(input);
138 
139     // line and column numbers numbers are one-based
140     unsigned int line = 1;
141     unsigned int column = 1;
142 
143     bool comment = false;
144     bool in_double_quotes = false;
145     bool pending_data_token = false;
146 
147     const char* token_begin = NULL, *token_end = NULL;
148     for (const char* cur = input;*cur;column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) {
149         const char c = *cur;
150 
151         if (IsLineEnd(c)) {
152             comment = false;
153 
154             column = 0;
155             ++line;
156         }
157 
158         if(comment) {
159             continue;
160         }
161 
162         if(in_double_quotes) {
163             if (c == '\"') {
164                 in_double_quotes = false;
165                 token_end = cur;
166 
167                 ProcessDataToken(output_tokens,token_begin,token_end,line,column);
168                 pending_data_token = false;
169             }
170             continue;
171         }
172 
173         switch(c)
174         {
175         case '\"':
176             if (token_begin) {
177                 TokenizeError("unexpected double-quote", line, column);
178             }
179             token_begin = cur;
180             in_double_quotes = true;
181             continue;
182 
183         case ';':
184             ProcessDataToken(output_tokens,token_begin,token_end,line,column);
185             comment = true;
186             continue;
187 
188         case '{':
189             ProcessDataToken(output_tokens,token_begin,token_end, line, column);
190             output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
191             continue;
192 
193         case '}':
194             ProcessDataToken(output_tokens,token_begin,token_end,line,column);
195             output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
196             continue;
197 
198         case ',':
199             if (pending_data_token) {
200                 ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true);
201             }
202             output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
203             continue;
204 
205         case ':':
206             if (pending_data_token) {
207                 ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true);
208             }
209             else {
210                 TokenizeError("unexpected colon", line, column);
211             }
212             continue;
213         }
214 
215         if (IsSpaceOrNewLine(c)) {
216 
217             if (token_begin) {
218                 // peek ahead and check if the next token is a colon in which
219                 // case this counts as KEY token.
220                 TokenType type = TokenType_DATA;
221                 for (const char* peek = cur;  *peek && IsSpaceOrNewLine(*peek); ++peek) {
222                     if (*peek == ':') {
223                         type = TokenType_KEY;
224                         cur = peek;
225                         break;
226                     }
227                 }
228 
229                 ProcessDataToken(output_tokens,token_begin,token_end,line,column,type);
230             }
231 
232             pending_data_token = false;
233         }
234         else {
235             token_end = cur;
236             if (!token_begin) {
237                 token_begin = cur;
238             }
239 
240             pending_data_token = true;
241         }
242     }
243 }
244 
245 } // !FBX
246 } // !Assimp
247 
248 #endif
249