1 /*
2 Open Asset Import Library (assimp)
3 ----------------------------------------------------------------------
4
5 Copyright (c) 2006-2015, assimp team
6 All rights reserved.
7
8 Redistribution and use of this software in source and binary forms,
9 with or without modification, are permitted provided that the
10 following conditions are met:
11
12 * Redistributions of source code must retain the above
13 copyright notice, this list of conditions and the
14 following disclaimer.
15
16 * Redistributions in binary form must reproduce the above
17 copyright notice, this list of conditions and the
18 following disclaimer in the documentation and/or other
19 materials provided with the distribution.
20
21 * Neither the name of the assimp team, nor the names of its
22 contributors may be used to endorse or promote products
23 derived from this software without specific prior
24 written permission of the assimp team.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
38 ----------------------------------------------------------------------
39 */
40
41 // Modified by Lasse Oorni for Urho3D
42
43 /** @file FBXBinaryTokenizer.cpp
44 * @brief Implementation of a fake lexer for binary fbx files -
45 * we emit tokens so the parser needs almost no special handling
46 * for binary files.
47 */
48
49 #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
50
#include "FBXTokenizer.h"
#include "FBXUtil.h"
#include "../include/assimp/defs.h"
// Urho3D: VS2008 compatibility
#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
#include <stdint.h>
#else
#include "../include/assimp/Compiler/pstdint.h"
#endif
#include <cstring>
#include "Exceptional.h"
#include "ByteSwapper.h"
62
63 namespace Assimp {
64 namespace FBX {
65
66
// ------------------------------------------------------------------------------------------------
// Construct a token spanning [sbegin, send) in the input buffer.
// Binary tokens have no text line/column; the byte offset of the token is
// stored in the 'line' field and 'column' is set to BINARY_MARKER so the
// token is recognizable as coming from a binary file.
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
    :
#ifdef DEBUG
    // debug-only copy of the token text, kept purely to ease inspection
    contents(sbegin, static_cast<size_t>(send-sbegin)),
#endif
    sbegin(sbegin)
    , send(send)
    , type(type)
    , line(offset)          // for binary tokens 'line' holds the byte offset
    , column(BINARY_MARKER) // marks this token as binary (no real column)
{
    ai_assert(sbegin);
    ai_assert(send);

    // binary tokens may have zero length because they are sometimes dummies
    // inserted by TokenizeBinary()
    ai_assert(send >= sbegin);
}
86
87
88 namespace {
89
90 // ------------------------------------------------------------------------------------------------
91 // signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
92 AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int offset) AI_WONT_RETURN_SUFFIX;
TokenizeError(const std::string & message,unsigned int offset)93 AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int offset)
94 {
95 throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
96 }
97
98
99 // ------------------------------------------------------------------------------------------------
Offset(const char * begin,const char * cursor)100 uint32_t Offset(const char* begin, const char* cursor)
101 {
102 ai_assert(begin <= cursor);
103 return static_cast<unsigned int>(cursor - begin);
104 }
105
106
107 // ------------------------------------------------------------------------------------------------
TokenizeError(const std::string & message,const char * begin,const char * cursor)108 void TokenizeError(const std::string& message, const char* begin, const char* cursor)
109 {
110 TokenizeError(message, Offset(begin, cursor));
111 }
112
113
114 // ------------------------------------------------------------------------------------------------
ReadWord(const char * input,const char * & cursor,const char * end)115 uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
116 {
117 if(Offset(cursor, end) < 4) {
118 TokenizeError("cannot ReadWord, out of bounds",input, cursor);
119 }
120
121 uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
122 AI_SWAP4(word);
123
124 cursor += 4;
125
126 return word;
127 }
128
129
130 // ------------------------------------------------------------------------------------------------
ReadByte(const char * input,const char * & cursor,const char * end)131 uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
132 {
133 if(Offset(cursor, end) < 1) {
134 TokenizeError("cannot ReadByte, out of bounds",input, cursor);
135 }
136
137 uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
138 ++cursor;
139
140 return word;
141 }
142
143
144 // ------------------------------------------------------------------------------------------------
ReadString(const char * & sbegin_out,const char * & send_out,const char * input,const char * & cursor,const char * end,bool long_length=false,bool allow_null=false)145 unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
146 bool long_length = false,
147 bool allow_null = false)
148 {
149 const uint32_t len_len = long_length ? 4 : 1;
150 if(Offset(cursor, end) < len_len) {
151 TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
152 }
153
154 const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);
155
156 if (Offset(cursor, end) < length) {
157 TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
158 }
159
160 sbegin_out = cursor;
161 cursor += length;
162
163 send_out = cursor;
164
165 if(!allow_null) {
166 for (unsigned int i = 0; i < length; ++i) {
167 if(sbegin_out[i] == '\0') {
168 TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
169 }
170 }
171 }
172
173 return length;
174 }
175
176
177
// ------------------------------------------------------------------------------------------------
// Skip a single property value in the binary stream and report its extent via
// [sbegin_out, send_out). The returned range includes the leading one-byte
// type code. 'cursor' ends up just past the value; any failure throws
// DeadlyImportError through TokenizeError().
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
    }

    // the one-byte type code determines how many bytes to skip
    const char type = *cursor;
    sbegin_out = cursor++;

    switch(type)
    {
        // 16 bit int
    case 'Y':
        cursor += 2;
        break;

        // 1 bit bool flag (yes/no)
    case 'C':
        cursor += 1;
        break;

        // 32 bit int
    case 'I':
        // <- fall thru

        // float
    case 'F':
        cursor += 4;
        break;

        // double
    case 'D':
        cursor += 8;
        break;

        // 64 bit int
    case 'L':
        cursor += 8;
        break;

        // note: do not write cursor += ReadWord(...cursor) as this would be UB

        // raw binary data
    case 'R':
    {
        // 32-bit byte count followed by the raw payload
        const uint32_t length = ReadWord(input, cursor, end);
        cursor += length;
        break;
    }

    case 'b':
        // TODO: what is the 'b' type code? Right now we just skip over it /
        // take the full range we could get
        cursor = end;
        break;

        // array of *
    case 'f':
    case 'd':
    case 'l':
    case 'i': {

        // array header: element count, encoding flag, and byte length of the
        // (possibly compressed) payload
        const uint32_t length = ReadWord(input, cursor, end);
        const uint32_t encoding = ReadWord(input, cursor, end);

        const uint32_t comp_len = ReadWord(input, cursor, end);

        // compute length based on type and check against the stored value
        if(encoding == 0) {
            uint32_t stride = 0;
            switch(type)
            {
            case 'f':
            case 'i':
                stride = 4;
                break;

            case 'd':
            case 'l':
                stride = 8;
                break;

            default:
                ai_assert(false);
            };
            ai_assert(stride > 0);
            // NOTE(review): 'length * stride' is a 32-bit multiply and could
            // wrap for a crafted file with a huge element count — worth
            // confirming whether a 64-bit widening check is needed here.
            if(length * stride != comp_len) {
                TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
            }
        }
        // zip/deflate algorithm (encoding==1)? take given length. anything else? die
        else if (encoding != 1) {
            TokenizeError("cannot ReadData, unknown encoding",input, cursor);
        }
        cursor += comp_len;
        break;
    }

        // string
    case 'S': {
        const char* sb, *se;
        // 0 characters can legally happen in such strings
        ReadString(sb, se, input, cursor, end, true, true);
        break;
    }
    default:
        TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
    }

    // catch-all bounds check: the fixed-size cases above advance the cursor
    // without checking first, so validate the final position once here
    if(cursor > end) {
        TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
    }

    // the type code is contained in the returned range
    send_out = cursor;
}
295
296
// ------------------------------------------------------------------------------------------------
// Read one nested record ("scope") from the binary stream and append the
// corresponding pseudo-tokens (KEY, DATA, COMMA, brackets) to 'output_tokens',
// recursing into child scopes. Returns false when a zero end-offset is read,
// which marks the start of the file footer, i.e. nothing more to tokenize.
bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
{
    // the first word contains the offset at which this block ends
    const uint32_t end_offset = ReadWord(input, cursor, end);

    // we may get 0 if reading reached the end of the file -
    // fbx files have a mysterious extra footer which I don't know
    // how to extract any information from, but at least it always
    // starts with a 0.
    if(!end_offset) {
        return false;
    }

    // the end offset must lie within the buffer and past the current cursor
    if(end_offset > Offset(input, end)) {
        TokenizeError("block offset is out of range",input, cursor);
    }
    else if(end_offset < Offset(input, cursor)) {
        TokenizeError("block offset is negative out of range",input, cursor);
    }

    // the second data word contains the number of properties in the scope
    const uint32_t prop_count = ReadWord(input, cursor, end);

    // the third data word contains the length of the property list
    const uint32_t prop_length = ReadWord(input, cursor, end);

    // now comes the name of the scope/key
    const char* sbeg, *send;
    ReadString(sbeg, send, input, cursor, end);

    output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

    // now come the individual properties
    const char* begin_cursor = cursor;
    for (unsigned int i = 0; i < prop_count; ++i) {
        // each property must stay inside the declared property-list span,
        // hence the tightened end pointer
        ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

        output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

        // emit separators so the text and binary parsers share one grammar
        if(i != prop_count-1) {
            output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
        }
    }

    // the properties must consume exactly prop_length bytes
    if (Offset(begin_cursor, cursor) != prop_length) {
        TokenizeError("property length not reached, something is wrong",input, cursor);
    }

    // at the end of each nested block, there is a NUL record to indicate
    // that the sub-scope exists (i.e. to distinguish between P: and P : {})
    // this NUL record is 13 bytes long.
#define BLOCK_SENTINEL_LENGTH 13

    if (Offset(input, cursor) < end_offset) {

        if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
            TokenizeError("insufficient padding bytes at block end",input, cursor);
        }

        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

        // XXX this is vulnerable to stack overflowing ..
        // (one recursion level per nesting level; a malicious file with very
        // deep nesting can exhaust the stack)
        while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
            ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
        }
        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

        // verify the 13-byte all-zero sentinel that terminates the block
        // (reading cursor[0..12] is safe: the padding check above guarantees
        // at least BLOCK_SENTINEL_LENGTH bytes remain before end_offset)
        for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
            if(cursor[i] != '\0') {
                TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
            }
        }
        cursor += BLOCK_SENTINEL_LENGTH;
    }

    // we must land exactly on the declared end offset of this scope
    if (Offset(input, cursor) != end_offset) {
        TokenizeError("scope length not reached, something is wrong",input, cursor);
    }

    return true;
}
379
380
381 }
382
383 // ------------------------------------------------------------------------------------------------
TokenizeBinary(TokenList & output_tokens,const char * input,unsigned int length)384 void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
385 {
386 ai_assert(input);
387
388 if(length < 0x1b) {
389 TokenizeError("file is too short",0);
390 }
391
392 if (strncmp(input,"Kaydara FBX Binary",18)) {
393 TokenizeError("magic bytes not found",0);
394 }
395
396
397 //uint32_t offset = 0x1b;
398
399 const char* cursor = input + 0x1b;
400
401 while (cursor < input + length) {
402 if(!ReadScope(output_tokens, input, cursor, input + length)) {
403 break;
404 }
405 }
406 }
407
408 } // !FBX
409 } // !Assimp
410
411 #endif
412