1 /*
2 Open Asset Import Library (assimp)
3 ----------------------------------------------------------------------
4 
5 Copyright (c) 2006-2015, assimp team
6 All rights reserved.
7 
8 Redistribution and use of this software in source and binary forms,
9 with or without modification, are permitted provided that the
10 following conditions are met:
11 
12 * Redistributions of source code must retain the above
13   copyright notice, this list of conditions and the
14   following disclaimer.
15 
16 * Redistributions in binary form must reproduce the above
17   copyright notice, this list of conditions and the
18   following disclaimer in the documentation and/or other
19   materials provided with the distribution.
20 
21 * Neither the name of the assimp team, nor the names of its
22   contributors may be used to endorse or promote products
23   derived from this software without specific prior
24   written permission of the assimp team.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 
38 ----------------------------------------------------------------------
39 */
40 
41 // Modified by Lasse Oorni for Urho3D
42 
43 /** @file  FBXBinaryTokenizer.cpp
44  *  @brief Implementation of a fake lexer for binary fbx files -
45  *    we emit tokens so the parser needs almost no special handling
46  *    for binary files.
47  */
48 
49 #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
50 
#include "FBXTokenizer.h"
#include "FBXUtil.h"
#include "../include/assimp/defs.h"
// Urho3D: VS2008 compatibility
#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
#include <stdint.h>
#else
#include "../include/assimp/Compiler/pstdint.h"
#endif
#include <cstring>
#include "Exceptional.h"
#include "ByteSwapper.h"
62 
63 namespace Assimp {
64 namespace FBX {
65 
66 
67 // ------------------------------------------------------------------------------------------------
// Token constructor used for *binary* FBX input: no line/column information
// exists, only an absolute file |offset|. The offset is stored in the |line|
// member and |column| is set to BINARY_MARKER so consumers can tell binary
// tokens apart from text tokens.
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
    :
    #ifdef DEBUG
    // debug builds keep a readable copy of the token text
    contents(sbegin, static_cast<size_t>(send-sbegin)),
    #endif
    sbegin(sbegin)
    , send(send)
    , type(type)
    , line(offset)
    , column(BINARY_MARKER)
{
    ai_assert(sbegin);
    ai_assert(send);

    // binary tokens may have zero length because they are sometimes dummies
    // inserted by TokenizeBinary()
    ai_assert(send >= sbegin);
}
86 
87 
88 namespace {
89 
90 // ------------------------------------------------------------------------------------------------
91 // signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int offset) AI_WONT_RETURN_SUFFIX;
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int offset)
{
    // prefix the message with the tokenizer stage name and the absolute file
    // offset at which tokenization failed, then abort via exception
    throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
}
97 
98 
99 // ------------------------------------------------------------------------------------------------
Offset(const char * begin,const char * cursor)100 uint32_t Offset(const char* begin, const char* cursor)
101 {
102     ai_assert(begin <= cursor);
103     return static_cast<unsigned int>(cursor - begin);
104 }
105 
106 
107 // ------------------------------------------------------------------------------------------------
TokenizeError(const std::string & message,const char * begin,const char * cursor)108 void TokenizeError(const std::string& message, const char* begin, const char* cursor)
109 {
110     TokenizeError(message, Offset(begin, cursor));
111 }
112 
113 
114 // ------------------------------------------------------------------------------------------------
ReadWord(const char * input,const char * & cursor,const char * end)115 uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
116 {
117     if(Offset(cursor, end) < 4) {
118         TokenizeError("cannot ReadWord, out of bounds",input, cursor);
119     }
120 
121     uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
122     AI_SWAP4(word);
123 
124     cursor += 4;
125 
126     return word;
127 }
128 
129 
130 // ------------------------------------------------------------------------------------------------
ReadByte(const char * input,const char * & cursor,const char * end)131 uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
132 {
133     if(Offset(cursor, end) < 1) {
134         TokenizeError("cannot ReadByte, out of bounds",input, cursor);
135     }
136 
137     uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
138     ++cursor;
139 
140     return word;
141 }
142 
143 
144 // ------------------------------------------------------------------------------------------------
ReadString(const char * & sbegin_out,const char * & send_out,const char * input,const char * & cursor,const char * end,bool long_length=false,bool allow_null=false)145 unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
146     bool long_length = false,
147     bool allow_null = false)
148 {
149     const uint32_t len_len = long_length ? 4 : 1;
150     if(Offset(cursor, end) < len_len) {
151         TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
152     }
153 
154     const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);
155 
156     if (Offset(cursor, end) < length) {
157         TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
158     }
159 
160     sbegin_out = cursor;
161     cursor += length;
162 
163     send_out = cursor;
164 
165     if(!allow_null) {
166         for (unsigned int i = 0; i < length; ++i) {
167             if(sbegin_out[i] == '\0') {
168                 TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
169             }
170         }
171     }
172 
173     return length;
174 }
175 
176 
177 
178 // ------------------------------------------------------------------------------------------------
// Skip over a single property value ("data record") and return the range
// [sbegin_out, send_out) covering it, *including* the leading one-byte type
// code. The cursor is advanced past the record. Throws DeadlyImportError
// (via TokenizeError) on a truncated record or unknown type code.
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
    }

    // the single-character type code determines the size/layout of the payload
    const char type = *cursor;
    sbegin_out = cursor++;

    switch(type)
    {
        // 16 bit int
    case 'Y':
        cursor += 2;
        break;

        // 1 bit bool flag (yes/no)
    case 'C':
        cursor += 1;
        break;

        // 32 bit int
    case 'I':
        // <- fall thru

        // float
    case 'F':
        cursor += 4;
        break;

        // double
    case 'D':
        cursor += 8;
        break;

        // 64 bit int
    case 'L':
        cursor += 8;
        break;

        // note: do not write cursor += ReadWord(...cursor) as this would be UB

        // raw binary data
    case 'R':
    {
        // length-prefixed blob: skip the payload without inspecting it
        const uint32_t length = ReadWord(input, cursor, end);
        cursor += length;
        break;
    }

    case 'b':
        // TODO: what is the 'b' type code? Right now we just skip over it /
        // take the full range we could get
        cursor = end;
        break;

        // array of *
    case 'f':
    case 'd':
    case 'l':
    case 'i':   {

        // array header: element count, encoding flag, and byte length of the
        // (possibly compressed) payload that follows
        const uint32_t length = ReadWord(input, cursor, end);
        const uint32_t encoding = ReadWord(input, cursor, end);

        const uint32_t comp_len = ReadWord(input, cursor, end);

        // compute length based on type and check against the stored value
        if(encoding == 0) {
            uint32_t stride = 0;
            switch(type)
            {
            case 'f':
            case 'i':
                stride = 4;
                break;

            case 'd':
            case 'l':
                stride = 8;
                break;

            default:
                ai_assert(false);
            };
            ai_assert(stride > 0);
            if(length * stride != comp_len) {
                TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
            }
        }
        // zip/deflate algorithm (encoding==1)? take given length. anything else? die
        else if (encoding != 1) {
            TokenizeError("cannot ReadData, unknown encoding",input, cursor);
        }
        cursor += comp_len;
        break;
    }

        // string
    case 'S': {
        const char* sb, *se;
        // 0 characters can legally happen in such strings
        ReadString(sb, se, input, cursor, end, true, true);
        break;
    }
    default:
        TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
    }

    // the fixed-size cases above advance the cursor blindly; verify only once
    // here that the record did not run past the end of the input
    if(cursor > end) {
        TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
    }

    // the type code is contained in the returned range
    send_out = cursor;
}
295 
296 
297 // ------------------------------------------------------------------------------------------------
// Read one nested record ("scope") and translate it into tokens, recursing
// into sub-scopes. Record layout (as read below): a 32 bit absolute end
// offset, a 32 bit property count, a 32 bit property-list byte length, a
// byte-length-prefixed name, the properties, and optionally nested scopes
// terminated by a 13-byte NUL sentinel.
// Returns false when the terminating footer record (end offset == 0) is hit.
bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
{
    // the first word contains the offset at which this block ends
    const uint32_t end_offset = ReadWord(input, cursor, end);

    // we may get 0 if reading reached the end of the file -
    // fbx files have a mysterious extra footer which I don't know
    // how to extract any information from, but at least it always
    // starts with a 0.
    if(!end_offset) {
        return false;
    }

    if(end_offset > Offset(input, end)) {
        TokenizeError("block offset is out of range",input, cursor);
    }
    else if(end_offset < Offset(input, cursor)) {
        TokenizeError("block offset is negative out of range",input, cursor);
    }

    // the second data word contains the number of properties in the scope
    const uint32_t prop_count = ReadWord(input, cursor, end);

    // the third data word contains the length of the property list
    const uint32_t prop_length = ReadWord(input, cursor, end);

    // now comes the name of the scope/key
    const char* sbeg, *send;
    ReadString(sbeg, send, input, cursor, end);

    output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

    // now come the individual properties; each must stay within the declared
    // property-list length, hence the clamped end pointer passed to ReadData
    const char* begin_cursor = cursor;
    for (unsigned int i = 0; i < prop_count; ++i) {
        ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

        output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

        // emit fake comma tokens between properties so the parser can treat
        // binary input exactly like tokenized text input
        if(i != prop_count-1) {
            output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
        }
    }

    if (Offset(begin_cursor, cursor) != prop_length) {
        TokenizeError("property length not reached, something is wrong",input, cursor);
    }

    // at the end of each nested block, there is a NUL record to indicate
    // that the sub-scope exists (i.e. to distinguish between P: and P : {})
    // this NUL record is 13 bytes long.
#define BLOCK_SENTINEL_LENGTH 13

    if (Offset(input, cursor) < end_offset) {

        if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
            TokenizeError("insufficient padding bytes at block end",input, cursor);
        }

        // emit fake brackets so nested scopes look like { ... } to the parser
        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

        // XXX this is vulnerable to stack overflowing ..
        while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
            ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
        }
        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

        // verify the sentinel really is all zero bytes, then skip it
        // (reading 13 bytes here is safe: end_offset was bounds-checked above)
        for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
            if(cursor[i] != '\0') {
                TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
            }
        }
        cursor += BLOCK_SENTINEL_LENGTH;
    }

    if (Offset(input, cursor) != end_offset) {
        TokenizeError("scope length not reached, something is wrong",input, cursor);
    }

    return true;
}
379 
380 
381 }
382 
383 // ------------------------------------------------------------------------------------------------
TokenizeBinary(TokenList & output_tokens,const char * input,unsigned int length)384 void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
385 {
386     ai_assert(input);
387 
388     if(length < 0x1b) {
389         TokenizeError("file is too short",0);
390     }
391 
392     if (strncmp(input,"Kaydara FBX Binary",18)) {
393         TokenizeError("magic bytes not found",0);
394     }
395 
396 
397     //uint32_t offset = 0x1b;
398 
399     const char* cursor = input + 0x1b;
400 
401     while (cursor < input + length) {
402         if(!ReadScope(output_tokens, input, cursor, input + length)) {
403             break;
404         }
405     }
406 }
407 
408 } // !FBX
409 } // !Assimp
410 
411 #endif
412