1 // action_buffer.cpp: holds actions for later execution, for Gnash.
2 //
3 // Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 // Free Software Foundation, Inc
5 //
6 // This program is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 3 of the License, or
9 // (at your option) any later version.
10 //
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 //
20
21 #include "action_buffer.h"
22
23 #include <string>
24 #include <cstring> // for memcpy
25
26 #include "log.h"
27 #include "SWFStream.h"
28 #include "SWF.h"
29 #include "ASHandlers.h"
30 #include "movie_definition.h"
31
32 namespace gnash {
33
// Forward declarations of the file-local byte-order helpers; both are
// defined in the unnamed namespace near the end of this file.
namespace {

// Decode a little-endian 32-bit float stored at src.
float convert_float_little(const void* src);

// Decode a 64-bit double stored at src in word-swapped little-endian
// ("wacky") format.
double convert_double_wacky(const void* src);

} // unnamed namespace
39
action_buffer(const movie_definition & md)40 action_buffer::action_buffer(const movie_definition& md)
41 :
42 _pools(),
43 _src(md)
44 {
45 }
46
47 void
read(SWFStream & in,unsigned long endPos)48 action_buffer::read(SWFStream& in, unsigned long endPos)
49 {
50 unsigned long startPos = in.tell();
51 assert(endPos <= in.get_tag_end_position());
52 unsigned size = endPos-startPos;
53
54 if (!size) {
55 IF_VERBOSE_MALFORMED_SWF(
56 log_swferror(_("Empty action buffer starting at offset %lu"),
57 startPos);
58 );
59 return;
60 }
61
62 // Allocate the buffer
63 //
64 // NOTE: a .reserve would be fine here, except GLIBCPP_DEBUG will complain...
65 //
66 m_buffer.resize(size);
67 unsigned char* buf = &m_buffer.front();
68
69 // Read all the bytes in the buffer
70 //
71 // NOTE:
72 // we might be reading more data then we'll actually
73 // use here if the SWF contains Action blocks padded
74 // with data after the terminating END.
75 // This has a cost in memory use, but for the normal
76 // case (non-malformed SWF) not looking for an END
77 // tag should give significant speedup in parsing
78 // large action-based movies.
79 //
80 in.read(reinterpret_cast<char*>(buf), size);
81
82 // Consistency checks here
83 //
84 // NOTE: it is common to find such movies, swfmill is known to write
85 // DoAction w/out the terminating END tag
86 //
87 if (m_buffer.back() != SWF::ACTION_END) {
88 // Add a null terminator so read_string won't read off
89 // the end of the buffer.
90 m_buffer.push_back(0x00);
91
92 IF_VERBOSE_MALFORMED_SWF(
93 log_swferror(_("Action buffer starting at offset %lu doesn't "
94 "end with an END tag"), startPos);
95 );
96 }
97
98 }
99
100 const ConstantPool&
readConstantPool(size_t start_pc,size_t stop_pc) const101 action_buffer::readConstantPool(size_t start_pc, size_t stop_pc) const
102 {
103 assert(stop_pc <= m_buffer.size()); // TODO: drop, be safe instead
104
105 // Return a previously parsed pool at the same position, if any
106 PoolsMap::iterator pi = _pools.find(start_pc);
107 if ( pi != _pools.end() ) return pi->second;
108
109 // Actual processing.
110
111 ConstantPool& pool = _pools[start_pc];
112
113 size_t i = start_pc;
114 const std::uint16_t length = read_uint16(i + 1);
115 const std::uint16_t count = read_uint16(i + 3);
116 i += 2;
117
118 assert(start_pc + 3 + length == stop_pc);
119
120 pool.resize(count);
121
122 // Index the strings.
123 for (int ct = 0; ct < count; ct++) {
124 // Point into the current action buffer.
125 pool[ct] = reinterpret_cast<const char*>(&m_buffer[3 + i]);
126
127 // TODO: rework this "safety" thing here (doesn't look all that safe)
128 while (m_buffer[3 + i]) {
129 // safety check.
130 if (i >= stop_pc) {
131 log_error(_("action buffer dict length exceeded"));
132 // Jam something into the remaining (invalid) entries.
133 while (ct < count) {
134 pool[ct] = "<invalid>";
135 ct++;
136 }
137 return pool;
138 }
139 i++;
140 }
141 i++;
142 }
143
144 return pool;
145 }
146
147
148 // Disassemble one instruction to the log. The maxBufferLength
149 // argument is the number of bytes remaining in the action_buffer
150 // and prevents malformed instructions causing a read past the
151 // end of the buffer.
152 static std::string
disasm_instruction(const unsigned char * instruction_data,size_t maxBufferLength)153 disasm_instruction(const unsigned char* instruction_data,
154 size_t maxBufferLength)
155 {
156
157 using namespace SWF;
158
159 const SWF::SWFHandlers& ash = SWF::SWFHandlers::instance();
160
161 assert (maxBufferLength > 0);
162
163 ArgumentType fmt = ARG_HEX;
164 ActionType action_id = static_cast<ActionType>(instruction_data[0]);
165
166 std::stringstream ss;
167
168 // Show instruction.
169 if (action_id > ash.lastType()) {
170 ss << "<unknown>[0x]" << action_id << "\n";
171 }
172 else {
173 ss << ash[action_id].getType();
174 }
175
176 // Show instruction argument(s).
177 if (action_id & 0x80) {
178
179 assert(maxBufferLength >= 3);
180 ss << " (";
181 fmt = ash[action_id].getArgFormat();
182
183 size_t length = (instruction_data[1] | (instruction_data[2] << 8));
184
185 // Assert that length without the three initial bytes
186 // is always within the buffer.
187 assert(length <= maxBufferLength - 3);
188
189 switch (fmt) {
190
191 case ARG_NONE:
192 break;
193
194 case ARG_HEX:
195 ss << hexify(&instruction_data[3], length, false) << " ";
196 break;
197
198 case ARG_STR:
199 {
200 const std::string str =
201 hexify(&instruction_data[3], length, true);
202 ss << "\"" << str.c_str() << "\"";
203 break;
204 }
205
206 case ARG_U8:
207 {
208 const int val = instruction_data[3];
209 ss << " " << val;
210 break;
211 }
212
213 case ARG_U16:
214 {
215 const int val =
216 instruction_data[3] | (instruction_data[4] << 8);
217 ss << " " << val;
218 break;
219 }
220
221 case ARG_S16:
222 {
223 int val = instruction_data[3] | (instruction_data[4] << 8);
224 if (val & 0x8000) val |= ~0x7FFF; // sign-extend
225 ss << " " << val;
226 break;
227 }
228
229 case ARG_PUSH_DATA:
230 {
231 size_t i = 0;
232 while (i < length) {
233 int type = instruction_data[3 + i];
234
235 // This should be safe, as the buffer is always
236 // 0-terminated.
237 if (i++) ss << ", ";
238
239 switch (type)
240 {
241 case 0:
242 {
243 // string
244 std::string str;
245 while (instruction_data[3 + i] && i < length)
246 {
247 str += hexify(&instruction_data[3 + i], 1, true);
248 i++;
249 }
250 i++;
251 ss << "\"" << str.c_str() << "\"";
252 break;
253 }
254
255 case 1:
256 {
257 // float (little-endian)
258 if (i + 4 > length) break;
259 float f = convert_float_little(instruction_data + 3 + i);
260 i += 4;
261 ss << "(float) " << f;
262 break;
263 }
264
265 case 2:
266 ss << "NULL";
267 break;
268
269 case 3:
270 ss << "undef";
271 break;
272
273 case 4:
274 {
275 // contents of register
276 int reg = instruction_data[3 + i];
277 i++;
278 ss << "reg[" << reg << "]";
279 break;
280 }
281
282 case 5:
283 {
284
285 int bool_val = instruction_data[3 + i];
286 i++;
287 ss << "bool(" << bool_val << ")";
288 break;
289 }
290
291 case 6:
292 {
293 // double in wacky format: 45670123
294 if (i + 8 > length) break;
295 double d = convert_double_wacky(instruction_data + 3 + i);
296 i += 8;
297 ss << "(double) " << d;
298 break;
299 }
300
301 case 7:
302 {
303 // std::int32_t
304 std::int32_t val = instruction_data[3 + i]
305 | (instruction_data[3 + i + 1] << 8)
306 | (instruction_data[3 + i + 2] << 16)
307 | (instruction_data[3 + i + 3] << 24);
308 i += 4;
309 ss << "(int) " << val;
310 break;
311 }
312
313 case 8:
314 {
315 int id = instruction_data[3 + i];
316 i++;
317 ss << "dict_lookup[" << id << "]";
318 break;
319 }
320
321 case 9:
322 {
323 int id = instruction_data[3 + i] | (instruction_data[3 + i + 1] << 8);
324 i += 2;
325 ss << "dict_lookup_lg[" << id << "]";
326 break;
327 }
328 }
329 }
330 break;
331 }
332
333 case ARG_DECL_DICT:
334 {
335 size_t i = 0;
336 size_t count = instruction_data[3 + i] | (instruction_data[3 + i + 1] << 8);
337 i += 2;
338
339 ss << " [" << count << "] ";
340
341 // Print strings.
342 for (size_t ct = 0; ct < count; ct++)
343 {
344 if ( ct ) ss << ", ";
345
346 ss << ct << ":";
347
348 std::string str;
349 while (instruction_data[3 + i] && i < length)
350 {
351 str += instruction_data[3 + i];
352 i++;
353 }
354 ss << "\"" << str.c_str() << "\"";
355 i++;
356 }
357 break;
358 }
359
360 case ARG_FUNCTION2:
361 {
362 size_t i = 0;
363 std::string functionName;
364 // Signature info for a function2 opcode.
365 while (instruction_data[3 + i] && i <= length)
366 {
367 functionName.push_back(instruction_data[3 + i]);
368 ++i;
369 }
370
371 // Don't read outside the instruction.
372 if (i + 6 > length) break;
373 ++i;
374
375 std::uint16_t argCount = instruction_data[3 + i] | (instruction_data[3 + i + 1] << 8);
376 i += 2;
377
378 std::uint8_t registerCount = instruction_data[3 + i];
379 i++;
380
381 ss << "\tname = '" << functionName << "'"
382 << " arg count = " << argCount
383 << " register count = " << static_cast<int>(registerCount);
384
385 const std::uint16_t flags =
386 (instruction_data[3 + i]) |
387 (instruction_data[3 + i + 1] << 8);
388
389 i += 2;
390
391 const bool preload_global = (flags & 0x100);
392 const bool preload_parent = (flags & 0x80);
393 const bool preload_root = (flags & 0x40);
394 const bool suppress_super = (flags & 0x20);
395 const bool preload_super = (flags & 0x10);
396 const bool suppress_args = (flags & 0x08);
397 const bool preload_args = (flags & 0x04);
398 const bool suppress_this = (flags & 0x02);
399 const bool preload_this = (flags & 0x01);
400
401 ss << " pg=" << preload_global
402 << " pp=" << preload_parent
403 << " pr=" << preload_root
404 << " ss=" << suppress_super
405 << " ps=" << preload_super
406 << " sa=" << suppress_args
407 << " pa=" << preload_args
408 << " st=" << suppress_this
409 << " pt=" << preload_this;
410
411 for (size_t argi = 0; argi < argCount; ++argi) {
412
413 // Make sure not to read past the end of the
414 // instruction.
415 if (i >= length) break;
416
417 int arg_register = instruction_data[3 + i];
418 i++;
419
420 std::string argName;
421 // Signature info for a function2 opcode.
422 while (instruction_data[3 + i] && i <= length) {
423 argName.push_back(instruction_data[3 + i]);
424 i++;
425 }
426
427 ss << "\targ[" << argi << "]"
428 << " - reg[" << arg_register << "]"
429 << " - '" << argName << "'";
430
431 if (i == length) break;
432
433 // Advance past the terminating 0
434 i++;
435
436 }
437
438 if (i + 2 > length) break;
439 int function_length = instruction_data[3 + i] | (instruction_data[3 + i + 1] << 8);
440 i += 2;
441
442 ss << "\t\tfunction length = " << function_length;
443 break;
444 }
445 } // Switch
446
447 ss << ")";
448 } // If action & 0x80
449
450 return ss.str();
451 }
452
453 std::string
disasm(size_t pc) const454 action_buffer::disasm(size_t pc) const
455 {
456 const size_t maxBufferLength = m_buffer.size() - pc;
457 return disasm_instruction(&m_buffer[pc], maxBufferLength);
458 }
459
460 float
read_float_little(size_t pc) const461 action_buffer::read_float_little(size_t pc) const
462 {
463 return convert_float_little(&m_buffer[pc]);
464 }
465
466 double
read_double_wacky(size_t pc) const467 action_buffer::read_double_wacky(size_t pc) const
468 {
469 return convert_double_wacky(&m_buffer[pc]);
470 }
471
472 const std::string&
getDefinitionURL() const473 action_buffer::getDefinitionURL() const
474 {
475 return _src.get_url();
476 }
477
478 int
getDefinitionVersion() const479 action_buffer::getDefinitionVersion() const
480 {
481 return _src.get_version();
482 }
483
484 namespace {
485
486 // Endian conversion routines.
487 //
488 // Flash format stores integers as little-endian,
489 // floats as little-endian IEEE754,
490 // and doubles as little-endian IEEE754 with the two 32-bit words swapped over.
491 //
492 // We detect endianness at runtime.
493 // It looks hairy but the cost is small (one assignment, one switch),
494 // and it is less of a maintenance/portability nightmare.
495 // It also allows us to detect three existing variants instead of two and
496 // to reject incompatible (non-IEEE754) floating point formats (VAX etc).
497 // For these we would need to interpret the IEEE bitvalues explicitly.
498
499 // Read a little-endian 32-bit float from m_buffer[pc]
500 // and return it as a host-endian float.
501 float
convert_float_little(const void * p)502 convert_float_little(const void *p)
503 {
504 // Hairy union for endian detection and munging
505 union {
506 float f;
507 std::uint32_t i;
508 struct { // for endian detection
509 std::uint16_t s0;
510 std::uint16_t s1;
511 } s;
512 struct { // for byte-swapping
513 std::uint8_t c0;
514 std::uint8_t c1;
515 std::uint8_t c2;
516 std::uint8_t c3;
517 } c;
518 } u;
519
520 u.f = 1.0;
521 switch (u.s.s0) {
522
523 case 0x0000: // little-endian host
524 std::memcpy(&u.i, p, 4); // TODO: use std::copy instead ..
525 break;
526 case 0x3f80: // big-endian host
527 {
528 const std::uint8_t *cp = static_cast<const std::uint8_t*>(p);
529 u.c.c0 = cp[3];
530 u.c.c1 = cp[2];
531 u.c.c2 = cp[1];
532 u.c.c3 = cp[0];
533 break;
534 }
535 default:
536 log_error(_("Native floating point format not recognised"));
537 std::abort();
538 }
539
540 return u.f;
541 }
542
543
544 // Read a 64-bit double from memory, stored in word-swapped little-endian
545 // format and return it as a host-endian double.
546 // "Wacky format" is 45670123.
547 double
convert_double_wacky(const void * p)548 convert_double_wacky(const void *p)
549 {
550 const std::uint8_t *cp = static_cast<const std::uint8_t*>(p);
551 union {
552 double d;
553 std::uint64_t i;
554 struct {
555 std::uint32_t l0;
556 std::uint32_t l1;
557 } l;
558 struct {
559 std::uint16_t s0;
560 std::uint16_t s1;
561 std::uint16_t s2;
562 std::uint16_t s3;
563 } s;
564 struct {
565 std::uint8_t c0;
566 std::uint8_t c1;
567 std::uint8_t c2;
568 std::uint8_t c3;
569 std::uint8_t c4;
570 std::uint8_t c5;
571 std::uint8_t c6;
572 std::uint8_t c7;
573 } c;
574 } u;
575
576 static_assert(sizeof(u) == sizeof(u.i), "u must be 8 bytes");
577
578 // Detect endianness of doubles by storing a value that is
579 // exactly representable and that has different values in the
580 // four 16-bit words.
581 // 0x11223344 is represented as 0x41b1 2233 4400 0000 (bigendian)
582 u.d = static_cast<double>(0x11223344);
583 switch (u.s.s0) {
584 case 0x0000: // pure little-endian host: swap words only.
585 std::memcpy(&u.l.l1, cp, 4);
586 std::memcpy(&u.l.l0, cp + 4, 4);
587 break;
588 case 0x41b1: // pure big-endian host: swap contents of 32-bit words
589 u.c.c0 = cp[3];
590 u.c.c1 = cp[2];
591 u.c.c2 = cp[1];
592 u.c.c3 = cp[0];
593 u.c.c4 = cp[7];
594 u.c.c5 = cp[6];
595 u.c.c6 = cp[5];
596 u.c.c7 = cp[4];
597 break;
598 case 0x2233: // word-swapped little-endian host (PDP / ARM FPA)
599 // is the same as wacky format.
600 std::memcpy(&u.i, cp, 8);
601 break;
602 case 0x4400: // word-swapped big-endian host: does this exist?
603 u.c.c0 = cp[7];
604 u.c.c1 = cp[6];
605 u.c.c2 = cp[5];
606 u.c.c3 = cp[4];
607 u.c.c4 = cp[3];
608 u.c.c5 = cp[2];
609 u.c.c6 = cp[1];
610 u.c.c7 = cp[0];
611 break;
612 default:
613 log_error(_("Native double floating point format not recognised"));
614 abort();
615 }
616
617 return u.d;
618 }
619
620 } // unnamed namespace
621 } // namespace gnash
622
623 // Local Variables:
624 // mode: C++
625 // indent-tabs-mode: nil
626 // End:
627