1 // action_buffer.cpp:  holds actions for later execution, for Gnash.
2 //
3 //   Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 //   Free Software Foundation, Inc
5 //
6 // This program is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 3 of the License, or
9 // (at your option) any later version.
10 //
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 //
20 
21 #include "action_buffer.h"
22 
23 #include <string>
24 #include <cstring> // for memcpy
25 
26 #include "log.h"
27 #include "SWFStream.h"
28 #include "SWF.h"
29 #include "ASHandlers.h"
30 #include "movie_definition.h"
31 
32 namespace gnash {
33 
// Forward declarations of the byte-order conversion helpers; their
// definitions live in the unnamed namespace at the end of this file.
namespace {

float convert_float_little(const void* p);
double convert_double_wacky(const void* p);

} // unnamed namespace
39 
action_buffer(const movie_definition & md)40 action_buffer::action_buffer(const movie_definition& md)
41     :
42     _pools(),
43     _src(md)
44 {
45 }
46 
47 void
read(SWFStream & in,unsigned long endPos)48 action_buffer::read(SWFStream& in, unsigned long endPos)
49 {
50     unsigned long startPos = in.tell();
51     assert(endPos <= in.get_tag_end_position());
52     unsigned size = endPos-startPos;
53 
54     if (!size) {
55         IF_VERBOSE_MALFORMED_SWF(
56             log_swferror(_("Empty action buffer starting at offset %lu"),
57                 startPos);
58         );
59         return;
60     }
61 
62     // Allocate the buffer
63     //
64     // NOTE: a .reserve would be fine here, except GLIBCPP_DEBUG will complain...
65     //
66     m_buffer.resize(size);
67     unsigned char* buf = &m_buffer.front();
68 
69     // Read all the bytes in the buffer
70     //
71     // NOTE:
72     // we might be reading more data then we'll actually
73     // use here if the SWF contains Action blocks padded
74     // with data after the terminating END.
75     // This has a cost in memory use, but for the normal
76     // case (non-malformed SWF) not looking for an END
77     // tag should give significant speedup in parsing
78     // large action-based movies.
79     //
80     in.read(reinterpret_cast<char*>(buf), size);
81 
82     // Consistency checks here
83     //
84     // NOTE: it is common to find such movies, swfmill is known to write
85     //       DoAction w/out the terminating END tag
86     //
87     if (m_buffer.back() != SWF::ACTION_END) {
88         // Add a null terminator so read_string won't read off
89         // the end of the buffer.
90         m_buffer.push_back(0x00);
91 
92         IF_VERBOSE_MALFORMED_SWF(
93             log_swferror(_("Action buffer starting at offset %lu doesn't "
94                     "end with an END tag"), startPos);
95         );
96     }
97 
98 }
99 
100 const ConstantPool&
readConstantPool(size_t start_pc,size_t stop_pc) const101 action_buffer::readConstantPool(size_t start_pc, size_t stop_pc) const
102 {
103     assert(stop_pc <= m_buffer.size()); // TODO: drop, be safe instead
104 
105     // Return a previously parsed pool at the same position, if any
106     PoolsMap::iterator pi = _pools.find(start_pc);
107     if ( pi != _pools.end() ) return pi->second;
108 
109     // Actual processing.
110 
111     ConstantPool& pool = _pools[start_pc];
112 
113     size_t i = start_pc;
114     const std::uint16_t length = read_uint16(i + 1);
115     const std::uint16_t count = read_uint16(i + 3);
116     i += 2;
117 
118     assert(start_pc + 3 + length == stop_pc);
119 
120     pool.resize(count);
121 
122     // Index the strings.
123     for (int ct = 0; ct < count; ct++) {
124         // Point into the current action buffer.
125         pool[ct] = reinterpret_cast<const char*>(&m_buffer[3 + i]);
126 
127         // TODO: rework this "safety" thing here (doesn't look all that safe)
128         while (m_buffer[3 + i]) {
129             // safety check.
130             if (i >= stop_pc) {
131                 log_error(_("action buffer dict length exceeded"));
132                 // Jam something into the remaining (invalid) entries.
133                 while (ct < count) {
134                     pool[ct] = "<invalid>";
135                     ct++;
136                 }
137                 return pool;
138             }
139             i++;
140         }
141         i++;
142     }
143 
144     return pool;
145 }
146 
147 
148 // Disassemble one instruction to the log. The maxBufferLength
149 // argument is the number of bytes remaining in the action_buffer
150 // and prevents malformed instructions causing a read past the
151 // end of the buffer.
152 static std::string
disasm_instruction(const unsigned char * instruction_data,size_t maxBufferLength)153 disasm_instruction(const unsigned char* instruction_data,
154         size_t maxBufferLength)
155 {
156 
157     using namespace SWF;
158 
159     const SWF::SWFHandlers& ash = SWF::SWFHandlers::instance();
160 
161     assert (maxBufferLength > 0);
162 
163     ArgumentType fmt = ARG_HEX;
164     ActionType action_id = static_cast<ActionType>(instruction_data[0]);
165 
166     std::stringstream ss;
167 
168     // Show instruction.
169     if (action_id > ash.lastType()) {
170         ss << "<unknown>[0x]" <<  action_id << "\n";
171     }
172     else {
173         ss << ash[action_id].getType();
174     }
175 
176     // Show instruction argument(s).
177     if (action_id & 0x80) {
178 
179         assert(maxBufferLength >= 3);
180         ss << " (";
181         fmt = ash[action_id].getArgFormat();
182 
183         size_t length = (instruction_data[1] | (instruction_data[2] << 8));
184 
185         // Assert that length without the three initial bytes
186         // is always within the buffer.
187         assert(length <= maxBufferLength - 3);
188 
189         switch (fmt) {
190 
191             case ARG_NONE:
192                 break;
193 
194             case ARG_HEX:
195                 ss << hexify(&instruction_data[3], length, false) << " ";
196                 break;
197 
198             case ARG_STR:
199             {
200                 const std::string str =
201                     hexify(&instruction_data[3], length, true);
202                 ss << "\"" << str.c_str() << "\"";
203                 break;
204             }
205 
206             case ARG_U8:
207             {
208                 const int val = instruction_data[3];
209                 ss << " " << val;
210                 break;
211             }
212 
213             case ARG_U16:
214             {
215                 const int val =
216                     instruction_data[3] | (instruction_data[4] << 8);
217                 ss << " " << val;
218                 break;
219             }
220 
221             case ARG_S16:
222             {
223                 int val = instruction_data[3] | (instruction_data[4] << 8);
224                 if (val & 0x8000) val |= ~0x7FFF;    // sign-extend
225                 ss << " " << val;
226                 break;
227             }
228 
229             case ARG_PUSH_DATA:
230             {
231                 size_t i = 0;
232                 while (i < length) {
233                     int type = instruction_data[3 + i];
234 
235                     // This should be safe, as the buffer is always
236                     // 0-terminated.
237                     if (i++) ss << ", ";
238 
239                     switch (type)
240                     {
241                         case 0:
242                         {
243                             // string
244                             std::string str;
245                             while (instruction_data[3 + i] && i < length)
246                             {
247                                 str += hexify(&instruction_data[3 + i], 1, true);
248                                 i++;
249                             }
250                             i++;
251                             ss << "\"" << str.c_str() << "\"";
252                             break;
253                         }
254 
255                         case 1:
256                         {
257                             // float (little-endian)
258                             if (i + 4 > length) break;
259                             float f = convert_float_little(instruction_data + 3 + i);
260                             i += 4;
261                             ss << "(float) " << f;
262                             break;
263                         }
264 
265                         case 2:
266                             ss << "NULL";
267                             break;
268 
269                         case 3:
270                             ss << "undef";
271                             break;
272 
273                         case 4:
274                         {
275                             // contents of register
276                             int reg = instruction_data[3 + i];
277                             i++;
278                             ss << "reg[" << reg << "]";
279                             break;
280                         }
281 
282                         case 5:
283                         {
284 
285                             int bool_val = instruction_data[3 + i];
286                             i++;
287                             ss << "bool(" << bool_val << ")";
288                             break;
289                         }
290 
291                         case 6:
292                         {
293                             // double in wacky format: 45670123
294                             if (i + 8 > length) break;
295                             double d = convert_double_wacky(instruction_data + 3 + i);
296                             i += 8;
297                             ss << "(double) " << d;
298                             break;
299                         }
300 
301                         case 7:
302                         {
303                             // std::int32_t
304                             std::int32_t val = instruction_data[3 + i]
305                             | (instruction_data[3 + i + 1] << 8)
306                             | (instruction_data[3 + i + 2] << 16)
307                             | (instruction_data[3 + i + 3] << 24);
308                             i += 4;
309                             ss << "(int) " << val;
310                             break;
311                         }
312 
313                         case 8:
314                         {
315                             int id = instruction_data[3 + i];
316                             i++;
317                             ss << "dict_lookup[" << id << "]";
318                             break;
319                         }
320 
321                         case 9:
322                         {
323                             int id = instruction_data[3 + i] | (instruction_data[3 + i + 1] << 8);
324                             i += 2;
325                             ss << "dict_lookup_lg[" << id << "]";
326                             break;
327                         }
328                     }
329                 }
330                 break;
331             }
332 
333             case ARG_DECL_DICT:
334             {
335                 size_t i = 0;
336                 size_t count = instruction_data[3 + i] | (instruction_data[3 + i + 1] << 8);
337                 i += 2;
338 
339                 ss << " [" << count << "] ";
340 
341                 // Print strings.
342                 for (size_t ct = 0; ct < count; ct++)
343                 {
344                     if ( ct ) ss << ", ";
345 
346                     ss << ct << ":";
347 
348                     std::string str;
349                     while (instruction_data[3 + i] && i < length)
350                     {
351                         str += instruction_data[3 + i];
352                         i++;
353                     }
354                     ss << "\"" << str.c_str() << "\"";
355                     i++;
356                 }
357                 break;
358             }
359 
360             case ARG_FUNCTION2:
361             {
362                 size_t i = 0;
363                 std::string functionName;
364                 // Signature info for a function2 opcode.
365                 while (instruction_data[3 + i] && i <= length)
366                 {
367                     functionName.push_back(instruction_data[3 + i]);
368                     ++i;
369                 }
370 
371                 // Don't read outside the instruction.
372                 if (i + 6 > length) break;
373                 ++i;
374 
375                 std::uint16_t argCount = instruction_data[3 + i] | (instruction_data[3 + i + 1] << 8);
376                 i += 2;
377 
378                 std::uint8_t registerCount = instruction_data[3 + i];
379                 i++;
380 
381                 ss << "\tname = '" << functionName << "'"
382                        << " arg count = " << argCount
383                        << " register count = " << static_cast<int>(registerCount);
384 
385                 const std::uint16_t flags =
386                     (instruction_data[3 + i]) |
387                     (instruction_data[3 + i + 1] << 8);
388 
389                 i += 2;
390 
391                 const bool preload_global = (flags & 0x100);
392                 const bool preload_parent = (flags & 0x80);
393                 const bool preload_root   = (flags & 0x40);
394                 const bool suppress_super = (flags & 0x20);
395                 const bool preload_super  = (flags & 0x10);
396                 const bool suppress_args  = (flags & 0x08);
397                 const bool preload_args   = (flags & 0x04);
398                 const bool suppress_this  = (flags & 0x02);
399                 const bool preload_this   = (flags & 0x01);
400 
401                 ss << " pg=" << preload_global
402                 << " pp=" << preload_parent
403                 << " pr=" << preload_root
404                 << " ss=" << suppress_super
405                 << " ps=" << preload_super
406                 << " sa=" << suppress_args
407                 << " pa=" << preload_args
408                 << " st=" << suppress_this
409                 << " pt=" << preload_this;
410 
411                 for (size_t argi = 0; argi < argCount; ++argi) {
412 
413                     // Make sure not to read past the end of the
414                     // instruction.
415                     if (i >= length) break;
416 
417                     int arg_register = instruction_data[3 + i];
418                     i++;
419 
420                     std::string argName;
421                     // Signature info for a function2 opcode.
422                     while (instruction_data[3 + i] && i <= length) {
423                         argName.push_back(instruction_data[3 + i]);
424                         i++;
425                     }
426 
427                     ss << "\targ[" << argi << "]"
428                        << " - reg[" << arg_register << "]"
429                        << " - '" << argName << "'";
430 
431                     if (i == length) break;
432 
433                     // Advance past the terminating 0
434                     i++;
435 
436                 }
437 
438                 if (i + 2 > length) break;
439                 int function_length = instruction_data[3 + i] | (instruction_data[3 + i + 1] << 8);
440                 i += 2;
441 
442                 ss << "\t\tfunction length = " << function_length;
443                 break;
444             }
445         } // Switch
446 
447         ss << ")";
448     } // If action & 0x80
449 
450     return ss.str();
451 }
452 
453 std::string
disasm(size_t pc) const454 action_buffer::disasm(size_t pc) const
455 {
456     const size_t maxBufferLength = m_buffer.size() - pc;
457     return disasm_instruction(&m_buffer[pc], maxBufferLength);
458 }
459 
460 float
read_float_little(size_t pc) const461 action_buffer::read_float_little(size_t pc) const
462 {
463     return convert_float_little(&m_buffer[pc]);
464 }
465 
466 double
read_double_wacky(size_t pc) const467 action_buffer::read_double_wacky(size_t pc) const
468 {
469     return convert_double_wacky(&m_buffer[pc]);
470 }
471 
472 const std::string&
getDefinitionURL() const473 action_buffer::getDefinitionURL() const
474 {
475     return _src.get_url();
476 }
477 
478 int
getDefinitionVersion() const479 action_buffer::getDefinitionVersion() const
480 {
481     return _src.get_version();
482 }
483 
484 namespace {
485 
486 // Endian conversion routines.
487 //
488 // Flash format stores integers as little-endian,
489 // floats as little-endian IEEE754,
490 // and doubles as little-endian IEEE754 with the two 32-bit words swapped over.
491 //
492 // We detect endianness at runtime.
493 // It looks hairy but the cost is small (one assignment, one switch),
494 // and it is less of a maintenance/portability nightmare.
495 // It also allows us to detect three existing variants instead of two and
496 // to reject incompatible (non-IEEE754) floating point formats (VAX etc).
497 // For these we would need to interpret the IEEE bitvalues explicitly.
498 
499 // Read a little-endian 32-bit float from m_buffer[pc]
500 // and return it as a host-endian float.
501 float
convert_float_little(const void * p)502 convert_float_little(const void *p)
503 {
504     // Hairy union for endian detection and munging
505     union {
506         float f;
507         std::uint32_t i;
508         struct {    // for endian detection
509             std::uint16_t s0;
510             std::uint16_t s1;
511         } s;
512         struct {    // for byte-swapping
513             std::uint8_t c0;
514             std::uint8_t c1;
515             std::uint8_t c2;
516             std::uint8_t c3;
517         } c;
518     } u;
519 
520     u.f = 1.0;
521     switch (u.s.s0) {
522 
523         case 0x0000:    // little-endian host
524             std::memcpy(&u.i, p, 4); // TODO: use std::copy instead ..
525             break;
526         case 0x3f80:    // big-endian host
527         {
528             const std::uint8_t *cp = static_cast<const std::uint8_t*>(p);
529             u.c.c0 = cp[3];
530             u.c.c1 = cp[2];
531             u.c.c2 = cp[1];
532             u.c.c3 = cp[0];
533             break;
534         }
535         default:
536             log_error(_("Native floating point format not recognised"));
537             std::abort();
538     }
539 
540     return u.f;
541 }
542 
543 
544 // Read a 64-bit double from memory, stored in word-swapped little-endian
545 // format and return it as a host-endian double.
546 // "Wacky format" is 45670123.
547 double
convert_double_wacky(const void * p)548 convert_double_wacky(const void *p)
549 {
550     const std::uint8_t *cp = static_cast<const std::uint8_t*>(p);
551     union {
552         double d;
553         std::uint64_t i;
554         struct {
555             std::uint32_t l0;
556             std::uint32_t l1;
557         } l;
558         struct {
559             std::uint16_t s0;
560             std::uint16_t s1;
561             std::uint16_t s2;
562             std::uint16_t s3;
563         } s;
564         struct {
565             std::uint8_t c0;
566             std::uint8_t c1;
567             std::uint8_t c2;
568             std::uint8_t c3;
569             std::uint8_t c4;
570             std::uint8_t c5;
571             std::uint8_t c6;
572             std::uint8_t c7;
573         } c;
574     } u;
575 
576     static_assert(sizeof(u) == sizeof(u.i), "u must be 8 bytes");
577 
578     // Detect endianness of doubles by storing a value that is
579     // exactly representable and that has different values in the
580     // four 16-bit words.
581     // 0x11223344 is represented as 0x41b1 2233 4400 0000 (bigendian)
582     u.d = static_cast<double>(0x11223344);
583     switch (u.s.s0) {
584     case 0x0000:    // pure little-endian host: swap words only.
585         std::memcpy(&u.l.l1, cp, 4);
586         std::memcpy(&u.l.l0, cp + 4, 4);
587         break;
588     case 0x41b1:    // pure big-endian host: swap contents of 32-bit words
589         u.c.c0 = cp[3];
590         u.c.c1 = cp[2];
591         u.c.c2 = cp[1];
592         u.c.c3 = cp[0];
593         u.c.c4 = cp[7];
594         u.c.c5 = cp[6];
595         u.c.c6 = cp[5];
596         u.c.c7 = cp[4];
597         break;
598     case 0x2233:    // word-swapped little-endian host (PDP / ARM FPA)
599             // is the same as wacky format.
600         std::memcpy(&u.i, cp, 8);
601         break;
602     case 0x4400:    // word-swapped big-endian host: does this exist?
603         u.c.c0 = cp[7];
604         u.c.c1 = cp[6];
605         u.c.c2 = cp[5];
606         u.c.c3 = cp[4];
607         u.c.c4 = cp[3];
608         u.c.c5 = cp[2];
609         u.c.c6 = cp[1];
610         u.c.c7 = cp[0];
611         break;
612     default:
613         log_error(_("Native double floating point format not recognised"));
614         abort();
615     }
616 
617     return u.d;
618 }
619 
620 } // unnamed namespace
621 } // namespace gnash
622 
623 // Local Variables:
624 // mode: C++
625 // indent-tabs-mode: nil
626 // End:
627