1package Data::ParseBinary::Executable::PE32;
2use strict;
3use warnings;
4use Data::ParseBinary;
5
6#Portable Executable (PE) 32 bit, little endian
7#Used on MSWindows systems (including DOS) for EXEs and DLLs
8#
9#1999 paper:
10#http://download.microsoft.com/download/1/6/1/161ba512-40e2-4cc9-843a-923143f3456c/pecoff.doc
11#
12#2006 with updates relevant for .NET:
13#http://download.microsoft.com/download/9/c/5/9c5b2167-8017-4bae-9fde-d599bac8184a/pecoff_v8.doc
14
15
16sub UTCTimeStamp {
17    my ($name) = @_;
18    return Data::ParseBinary::lib::ExecPE32::UTCTimeStampAdapter->create(ULInt32($name));
19}
20
21my $msdos_header = Struct("msdos_header",
22    Magic("MZ"),
23    ULInt16("partPag"),
24    ULInt16("page_count"),
25    ULInt16("relocation_count"),
26    ULInt16("header_size"),
27    ULInt16("minmem"),
28    ULInt16("maxmem"),
29    ULInt16("relocation_stackseg"),
30    ULInt16("exe_stackptr"),
31    ULInt16("checksum"),
32    ULInt16("exe_ip"),
33    ULInt16("relocation_codeseg"),
34    ULInt16("table_offset"),
35    ULInt16("overlay"),
36    Padding(8),
37    ULInt16("oem_id"),
38    ULInt16("oem_info"),
39    Padding(20),
40    ULInt32("coff_header_pointer"),
41    Anchor("_assembly_start"),
42    Field("code", sub { $_->ctx->{coff_header_pointer} - $_->ctx->{_assembly_start} } ),
43);
44
45my $symbol_table = Struct("symbol_table",
46    String("name", 8, padchar => "\x00"),
47    ULInt32("value"),
48    Enum(
49        Data::ParseBinary::lib::ExecPE32::OneDownAdapter->create(SLInt16("section_number")),
50        #ExprAdapter(SLInt16("section_number"),
51        #    encoder => sub { $_->obj + 1 },
52        #    decoder => sub { $_->obj - 1 },
53        #),
54        UNDEFINED => -1,
55        ABSOLUTE => -2,
56        DEBUG => -3,
57        _default_ => $DefaultPass,
58    ),
59    Enum(ULInt8("complex_type"),
60        NULL => 0,
61        POINTER => 1,
62        FUNCTION => 2,
63        ARRAY => 3,
64    ),
65    Enum(ULInt8("base_type"),
66        NULL => 0,
67        VOID => 1,
68        CHAR => 2,
69        SHORT => 3,
70        INT => 4,
71        LONG => 5,
72        FLOAT => 6,
73        DOUBLE => 7,
74        STRUCT => 8,
75        UNION => 9,
76        ENUM => 10,
77        MOE => 11,
78        BYTE => 12,
79        WORD => 13,
80        UINT => 14,
81        DWORD => 15,
82    ),
83    Enum(ULInt8("storage_class"),
84        END_OF_FUNCTION => 255,
85        NULL => 0,
86        AUTOMATIC => 1,
87        EXTERNAL => 2,
88        STATIC => 3,
89        REGISTER => 4,
90        EXTERNAL_DEF => 5,
91        LABEL => 6,
92        UNDEFINED_LABEL => 7,
93        MEMBER_OF_STRUCT => 8,
94        ARGUMENT => 9,
95        STRUCT_TAG => 10,
96        MEMBER_OF_UNION => 11,
97        UNION_TAG => 12,
98        TYPE_DEFINITION => 13,
99        UNDEFINED_STATIC => 14,
100        ENUM_TAG => 15,
101        MEMBER_OF_ENUM => 16,
102        REGISTER_PARAM => 17,
103        BIT_FIELD => 18,
104        BLOCK => 100,
105        FUNCTION => 101,
106        END_OF_STRUCT => 102,
107        FILE => 103,
108        SECTION => 104,
109        WEAK_EXTERNAL => 105,
110    ),
111    ULInt8("number_of_aux_symbols"),
112    Array(sub { $_->ctx->{number_of_aux_symbols} },
113        Bytes("aux_symbols", 18)
114    )
115);
116
117my $coff_header = Struct("coff_header",
118    Magic("PE\x00\x00"),
119    Enum(ULInt16("machine_type"),
120        UNKNOWN => 0x0,
121        AM33 => 0x1d3,
122        AMD64 => 0x8664,
123        ARM => 0x1c0,
124        EBC => 0xebc,
125        I386 => 0x14c,
126        IA64 => 0x200,
127        M32R => 0x9041,
128        MIPS16 => 0x266,
129        MIPSFPU => 0x366,
130        MIPSFPU16 => 0x466,
131        POWERPC => 0x1f0,
132        POWERPCFP => 0x1f1,
133        R4000 => 0x166,
134        SH3 => 0x1a2,
135        SH3DSP => 0x1a3,
136        SH4 => 0x1a6,
137        SH5=> 0x1a8,
138        THUMB => 0x1c2,
139        WCEMIPSV2 => 0x169,
140        _default_ => $DefaultPass
141    ),
142    ULInt16("number_of_sections"),
143    UTCTimeStamp("time_stamp"),
144    ULInt32("symbol_table_pointer"),
145    ULInt32("number_of_symbols"),
146    ULInt16("optional_header_size"),
147    FlagsEnum(ULInt16("characteristics"),
148        RELOCS_STRIPPED => 0x0001,
149        EXECUTABLE_IMAGE => 0x0002,
150        LINE_NUMS_STRIPPED => 0x0004,
151        LOCAL_SYMS_STRIPPED => 0x0008,
152        AGGRESSIVE_WS_TRIM => 0x0010,
153        LARGE_ADDRESS_AWARE => 0x0020,
154        MACHINE_16BIT => 0x0040,
155        BYTES_REVERSED_LO => 0x0080,
156        MACHINE_32BIT => 0x0100,
157        DEBUG_STRIPPED => 0x0200,
158        REMOVABLE_RUN_FROM_SWAP => 0x0400,
159        SYSTEM => 0x1000,
160        DLL => 0x2000,
161        UNIPROCESSOR_ONLY => 0x4000,
162        BIG_ENDIAN_MACHINE => 0x8000,
163    ),
164
165    # symbol table
166    Pointer(sub { $_->ctx->{symbol_table_pointer} },
167        Array(sub { $_->ctx->{number_of_symbols} }, $symbol_table)
168    )
169);
170
171sub PEPlusField {
172    my ($name) = @_;
173    return IfThenElse($name, sub { $_->ctx->{pe_type} eq "PE32_plus" },
174        ULInt64(undef),
175        ULInt32(undef),
176    );
177}
178
179my $optional_header = Struct("optional_header",
180    # standard fields
181    Enum(ULInt16("pe_type"),
182        PE32 => 0x10b,
183        PE32_plus => 0x20b,
184    ),
185    ULInt8("major_linker_version"),
186    ULInt8("minor_linker_version"),
187    ULInt32("code_size"),
188    ULInt32("initialized_data_size"),
189    ULInt32("uninitialized_data_size"),
190    ULInt32("entry_point_pointer"),
191    ULInt32("base_of_code"),
192
193    # only in PE32 files
194    If(sub { $_->ctx->{pe_type} eq "PE32" },
195        ULInt32("base_of_data")
196    ),
197
198    # WinNT-specific fields
199    PEPlusField("image_base"),
200    ULInt32("section_aligment"),
201    ULInt32("file_alignment"),
202    ULInt16("major_os_version"),
203    ULInt16("minor_os_version"),
204    ULInt16("major_image_version"),
205    ULInt16("minor_image_version"),
206    ULInt16("major_subsystem_version"),
207    ULInt16("minor_subsystem_version"),
208    Padding(4),
209    ULInt32("image_size"),
210    ULInt32("headers_size"),
211    ULInt32("checksum"),
212    Enum(ULInt16("subsystem"),
213        UNKNOWN => 0,
214        NATIVE => 1,
215        WINDOWS_GUI => 2,
216        WINDOWS_CUI => 3,
217        POSIX_CIU => 7,
218        WINDOWS_CE_GUI => 9,
219        EFI_APPLICATION => 10,
220        EFI_BOOT_SERVICE_DRIVER => 11,
221        EFI_RUNTIME_DRIVER => 12,
222        EFI_ROM => 13,
223        XBOX => 14,
224        _defualt_ => $DefaultPass
225    ),
226    FlagsEnum(ULInt16("dll_characteristics"),
227        NO_BIND => 0x0800,
228        WDM_DRIVER => 0x2000,
229        TERMINAL_SERVER_AWARE => 0x8000,
230    ),
231    PEPlusField("reserved_stack_size"),
232    PEPlusField("stack_commit_size"),
233    PEPlusField("reserved_heap_size"),
234    PEPlusField("heap_commit_size"),
235    ULInt32("loader_flags"),
236    ULInt32("number_of_data_directories"),
237
238    Data::ParseBinary::lib::ExecPE32::NamedSequence->create(
239        Array(sub { $_->ctx->{number_of_data_directories} },
240            Struct("data_directories",
241                ULInt32("address"),
242                ULInt32("size"),
243            )
244        ),
245        mapping => {
246            0 => 'export_table',
247            1 => 'import_table',
248            2 => 'resource_table',
249            3 => 'exception_table',
250            4 => 'certificate_table',
251            5 => 'base_relocation_table',
252            6 => 'debug',
253            7 => 'architecture',
254            8 => 'global_ptr',
255            9 => 'tls_table',
256            10 => 'load_config_table',
257            11 => 'bound_import',
258            12 => 'import_address_table',
259            13 => 'delay_import_descriptor',
260            14 => 'complus_runtime_header',
261        }
262    ),
263);
264
265my $section = Struct("section",
266    String("name", 8, padchar => "\x00"),
267    ULInt32("virtual_size"),
268    ULInt32("virtual_address"),
269    ULInt32("raw_data_size"),
270    ULInt32("raw_data_pointer"),
271    ULInt32("relocations_pointer"),
272    ULInt32("line_numbers_pointer"),
273    ULInt16("number_of_relocations"),
274    ULInt16("number_of_line_numbers"),
275    FlagsEnum(ULInt32("characteristics"),
276        TYPE_REG => 0x00000000,
277        TYPE_DSECT => 0x00000001,
278        TYPE_NOLOAD => 0x00000002,
279        TYPE_GROUP => 0x00000004,
280        TYPE_NO_PAD => 0x00000008,
281        TYPE_COPY => 0x00000010,
282        CNT_CODE => 0x00000020,
283        CNT_INITIALIZED_DATA => 0x00000040,
284        CNT_UNINITIALIZED_DATA => 0x00000080,
285        LNK_OTHER => 0x00000100,
286        LNK_INFO => 0x00000200,
287        TYPE_OVER => 0x00000400,
288        LNK_REMOVE => 0x00000800,
289        LNK_COMDAT => 0x00001000,
290        MEM_FARDATA => 0x00008000,
291        MEM_PURGEABLE => 0x00020000,
292        MEM_16BIT => 0x00020000,
293        MEM_LOCKED => 0x00040000,
294        MEM_PRELOAD => 0x00080000,
295        ALIGN_1BYTES => 0x00100000,
296        ALIGN_2BYTES => 0x00200000,
297        ALIGN_4BYTES => 0x00300000,
298        ALIGN_8BYTES => 0x00400000,
299        ALIGN_16BYTES => 0x00500000,
300        ALIGN_32BYTES => 0x00600000,
301        ALIGN_64BYTES => 0x00700000,
302        ALIGN_128BYTES => 0x00800000,
303        ALIGN_256BYTES => 0x00900000,
304        ALIGN_512BYTES => 0x00A00000,
305        ALIGN_1024BYTES => 0x00B00000,
306        ALIGN_2048BYTES => 0x00C00000,
307        ALIGN_4096BYTES => 0x00D00000,
308        ALIGN_8192BYTES => 0x00E00000,
309        LNK_NRELOC_OVFL => 0x01000000,
310        MEM_DISCARDABLE => 0x02000000,
311        MEM_NOT_CACHED => 0x04000000,
312        MEM_NOT_PAGED => 0x08000000,
313        MEM_SHARED => 0x10000000,
314        MEM_EXECUTE => 0x20000000,
315        MEM_READ => 0x40000000,
316        MEM_WRITE => 0x80000000,
317    ),
318
319    Pointer(sub { $_->ctx->{raw_data_pointer} },
320        Field("raw_data", sub { $_->ctx->{raw_data_size} })
321    ),
322
323    Pointer(sub { $_->ctx->{line_numbers_pointer} },
324        Array(sub { $_->ctx->{number_of_line_numbers} },
325            Struct("line_numbers",
326                ULInt32("type"),
327                ULInt16("line_number"),
328            )
329        )
330    ),
331
332    Pointer(sub { $_->ctx->{relocations_pointer} },
333        Array(sub { $_->ctx->{number_of_relocations} },
334            Struct("relocations",
335                ULInt32("virtual_address"),
336                ULInt32("symbol_table_index"),
337                ULInt16("type"),
338            )
339        )
340    ),
341);
342
343sub min {
344    my @values = @_;
345    return undef if @values == 0;
346    my $ret_val = $values[0];
347    foreach my $val (@values) {
348        if ($val < $ret_val) {
349            $ret_val = $val;
350        }
351    }
352}
353
354our $pe32_parser = Struct("pe32_file",
355    # headers
356    $msdos_header,
357    $coff_header,
358    Anchor("_start_of_optional_header"),
359    $optional_header,
360    Anchor("_end_of_optional_header"),
361    Padding(sub { min(0,
362            $_->ctx->{coff_header}->{optional_header_size} -
363            $_->ctx->{_end_of_optional_header} +
364            $_->ctx->{_start_of_optional_header} ) }
365    ),
366
367    # sections
368    Array(sub { $_->ctx->{coff_header}->{number_of_sections} }, $section),
369);
370
371require Exporter;
372our @ISA = qw(Exporter);
373our @EXPORT = qw($pe32_parser);
374
375package Data::ParseBinary::lib::ExecPE32::OneDownAdapter;
376our @ISA;
377BEGIN { @ISA = qw{Data::ParseBinary::Adapter}; }
378
379sub _decode {
380    my ($self, $value) = @_;
381    return $value - 1;
382}
383sub _encode {
384    my ($self, $tvalue) = @_;
385    return $tvalue + 1;
386}
387
388package Data::ParseBinary::lib::ExecPE32::UTCTimeStampAdapter;
389our @ISA;
390BEGIN { @ISA = qw{Data::ParseBinary::Adapter}; }
391
392sub _decode {
393    my ($self, $value) = @_;
394    return $value;
395    #return time.ctime(obj)
396}
397sub _encode {
398    my ($self, $tvalue) = @_;
399    return $tvalue;
400    #return int(time.mktime(time.strptime(obj)))
401}
402
403package Data::ParseBinary::lib::ExecPE32::NamedSequence;
404our @ISA;
405BEGIN { @ISA = qw{Data::ParseBinary::Adapter}; }
406
407#"""
408#creates a mapping between the elements of a sequence and their respective
409#names. this is useful for sequences of a variable length, where each
410#element in the sequence has a name (as is the case with the data
411#directories of the PE header)
412#"""
413
414sub _init {
415    my ($self, %params) = @_;
416    die "You need to specify mapping to NamedSequence" unless $params{mapping};
417    $self->{mapping} = $params{mapping};
418    my $rev_mapping = {};
419    while (my ($key, $val) = each %{ $params{mapping} }) {
420        $rev_mapping->{$val} = $key;
421    }
422    $self->{rev_mapping} = $rev_mapping;
423}
424
425sub _decode {
426    my ($self, $value) = @_;
427    my $tvalue = {};
428    foreach my $ix (0..$#$value) {
429        my $name = $ix;
430        $name = $self->{mapping}->{$name} if exists $self->{mapping}->{$name};
431        $tvalue->{$name} = $value->[$ix];
432    }
433    return $tvalue;
434}
435
436sub _encode {
437    my ($self, $tvalue) = @_;
438    my $value = [];
439    while (my ($key, $val) = each %$tvalue) {
440        my $index = $key;
441        if (exists $self->{rev_mapping}->{$index}) {
442            $index = $self->{rev_mapping}->{$index};
443        } elsif ($index !~ /^\d+$/) {
444            die "NamedSequence: encoded value should be either a recognized name or a number";
445        }
446        $value->[$index] = $val;
447    }
448    return $value;
449}
450
451#__slots__ = ["mapping", "rev_mapping"]
452#prefix = "unnamed_"
453#def __init__(self, subcon, mapping):
454#    Adapter.__init__(self, subcon)
455#    self.mapping = mapping
456#    self.rev_mapping = dict((v, k) for k, v in mapping.iteritems())
457#def _encode(self, obj, context):
458#    d = obj.__dict__
459#    obj2 = [None] * len(d)
460#    for name, value in d.iteritems():
461#        if name in self.rev_mapping:
462#            index = self.rev_mapping[name]
463#        elif name.startswith("__"):
464#            obj2.pop(-1)
465#            continue
466#        elif name.startswith(self.prefix):
467#            index = int(name.split(self.prefix)[1])
468#        else:
469#            raise ValueError("no mapping defined for %r" % (name,))
470#        obj2[index] = value
471#    return obj2
472#def _decode(self, obj, context):
473#    obj2 = Container()
474#    for i, item in enumerate(obj):
475#        if i in self.mapping:
476#            name = self.mapping[i]
477#        else:
478#            name = "%s%d" % (self.prefix, i)
479#        setattr(obj2, name, item)
480#    return obj2
481
4821;
483
484__END__
485
486=head1 NAME
487
488Data::ParseBinary::Executable::PE32 - Parsing Win32 EXE / DLL files
489
490=head1 SYNOPSIS
491
492    use Data::ParseBinary::Executable::PE32 qw{$pe32_parser};
493    my $data = $pe32_parser->parse(CreateStreamReader(File => $fh));
494
495Can parse a Windows (and DOS?) EXE and DLL files. However, when building it back,
496there are some minor differences from the original file, and Windows declare that
497it's not a valid Win32 application.
498
499This is a part of the Data::ParseBinary package, and is just one ready-made parser.
500please go to the main page for additional usage info.
501
502=cut
503
504
505
506
507