1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package pe implements access to PE (Microsoft Windows Portable Executable) files.
6package pe
7
8import (
9	"bytes"
10	"compress/zlib"
11	"debug/dwarf"
12	"encoding/binary"
13	"fmt"
14	"io"
15	"os"
16	"strings"
17)
18
// Avoid use of post-Go 1.4 io features, to make safe for toolchain bootstrap.
// seekStart is the whence argument this file passes to Seek; 0 asks for an
// offset measured from the start of the stream (the same value as
// io.SeekStart in newer Go releases).
const seekStart = 0
21
// A File represents an open PE file.
//
// NewFile populates all exported fields. OptionalHeader is left nil when the
// file header reports an optional header size of 0.
type File struct {
	FileHeader
	OptionalHeader interface{} // of type *OptionalHeader32 or *OptionalHeader64
	Sections       []*Section
	Symbols        []*Symbol    // COFF symbols with auxiliary symbol records removed
	COFFSymbols    []COFFSymbol // all COFF symbols (including auxiliary symbol records)
	StringTable    StringTable

	// closer is set by Open to the underlying os.File so that Close can
	// release it; it stays nil for a File created with NewFile directly.
	closer io.Closer
}
33
34// Open opens the named file using os.Open and prepares it for use as a PE binary.
35func Open(name string) (*File, error) {
36	f, err := os.Open(name)
37	if err != nil {
38		return nil, err
39	}
40	ff, err := NewFile(f)
41	if err != nil {
42		f.Close()
43		return nil, err
44	}
45	ff.closer = f
46	return ff, nil
47}
48
49// Close closes the File.
50// If the File was created using NewFile directly instead of Open,
51// Close has no effect.
52func (f *File) Close() error {
53	var err error
54	if f.closer != nil {
55		err = f.closer.Close()
56		f.closer = nil
57	}
58	return err
59}
60
61// TODO(brainman): add Load function, as a replacement for NewFile, that does not call removeAuxSymbols (for performance)
62
63// NewFile creates a new File for accessing a PE binary in an underlying reader.
64func NewFile(r io.ReaderAt) (*File, error) {
65	f := new(File)
66	sr := io.NewSectionReader(r, 0, 1<<63-1)
67
68	var dosheader [96]byte
69	if _, err := r.ReadAt(dosheader[0:], 0); err != nil {
70		return nil, err
71	}
72	var base int64
73	if dosheader[0] == 'M' && dosheader[1] == 'Z' {
74		signoff := int64(binary.LittleEndian.Uint32(dosheader[0x3c:]))
75		var sign [4]byte
76		r.ReadAt(sign[:], signoff)
77		if !(sign[0] == 'P' && sign[1] == 'E' && sign[2] == 0 && sign[3] == 0) {
78			return nil, fmt.Errorf("Invalid PE COFF file signature of %v.", sign)
79		}
80		base = signoff + 4
81	} else {
82		base = int64(0)
83	}
84	sr.Seek(base, seekStart)
85	if err := binary.Read(sr, binary.LittleEndian, &f.FileHeader); err != nil {
86		return nil, err
87	}
88	switch f.FileHeader.Machine {
89	case IMAGE_FILE_MACHINE_UNKNOWN, IMAGE_FILE_MACHINE_ARMNT, IMAGE_FILE_MACHINE_AMD64, IMAGE_FILE_MACHINE_I386:
90	default:
91		return nil, fmt.Errorf("Unrecognised COFF file header machine value of 0x%x.", f.FileHeader.Machine)
92	}
93
94	var err error
95
96	// Read string table.
97	f.StringTable, err = readStringTable(&f.FileHeader, sr)
98	if err != nil {
99		return nil, err
100	}
101
102	// Read symbol table.
103	f.COFFSymbols, err = readCOFFSymbols(&f.FileHeader, sr)
104	if err != nil {
105		return nil, err
106	}
107	f.Symbols, err = removeAuxSymbols(f.COFFSymbols, f.StringTable)
108	if err != nil {
109		return nil, err
110	}
111
112	// Seek past file header.
113	_, err = sr.Seek(base+int64(binary.Size(f.FileHeader)), seekStart)
114	if err != nil {
115		return nil, fmt.Errorf("failure to seek past the file header: %v", err)
116	}
117
118	// Read optional header.
119	f.OptionalHeader, err = readOptionalHeader(sr, f.FileHeader.SizeOfOptionalHeader)
120	if err != nil {
121		return nil, err
122	}
123
124	// Process sections.
125	f.Sections = make([]*Section, f.FileHeader.NumberOfSections)
126	for i := 0; i < int(f.FileHeader.NumberOfSections); i++ {
127		sh := new(SectionHeader32)
128		if err := binary.Read(sr, binary.LittleEndian, sh); err != nil {
129			return nil, err
130		}
131		name, err := sh.fullName(f.StringTable)
132		if err != nil {
133			return nil, err
134		}
135		s := new(Section)
136		s.SectionHeader = SectionHeader{
137			Name:                 name,
138			VirtualSize:          sh.VirtualSize,
139			VirtualAddress:       sh.VirtualAddress,
140			Size:                 sh.SizeOfRawData,
141			Offset:               sh.PointerToRawData,
142			PointerToRelocations: sh.PointerToRelocations,
143			PointerToLineNumbers: sh.PointerToLineNumbers,
144			NumberOfRelocations:  sh.NumberOfRelocations,
145			NumberOfLineNumbers:  sh.NumberOfLineNumbers,
146			Characteristics:      sh.Characteristics,
147		}
148		r2 := r
149		if sh.PointerToRawData == 0 { // .bss must have all 0s
150			r2 = zeroReaderAt{}
151		}
152		s.sr = io.NewSectionReader(r2, int64(s.SectionHeader.Offset), int64(s.SectionHeader.Size))
153		s.ReaderAt = s.sr
154		f.Sections[i] = s
155	}
156	for i := range f.Sections {
157		var err error
158		f.Sections[i].Relocs, err = readRelocs(&f.Sections[i].SectionHeader, sr)
159		if err != nil {
160			return nil, err
161		}
162	}
163
164	return f, nil
165}
166
// zeroReaderAt is a ReaderAt whose content is zero bytes at every offset.
type zeroReaderAt struct{}

// ReadAt overwrites every byte of p with 0, whatever the value of off.
// It reports len(p) bytes read and never returns an error.
func (zeroReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
	for n = 0; n < len(p); n++ {
		p[n] = 0
	}
	return n, nil
}
177
// getString returns the NUL-terminated string that begins at index start of
// section, without the terminator.
// The boolean result is false when start lies outside section or when no
// NUL byte follows start; the returned string is empty in that case.
func getString(section []byte, start int) (string, bool) {
	if start >= 0 && start < len(section) {
		for i, c := range section[start:] {
			if c == 0 {
				return string(section[start : start+i]), true
			}
		}
	}
	return "", false
}
191
192// Section returns the first section with the given name, or nil if no such
193// section exists.
194func (f *File) Section(name string) *Section {
195	for _, s := range f.Sections {
196		if s.Name == name {
197			return s
198		}
199	}
200	return nil
201}
202
203func (f *File) DWARF() (*dwarf.Data, error) {
204	dwarfSuffix := func(s *Section) string {
205		switch {
206		case strings.HasPrefix(s.Name, ".debug_"):
207			return s.Name[7:]
208		case strings.HasPrefix(s.Name, ".zdebug_"):
209			return s.Name[8:]
210		default:
211			return ""
212		}
213
214	}
215
216	// sectionData gets the data for s and checks its size.
217	sectionData := func(s *Section) ([]byte, error) {
218		b, err := s.Data()
219		if err != nil && uint32(len(b)) < s.Size {
220			return nil, err
221		}
222
223		if 0 < s.VirtualSize && s.VirtualSize < s.Size {
224			b = b[:s.VirtualSize]
225		}
226
227		if len(b) >= 12 && string(b[:4]) == "ZLIB" {
228			dlen := binary.BigEndian.Uint64(b[4:12])
229			dbuf := make([]byte, dlen)
230			r, err := zlib.NewReader(bytes.NewBuffer(b[12:]))
231			if err != nil {
232				return nil, err
233			}
234			if _, err := io.ReadFull(r, dbuf); err != nil {
235				return nil, err
236			}
237			if err := r.Close(); err != nil {
238				return nil, err
239			}
240			b = dbuf
241		}
242		return b, nil
243	}
244
245	// There are many other DWARF sections, but these
246	// are the ones the debug/dwarf package uses.
247	// Don't bother loading others.
248	var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil}
249	for _, s := range f.Sections {
250		suffix := dwarfSuffix(s)
251		if suffix == "" {
252			continue
253		}
254		if _, ok := dat[suffix]; !ok {
255			continue
256		}
257
258		b, err := sectionData(s)
259		if err != nil {
260			return nil, err
261		}
262		dat[suffix] = b
263	}
264
265	d, err := dwarf.New(dat["abbrev"], nil, nil, dat["info"], dat["line"], nil, dat["ranges"], dat["str"])
266	if err != nil {
267		return nil, err
268	}
269
270	// Look for DWARF4 .debug_types sections.
271	for i, s := range f.Sections {
272		suffix := dwarfSuffix(s)
273		if suffix != "types" {
274			continue
275		}
276
277		b, err := sectionData(s)
278		if err != nil {
279			return nil, err
280		}
281
282		err = d.AddTypes(fmt.Sprintf("types-%d", i), b)
283		if err != nil {
284			return nil, err
285		}
286	}
287
288	return d, nil
289}
290
// TODO(brainman): document ImportDirectory once we decide what to do with it.

// ImportDirectory holds the fields of one import directory table entry.
// ImportedSymbols decodes the five exported fields from consecutive
// little-endian 32-bit words of a 20-byte record.
type ImportDirectory struct {
	// OriginalFirstThunk is the virtual address of the entry's thunk
	// array; ImportedSymbols stops decoding entries at the first one whose
	// OriginalFirstThunk is 0.
	OriginalFirstThunk uint32
	// TimeDateStamp, ForwarderChain and FirstThunk are decoded but not
	// otherwise used in this file.
	TimeDateStamp  uint32
	ForwarderChain uint32
	// Name is the virtual address of the NUL-terminated DLL name.
	Name       uint32
	FirstThunk uint32

	// dll is the DLL name that ImportedSymbols resolves from Name.
	dll string
}
302
303// ImportedSymbols returns the names of all symbols
304// referred to by the binary f that are expected to be
305// satisfied by other libraries at dynamic load time.
306// It does not return weak symbols.
307func (f *File) ImportedSymbols() ([]string, error) {
308	if f.OptionalHeader == nil {
309		return nil, nil
310	}
311
312	pe64 := f.Machine == IMAGE_FILE_MACHINE_AMD64
313
314	// grab the number of data directory entries
315	var dd_length uint32
316	if pe64 {
317		dd_length = f.OptionalHeader.(*OptionalHeader64).NumberOfRvaAndSizes
318	} else {
319		dd_length = f.OptionalHeader.(*OptionalHeader32).NumberOfRvaAndSizes
320	}
321
322	// check that the length of data directory entries is large
323	// enough to include the imports directory.
324	if dd_length < IMAGE_DIRECTORY_ENTRY_IMPORT+1 {
325		return nil, nil
326	}
327
328	// grab the import data directory entry
329	var idd DataDirectory
330	if pe64 {
331		idd = f.OptionalHeader.(*OptionalHeader64).DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
332	} else {
333		idd = f.OptionalHeader.(*OptionalHeader32).DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
334	}
335
336	// figure out which section contains the import directory table
337	var ds *Section
338	ds = nil
339	for _, s := range f.Sections {
340		if s.VirtualAddress <= idd.VirtualAddress && idd.VirtualAddress < s.VirtualAddress+s.VirtualSize {
341			ds = s
342			break
343		}
344	}
345
346	// didn't find a section, so no import libraries were found
347	if ds == nil {
348		return nil, nil
349	}
350
351	d, err := ds.Data()
352	if err != nil {
353		return nil, err
354	}
355
356	// seek to the virtual address specified in the import data directory
357	d = d[idd.VirtualAddress-ds.VirtualAddress:]
358
359	// start decoding the import directory
360	var ida []ImportDirectory
361	for len(d) >= 20 {
362		var dt ImportDirectory
363		dt.OriginalFirstThunk = binary.LittleEndian.Uint32(d[0:4])
364		dt.TimeDateStamp = binary.LittleEndian.Uint32(d[4:8])
365		dt.ForwarderChain = binary.LittleEndian.Uint32(d[8:12])
366		dt.Name = binary.LittleEndian.Uint32(d[12:16])
367		dt.FirstThunk = binary.LittleEndian.Uint32(d[16:20])
368		d = d[20:]
369		if dt.OriginalFirstThunk == 0 {
370			break
371		}
372		ida = append(ida, dt)
373	}
374	// TODO(brainman): this needs to be rewritten
375	//  ds.Data() returns contents of section containing import table. Why store in variable called "names"?
376	//  Why we are retrieving it second time? We already have it in "d", and it is not modified anywhere.
377	//  getString does not extracts a string from symbol string table (as getString doco says).
378	//  Why ds.Data() called again and again in the loop?
379	//  Needs test before rewrite.
380	names, _ := ds.Data()
381	var all []string
382	for _, dt := range ida {
383		dt.dll, _ = getString(names, int(dt.Name-ds.VirtualAddress))
384		d, _ = ds.Data()
385		// seek to OriginalFirstThunk
386		d = d[dt.OriginalFirstThunk-ds.VirtualAddress:]
387		for len(d) > 0 {
388			if pe64 { // 64bit
389				va := binary.LittleEndian.Uint64(d[0:8])
390				d = d[8:]
391				if va == 0 {
392					break
393				}
394				if va&0x8000000000000000 > 0 { // is Ordinal
395					// TODO add dynimport ordinal support.
396				} else {
397					fn, _ := getString(names, int(uint32(va)-ds.VirtualAddress+2))
398					all = append(all, fn+":"+dt.dll)
399				}
400			} else { // 32bit
401				va := binary.LittleEndian.Uint32(d[0:4])
402				d = d[4:]
403				if va == 0 {
404					break
405				}
406				if va&0x80000000 > 0 { // is Ordinal
407					// TODO add dynimport ordinal support.
408					//ord := va&0x0000FFFF
409				} else {
410					fn, _ := getString(names, int(va-ds.VirtualAddress+2))
411					all = append(all, fn+":"+dt.dll)
412				}
413			}
414		}
415	}
416
417	return all, nil
418}
419
// ImportedLibraries returns the names of all libraries
// referred to by the binary f that are expected to be
// linked with the binary at dynamic link time.
//
// It is not implemented for PE files yet: it always returns nil, nil.
func (f *File) ImportedLibraries() ([]string, error) {
	// TODO
	// cgo -dynimport doesn't use this for windows PE, so just return.
	return nil, nil
}
428
// FormatError is unused.
// The type is retained for compatibility.
type FormatError struct{}

// Error implements the error interface. The text is always "unknown error".
func (*FormatError) Error() string {
	const msg = "unknown error"
	return msg
}
437
438// readOptionalHeader accepts a io.ReadSeeker pointing to optional header in the PE file
439// and its size as seen in the file header.
440// It parses the given size of bytes and returns optional header. It infers whether the
441// bytes being parsed refer to 32 bit or 64 bit version of optional header.
442func readOptionalHeader(r io.ReadSeeker, sz uint16) (interface{}, error) {
443	// If optional header size is 0, return empty optional header.
444	if sz == 0 {
445		return nil, nil
446	}
447
448	var (
449		// First couple of bytes in option header state its type.
450		// We need to read them first to determine the type and
451		// validity of optional header.
452		ohMagic   uint16
453		ohMagicSz = binary.Size(ohMagic)
454	)
455
456	// If optional header size is greater than 0 but less than its magic size, return error.
457	if sz < uint16(ohMagicSz) {
458		return nil, fmt.Errorf("optional header size is less than optional header magic size")
459	}
460
461	// read reads from io.ReadSeeke, r, into data.
462	var err error
463	read := func(data interface{}) bool {
464		err = binary.Read(r, binary.LittleEndian, data)
465		return err == nil
466	}
467
468	if !read(&ohMagic) {
469		return nil, fmt.Errorf("failure to read optional header magic: %v", err)
470
471	}
472
473	switch ohMagic {
474	case 0x10b: // PE32
475		var (
476			oh32 OptionalHeader32
477			// There can be 0 or more data directories. So the minimum size of optional
478			// header is calculated by subtracting oh32.DataDirectory size from oh32 size.
479			oh32MinSz = binary.Size(oh32) - binary.Size(oh32.DataDirectory)
480		)
481
482		if sz < uint16(oh32MinSz) {
483			return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) of PE32 optional header", sz, oh32MinSz)
484		}
485
486		// Init oh32 fields
487		oh32.Magic = ohMagic
488		if !read(&oh32.MajorLinkerVersion) ||
489			!read(&oh32.MinorLinkerVersion) ||
490			!read(&oh32.SizeOfCode) ||
491			!read(&oh32.SizeOfInitializedData) ||
492			!read(&oh32.SizeOfUninitializedData) ||
493			!read(&oh32.AddressOfEntryPoint) ||
494			!read(&oh32.BaseOfCode) ||
495			!read(&oh32.BaseOfData) ||
496			!read(&oh32.ImageBase) ||
497			!read(&oh32.SectionAlignment) ||
498			!read(&oh32.FileAlignment) ||
499			!read(&oh32.MajorOperatingSystemVersion) ||
500			!read(&oh32.MinorOperatingSystemVersion) ||
501			!read(&oh32.MajorImageVersion) ||
502			!read(&oh32.MinorImageVersion) ||
503			!read(&oh32.MajorSubsystemVersion) ||
504			!read(&oh32.MinorSubsystemVersion) ||
505			!read(&oh32.Win32VersionValue) ||
506			!read(&oh32.SizeOfImage) ||
507			!read(&oh32.SizeOfHeaders) ||
508			!read(&oh32.CheckSum) ||
509			!read(&oh32.Subsystem) ||
510			!read(&oh32.DllCharacteristics) ||
511			!read(&oh32.SizeOfStackReserve) ||
512			!read(&oh32.SizeOfStackCommit) ||
513			!read(&oh32.SizeOfHeapReserve) ||
514			!read(&oh32.SizeOfHeapCommit) ||
515			!read(&oh32.LoaderFlags) ||
516			!read(&oh32.NumberOfRvaAndSizes) {
517			return nil, fmt.Errorf("failure to read PE32 optional header: %v", err)
518		}
519
520		dd, err := readDataDirectories(r, sz-uint16(oh32MinSz), oh32.NumberOfRvaAndSizes)
521		if err != nil {
522			return nil, err
523		}
524
525		copy(oh32.DataDirectory[:], dd)
526
527		return &oh32, nil
528	case 0x20b: // PE32+
529		var (
530			oh64 OptionalHeader64
531			// There can be 0 or more data directories. So the minimum size of optional
532			// header is calculated by subtracting oh64.DataDirectory size from oh64 size.
533			oh64MinSz = binary.Size(oh64) - binary.Size(oh64.DataDirectory)
534		)
535
536		if sz < uint16(oh64MinSz) {
537			return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) for PE32+ optional header", sz, oh64MinSz)
538		}
539
540		// Init oh64 fields
541		oh64.Magic = ohMagic
542		if !read(&oh64.MajorLinkerVersion) ||
543			!read(&oh64.MinorLinkerVersion) ||
544			!read(&oh64.SizeOfCode) ||
545			!read(&oh64.SizeOfInitializedData) ||
546			!read(&oh64.SizeOfUninitializedData) ||
547			!read(&oh64.AddressOfEntryPoint) ||
548			!read(&oh64.BaseOfCode) ||
549			!read(&oh64.ImageBase) ||
550			!read(&oh64.SectionAlignment) ||
551			!read(&oh64.FileAlignment) ||
552			!read(&oh64.MajorOperatingSystemVersion) ||
553			!read(&oh64.MinorOperatingSystemVersion) ||
554			!read(&oh64.MajorImageVersion) ||
555			!read(&oh64.MinorImageVersion) ||
556			!read(&oh64.MajorSubsystemVersion) ||
557			!read(&oh64.MinorSubsystemVersion) ||
558			!read(&oh64.Win32VersionValue) ||
559			!read(&oh64.SizeOfImage) ||
560			!read(&oh64.SizeOfHeaders) ||
561			!read(&oh64.CheckSum) ||
562			!read(&oh64.Subsystem) ||
563			!read(&oh64.DllCharacteristics) ||
564			!read(&oh64.SizeOfStackReserve) ||
565			!read(&oh64.SizeOfStackCommit) ||
566			!read(&oh64.SizeOfHeapReserve) ||
567			!read(&oh64.SizeOfHeapCommit) ||
568			!read(&oh64.LoaderFlags) ||
569			!read(&oh64.NumberOfRvaAndSizes) {
570			return nil, fmt.Errorf("failure to read PE32+ optional header: %v", err)
571		}
572
573		dd, err := readDataDirectories(r, sz-uint16(oh64MinSz), oh64.NumberOfRvaAndSizes)
574		if err != nil {
575			return nil, err
576		}
577
578		copy(oh64.DataDirectory[:], dd)
579
580		return &oh64, nil
581	default:
582		return nil, fmt.Errorf("optional header has unexpected Magic of 0x%x", ohMagic)
583	}
584}
585
586// readDataDirectories accepts a io.ReadSeeker pointing to data directories in the PE file,
587// its size and number of data directories as seen in optional header.
588// It parses the given size of bytes and returns given number of data directories.
589func readDataDirectories(r io.ReadSeeker, sz uint16, n uint32) ([]DataDirectory, error) {
590	ddSz := binary.Size(DataDirectory{})
591	if uint32(sz) != n*uint32(ddSz) {
592		return nil, fmt.Errorf("size of data directories(%d) is inconsistent with number of data directories(%d)", sz, n)
593	}
594
595	dd := make([]DataDirectory, n)
596	if err := binary.Read(r, binary.LittleEndian, dd); err != nil {
597		return nil, fmt.Errorf("failure to read data directories: %v", err)
598	}
599
600	return dd, nil
601}
602