1// Copyright 2015 The Go Authors.  All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package dwarf
6
7import (
8	"errors"
9	"fmt"
10	"io"
11	"path"
12)
13
// A LineReader reads a sequence of LineEntry structures from a DWARF
// "line" section for a single compilation unit. LineEntries occur in
// order of increasing PC and each LineEntry gives metadata for the
// instructions from that LineEntry's PC to just before the next
// LineEntry's PC. The last entry will have its EndSequence field set.
type LineReader struct {
	// buf is the decoding cursor over the line section. Both the
	// header and the line number program opcodes are read through
	// it, and decoding errors are left in buf.err.
	buf buf

	// Original .debug_line section data. Used by Seek and Reset to
	// re-slice buf when the reader is repositioned.
	section []byte

	// Header information, filled in by readHeader.
	//
	// version is the line table version; only versions 2 through 4
	// are accepted. maxOpsPerInstruction is read from the header
	// for version 4 and later and is 1 otherwise; readHeader
	// rejects 0. lineBase, lineRange, and opcodeBase parameterize
	// the decoding of special opcodes; readHeader rejects a
	// lineRange of 0. opcodeLengths is indexed by standard opcode
	// number and gives the number of arguments that opcode takes;
	// index 0 is unused. directories[0] is the compilation
	// directory. fileEntries[0] is nil because file numbering
	// starts at 1.
	version              uint16
	minInstructionLength int
	maxOpsPerInstruction int
	defaultIsStmt        bool
	lineBase             int
	lineRange            int
	opcodeBase           int
	opcodeLengths        []int
	directories          []string
	fileEntries          []*LineFile

	programOffset Offset // section offset of line number program
	endOffset     Offset // section offset of byte following program

	// initialFileEntries is the length of fileEntries once the
	// header has been read. Reset truncates fileEntries back to
	// this length, discarding entries added while running the
	// line number program.
	initialFileEntries int // initial length of fileEntries

	// Current line number program state machine registers
	state     LineEntry // public state
	fileIndex int       // private state; index into fileEntries that selects state.File
}
46
// A LineEntry is a row in a DWARF line table.
type LineEntry struct {
	// Address is the program-counter value of a machine
	// instruction generated by the compiler. This LineEntry
	// applies to each instruction from Address to just before the
	// Address of the next LineEntry.
	Address uint64

	// OpIndex is the index of an operation within a VLIW
	// instruction. The index of the first operation is 0. For
	// non-VLIW architectures, it will always be 0. Address and
	// OpIndex together form an operation pointer that can
	// reference any individual operation within the instruction
	// stream.
	OpIndex int

	// File is the source file corresponding to these
	// instructions. It is nil if the current file index does not
	// refer to an entry in the line table's file list.
	File *LineFile

	// Line is the source code line number corresponding to these
	// instructions. Lines are numbered beginning at 1. It may be
	// 0 if these instructions cannot be attributed to any source
	// line.
	Line int

	// Column is the column number within the source line of these
	// instructions. Columns are numbered beginning at 1. It may
	// be 0 to indicate the "left edge" of the line.
	Column int

	// IsStmt indicates that Address is a recommended breakpoint
	// location, such as the beginning of a line, statement, or a
	// distinct subpart of a statement.
	IsStmt bool

	// BasicBlock indicates that Address is the beginning of a
	// basic block.
	BasicBlock bool

	// PrologueEnd indicates that Address is one (of possibly
	// many) PCs where execution should be suspended for a
	// breakpoint on entry to the containing function.
	//
	// Added in DWARF 3.
	PrologueEnd bool

	// EpilogueBegin indicates that Address is one (of possibly
	// many) PCs where execution should be suspended for a
	// breakpoint on exit from this function.
	//
	// Added in DWARF 3.
	EpilogueBegin bool

	// ISA is the instruction set architecture for these
	// instructions. Possible ISA values should be defined by the
	// applicable ABI specification.
	//
	// Added in DWARF 3.
	ISA int

	// Discriminator is an arbitrary integer indicating the block
	// to which these instructions belong. It serves to
	// distinguish among multiple blocks that may all be
	// associated with the same source file, line, and column.
	// Where only one block exists for a given source position, it
	// should be 0.
	//
	// Added in DWARF 4.
	Discriminator int

	// EndSequence indicates that Address is the first byte after
	// the end of a sequence of target machine instructions. If it
	// is set, only this and the Address field are meaningful. A
	// line number table may contain information for multiple
	// potentially disjoint instruction sequences. The last entry
	// in a line table should always have EndSequence set.
	EndSequence bool
}
125
// A LineFile is a source file referenced by a DWARF line table entry.
type LineFile struct {
	// Name is the file's path. A name that is not absolute in the
	// line table is joined onto the directory its entry refers
	// to before being stored here.
	Name   string
	Mtime  uint64 // Implementation defined modification time, or 0 if unknown
	Length int    // File length, or 0 if unknown
}
132
133// LineReader returns a new reader for the line table of compilation
134// unit cu, which must be an Entry with tag TagCompileUnit.
135//
136// If this compilation unit has no line table, it returns nil, nil.
137func (d *Data) LineReader(cu *Entry) (*LineReader, error) {
138	if d.line == nil {
139		// No line tables available.
140		return nil, nil
141	}
142
143	// Get line table information from cu.
144	off, ok := cu.Val(AttrStmtList).(int64)
145	if !ok {
146		// cu has no line table.
147		return nil, nil
148	}
149	if off > int64(len(d.line)) {
150		return nil, errors.New("AttrStmtList value out of range")
151	}
152	// AttrCompDir is optional if all file names are absolute. Use
153	// the empty string if it's not present.
154	compDir, _ := cu.Val(AttrCompDir).(string)
155
156	// Create the LineReader.
157	u := &d.unit[d.offsetToUnit(cu.Offset)]
158	buf := makeBuf(d, u, "line", Offset(off), d.line[off:])
159	// The compilation directory is implicitly directories[0].
160	r := LineReader{buf: buf, section: d.line, directories: []string{compDir}}
161
162	// Read the header.
163	if err := r.readHeader(); err != nil {
164		return nil, err
165	}
166
167	// Initialize line reader state.
168	r.Reset()
169
170	return &r, nil
171}
172
173// readHeader reads the line number program header from r.buf and sets
174// all of the header fields in r.
175func (r *LineReader) readHeader() error {
176	buf := &r.buf
177
178	// Read basic header fields [DWARF2 6.2.4].
179	hdrOffset := buf.off
180	unitLength, dwarf64 := buf.unitLength()
181	r.endOffset = buf.off + unitLength
182	if r.endOffset > buf.off+Offset(len(buf.data)) {
183		return DecodeError{"line", hdrOffset, fmt.Sprintf("line table end %d exceeds section size %d", r.endOffset, buf.off+Offset(len(buf.data)))}
184	}
185	r.version = buf.uint16()
186	if buf.err == nil && (r.version < 2 || r.version > 4) {
187		// DWARF goes to all this effort to make new opcodes
188		// backward-compatible, and then adds fields right in
189		// the middle of the header in new versions, so we're
190		// picky about only supporting known line table
191		// versions.
192		return DecodeError{"line", hdrOffset, fmt.Sprintf("unknown line table version %d", r.version)}
193	}
194	var headerLength Offset
195	if dwarf64 {
196		headerLength = Offset(buf.uint64())
197	} else {
198		headerLength = Offset(buf.uint32())
199	}
200	r.programOffset = buf.off + headerLength
201	r.minInstructionLength = int(buf.uint8())
202	if r.version >= 4 {
203		// [DWARF4 6.2.4]
204		r.maxOpsPerInstruction = int(buf.uint8())
205	} else {
206		r.maxOpsPerInstruction = 1
207	}
208	r.defaultIsStmt = buf.uint8() != 0
209	r.lineBase = int(int8(buf.uint8()))
210	r.lineRange = int(buf.uint8())
211
212	// Validate header.
213	if buf.err != nil {
214		return buf.err
215	}
216	if r.maxOpsPerInstruction == 0 {
217		return DecodeError{"line", hdrOffset, "invalid maximum operations per instruction: 0"}
218	}
219	if r.lineRange == 0 {
220		return DecodeError{"line", hdrOffset, "invalid line range: 0"}
221	}
222
223	// Read standard opcode length table. This table starts with opcode 1.
224	r.opcodeBase = int(buf.uint8())
225	r.opcodeLengths = make([]int, r.opcodeBase)
226	for i := 1; i < r.opcodeBase; i++ {
227		r.opcodeLengths[i] = int(buf.uint8())
228	}
229
230	// Validate opcode lengths.
231	if buf.err != nil {
232		return buf.err
233	}
234	for i, length := range r.opcodeLengths {
235		if known, ok := knownOpcodeLengths[i]; ok && known != length {
236			return DecodeError{"line", hdrOffset, fmt.Sprintf("opcode %d expected to have length %d, but has length %d", i, known, length)}
237		}
238	}
239
240	// Read include directories table. The caller already set
241	// directories[0] to the compilation directory.
242	for {
243		directory := buf.string()
244		if buf.err != nil {
245			return buf.err
246		}
247		if len(directory) == 0 {
248			break
249		}
250		if !path.IsAbs(directory) {
251			// Relative paths are implicitly relative to
252			// the compilation directory.
253			directory = path.Join(r.directories[0], directory)
254		}
255		r.directories = append(r.directories, directory)
256	}
257
258	// Read file name list. File numbering starts with 1, so leave
259	// the first entry nil.
260	r.fileEntries = make([]*LineFile, 1)
261	for {
262		if done, err := r.readFileEntry(); err != nil {
263			return err
264		} else if done {
265			break
266		}
267	}
268	r.initialFileEntries = len(r.fileEntries)
269
270	return buf.err
271}
272
273// readFileEntry reads a file entry from either the header or a
274// DW_LNE_define_file extended opcode and adds it to r.fileEntries. A
275// true return value indicates that there are no more entries to read.
276func (r *LineReader) readFileEntry() (bool, error) {
277	name := r.buf.string()
278	if r.buf.err != nil {
279		return false, r.buf.err
280	}
281	if len(name) == 0 {
282		return true, nil
283	}
284	off := r.buf.off
285	dirIndex := int(r.buf.uint())
286	if !path.IsAbs(name) {
287		if dirIndex >= len(r.directories) {
288			return false, DecodeError{"line", off, "directory index too large"}
289		}
290		name = path.Join(r.directories[dirIndex], name)
291	}
292	mtime := r.buf.uint()
293	length := int(r.buf.uint())
294
295	r.fileEntries = append(r.fileEntries, &LineFile{name, mtime, length})
296	return false, nil
297}
298
299// updateFile updates r.state.File after r.fileIndex has
300// changed or r.fileEntries has changed.
301func (r *LineReader) updateFile() {
302	if r.fileIndex < len(r.fileEntries) {
303		r.state.File = r.fileEntries[r.fileIndex]
304	} else {
305		r.state.File = nil
306	}
307}
308
309// Next sets *entry to the next row in this line table and moves to
310// the next row. If there are no more entries and the line table is
311// properly terminated, it returns io.EOF.
312//
313// Rows are always in order of increasing entry.Address, but
314// entry.Line may go forward or backward.
315func (r *LineReader) Next(entry *LineEntry) error {
316	if r.buf.err != nil {
317		return r.buf.err
318	}
319
320	// Execute opcodes until we reach an opcode that emits a line
321	// table entry.
322	for {
323		if len(r.buf.data) == 0 {
324			return io.EOF
325		}
326		emit := r.step(entry)
327		if r.buf.err != nil {
328			return r.buf.err
329		}
330		if emit {
331			return nil
332		}
333	}
334}
335
// knownOpcodeLengths gives the opcode lengths (in varint arguments)
// of known standard opcodes. readHeader rejects a line table whose
// header declares a different length for any opcode listed here;
// opcodes that are not listed are not checked.
var knownOpcodeLengths = map[int]int{
	lnsCopy:             0,
	lnsAdvancePC:        1,
	lnsAdvanceLine:      1,
	lnsSetFile:          1,
	lnsNegateStmt:       0,
	lnsSetBasicBlock:    0,
	lnsConstAddPC:       0,
	lnsSetPrologueEnd:   0,
	lnsSetEpilogueBegin: 0,
	lnsSetISA:           1,
	// lnsFixedAdvancePC takes a uint16 rather than a varint; it's
	// unclear what length the header is supposed to claim, so
	// ignore it.
}
353
354// step processes the next opcode and updates r.state. If the opcode
355// emits a row in the line table, this updates *entry and returns
356// true.
357func (r *LineReader) step(entry *LineEntry) bool {
358	opcode := int(r.buf.uint8())
359
360	if opcode >= r.opcodeBase {
361		// Special opcode [DWARF2 6.2.5.1, DWARF4 6.2.5.1]
362		adjustedOpcode := opcode - r.opcodeBase
363		r.advancePC(adjustedOpcode / r.lineRange)
364		lineDelta := r.lineBase + int(adjustedOpcode)%r.lineRange
365		r.state.Line += lineDelta
366		goto emit
367	}
368
369	switch opcode {
370	case 0:
371		// Extended opcode [DWARF2 6.2.5.3]
372		length := Offset(r.buf.uint())
373		startOff := r.buf.off
374		opcode := r.buf.uint8()
375
376		switch opcode {
377		case lneEndSequence:
378			r.state.EndSequence = true
379			*entry = r.state
380			r.resetState()
381
382		case lneSetAddress:
383			r.state.Address = r.buf.addr()
384
385		case lneDefineFile:
386			if done, err := r.readFileEntry(); err != nil {
387				r.buf.err = err
388				return false
389			} else if done {
390				r.buf.err = DecodeError{"line", startOff, "malformed DW_LNE_define_file operation"}
391				return false
392			}
393			r.updateFile()
394
395		case lneSetDiscriminator:
396			// [DWARF4 6.2.5.3]
397			r.state.Discriminator = int(r.buf.uint())
398		}
399
400		r.buf.skip(int(startOff + length - r.buf.off))
401
402		if opcode == lneEndSequence {
403			return true
404		}
405
406	// Standard opcodes [DWARF2 6.2.5.2]
407	case lnsCopy:
408		goto emit
409
410	case lnsAdvancePC:
411		r.advancePC(int(r.buf.uint()))
412
413	case lnsAdvanceLine:
414		r.state.Line += int(r.buf.int())
415
416	case lnsSetFile:
417		r.fileIndex = int(r.buf.uint())
418		r.updateFile()
419
420	case lnsSetColumn:
421		r.state.Column = int(r.buf.uint())
422
423	case lnsNegateStmt:
424		r.state.IsStmt = !r.state.IsStmt
425
426	case lnsSetBasicBlock:
427		r.state.BasicBlock = true
428
429	case lnsConstAddPC:
430		r.advancePC((255 - r.opcodeBase) / r.lineRange)
431
432	case lnsFixedAdvancePC:
433		r.state.Address += uint64(r.buf.uint16())
434
435	// DWARF3 standard opcodes [DWARF3 6.2.5.2]
436	case lnsSetPrologueEnd:
437		r.state.PrologueEnd = true
438
439	case lnsSetEpilogueBegin:
440		r.state.EpilogueBegin = true
441
442	case lnsSetISA:
443		r.state.ISA = int(r.buf.uint())
444
445	default:
446		// Unhandled standard opcode. Skip the number of
447		// arguments that the prologue says this opcode has.
448		for i := 0; i < r.opcodeLengths[opcode]; i++ {
449			r.buf.uint()
450		}
451	}
452	return false
453
454emit:
455	*entry = r.state
456	r.state.BasicBlock = false
457	r.state.PrologueEnd = false
458	r.state.EpilogueBegin = false
459	r.state.Discriminator = 0
460	return true
461}
462
463// advancePC advances "operation pointer" (the combination of Address
464// and OpIndex) in r.state by opAdvance steps.
465func (r *LineReader) advancePC(opAdvance int) {
466	opIndex := r.state.OpIndex + opAdvance
467	r.state.Address += uint64(r.minInstructionLength * (opIndex / r.maxOpsPerInstruction))
468	r.state.OpIndex = opIndex % r.maxOpsPerInstruction
469}
470
// A LineReaderPos represents a position in a line table.
//
// Values are produced by LineReader.Tell and consumed by
// LineReader.Seek.
type LineReaderPos struct {
	// off is the current offset in the DWARF line section.
	off Offset
	// numFileEntries is the length of fileEntries.
	numFileEntries int
	// state and fileIndex are the state machine state at
	// offset off.
	state     LineEntry
	fileIndex int
}
482
483// Tell returns the current position in the line table.
484func (r *LineReader) Tell() LineReaderPos {
485	return LineReaderPos{r.buf.off, len(r.fileEntries), r.state, r.fileIndex}
486}
487
488// Seek restores the line table reader to a position returned by Tell.
489//
490// The argument pos must have been returned by a call to Tell on this
491// line table.
492func (r *LineReader) Seek(pos LineReaderPos) {
493	r.buf.off = pos.off
494	r.buf.data = r.section[r.buf.off:r.endOffset]
495	r.fileEntries = r.fileEntries[:pos.numFileEntries]
496	r.state = pos.state
497	r.fileIndex = pos.fileIndex
498}
499
500// Reset repositions the line table reader at the beginning of the
501// line table.
502func (r *LineReader) Reset() {
503	// Reset buffer to the line number program offset.
504	r.buf.off = r.programOffset
505	r.buf.data = r.section[r.buf.off:r.endOffset]
506
507	// Reset file entries list.
508	r.fileEntries = r.fileEntries[:r.initialFileEntries]
509
510	// Reset line number program state.
511	r.resetState()
512}
513
514// resetState resets r.state to its default values
515func (r *LineReader) resetState() {
516	// Reset the state machine registers to the defaults given in
517	// [DWARF4 6.2.2].
518	r.state = LineEntry{
519		Address:       0,
520		OpIndex:       0,
521		File:          nil,
522		Line:          1,
523		Column:        0,
524		IsStmt:        r.defaultIsStmt,
525		BasicBlock:    false,
526		PrologueEnd:   false,
527		EpilogueBegin: false,
528		ISA:           0,
529		Discriminator: 0,
530	}
531	r.fileIndex = 1
532	r.updateFile()
533}
534
// ErrUnknownPC is the error returned by LineReader.SeekPC when the
// seek PC is not covered by any entry in the line table.
var ErrUnknownPC = errors.New("ErrUnknownPC")
538
539// SeekPC sets *entry to the LineEntry that includes pc and positions
540// the reader on the next entry in the line table. If necessary, this
541// will seek backwards to find pc.
542//
543// If pc is not covered by any entry in this line table, SeekPC
544// returns ErrUnknownPC. In this case, *entry and the final seek
545// position are unspecified.
546//
547// Note that DWARF line tables only permit sequential, forward scans.
548// Hence, in the worst case, this takes time linear in the size of the
549// line table. If the caller wishes to do repeated fast PC lookups, it
550// should build an appropriate index of the line table.
551func (r *LineReader) SeekPC(pc uint64, entry *LineEntry) error {
552	if err := r.Next(entry); err != nil {
553		return err
554	}
555	if entry.Address > pc {
556		// We're too far. Start at the beginning of the table.
557		r.Reset()
558		if err := r.Next(entry); err != nil {
559			return err
560		}
561		if entry.Address > pc {
562			// The whole table starts after pc.
563			r.Reset()
564			return ErrUnknownPC
565		}
566	}
567
568	// Scan until we pass pc, then back up one.
569	for {
570		var next LineEntry
571		pos := r.Tell()
572		if err := r.Next(&next); err != nil {
573			if err == io.EOF {
574				return ErrUnknownPC
575			}
576			return err
577		}
578		if next.Address > pc {
579			if entry.EndSequence {
580				// pc is in a hole in the table.
581				return ErrUnknownPC
582			}
583			// entry is the desired entry. Back up the
584			// cursor to "next" and return success.
585			r.Seek(pos)
586			return nil
587		}
588		*entry = next
589	}
590}
591