1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package lex
6
7import (
8	"fmt"
9	"os"
10	"path/filepath"
11	"strconv"
12	"strings"
13	"text/scanner"
14
15	"cmd/asm/internal/flags"
16	"cmd/internal/src"
17)
18
// Input is the main input: a stack of readers and some macro definitions.
// It also handles #include processing (by pushing onto the input stack)
// and parses and instantiates macro definitions.
type Input struct {
	Stack
	// includes lists the directories tried, in order, by #include when the
	// file cannot be opened by its name alone.
	includes []string
	// beginningOfLine records whether the next token starts a line;
	// a '#' is only accepted when it is set.
	beginningOfLine bool
	// ifdefStack has one entry per open #ifdef/#ifndef. The top entry
	// says whether tokens are currently being passed through.
	ifdefStack []bool
	// macros maps a macro name to its definition.
	macros map[string]*Macro
	text   string // Text of last token returned by Next.
	// peek, peekToken and peekText hold a single token that has been put
	// back; when peek is set, Next returns it before reading more input.
	peek      bool
	peekToken ScanToken
	peekText  string
}
33
34// NewInput returns an Input from the given path.
35func NewInput(name string) *Input {
36	return &Input{
37		// include directories: look in source dir, then -I directories.
38		includes:        append([]string{filepath.Dir(name)}, flags.I...),
39		beginningOfLine: true,
40		macros:          predefine(flags.D),
41	}
42}
43
44// predefine installs the macros set by the -D flag on the command line.
45func predefine(defines flags.MultiFlag) map[string]*Macro {
46	macros := make(map[string]*Macro)
47	for _, name := range defines {
48		value := "1"
49		i := strings.IndexRune(name, '=')
50		if i > 0 {
51			name, value = name[:i], name[i+1:]
52		}
53		tokens := Tokenize(name)
54		if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident {
55			fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0])
56			flags.Usage()
57		}
58		macros[name] = &Macro{
59			name:   name,
60			args:   nil,
61			tokens: Tokenize(value),
62		}
63	}
64	return macros
65}
66
// panicOnError, when set, makes Input.Error panic with the formatted
// message instead of printing it and exiting the process.
var panicOnError bool // For testing.
68
69func (in *Input) Error(args ...interface{}) {
70	if panicOnError {
71		panic(fmt.Errorf("%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...)))
72	}
73	fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))
74	os.Exit(1)
75}
76
77// expectText is like Error but adds "got XXX" where XXX is a quoted representation of the most recent token.
78func (in *Input) expectText(args ...interface{}) {
79	in.Error(append(args, "; got", strconv.Quote(in.Stack.Text()))...)
80}
81
82// enabled reports whether the input is enabled by an ifdef, or is at the top level.
83func (in *Input) enabled() bool {
84	return len(in.ifdefStack) == 0 || in.ifdefStack[len(in.ifdefStack)-1]
85}
86
87func (in *Input) expectNewline(directive string) {
88	tok := in.Stack.Next()
89	if tok != '\n' {
90		in.expectText("expected newline after", directive)
91	}
92}
93
94func (in *Input) Next() ScanToken {
95	if in.peek {
96		in.peek = false
97		tok := in.peekToken
98		in.text = in.peekText
99		return tok
100	}
101	// If we cannot generate a token after 100 macro invocations, we're in trouble.
102	// The usual case is caught by Push, below, but be safe.
103	for nesting := 0; nesting < 100; {
104		tok := in.Stack.Next()
105		switch tok {
106		case '#':
107			if !in.beginningOfLine {
108				in.Error("'#' must be first item on line")
109			}
110			in.beginningOfLine = in.hash()
111		case scanner.Ident:
112			// Is it a macro name?
113			name := in.Stack.Text()
114			macro := in.macros[name]
115			if macro != nil {
116				nesting++
117				in.invokeMacro(macro)
118				continue
119			}
120			fallthrough
121		default:
122			if tok == scanner.EOF && len(in.ifdefStack) > 0 {
123				// We're skipping text but have run out of input with no #endif.
124				in.Error("unclosed #ifdef or #ifndef")
125			}
126			in.beginningOfLine = tok == '\n'
127			if in.enabled() {
128				in.text = in.Stack.Text()
129				return tok
130			}
131		}
132	}
133	in.Error("recursive macro invocation")
134	return 0
135}
136
// Text returns the text of the token most recently returned by Next.
func (in *Input) Text() string {
	return in.text
}
140
141// hash processes a # preprocessor directive. It returns true iff it completes.
142func (in *Input) hash() bool {
143	// We have a '#'; it must be followed by a known word (define, include, etc.).
144	tok := in.Stack.Next()
145	if tok != scanner.Ident {
146		in.expectText("expected identifier after '#'")
147	}
148	if !in.enabled() {
149		// Can only start including again if we are at #else or #endif but also
150		// need to keep track of nested #if[n]defs.
151		// We let #line through because it might affect errors.
152		switch in.Stack.Text() {
153		case "else", "endif", "ifdef", "ifndef", "line":
154			// Press on.
155		default:
156			return false
157		}
158	}
159	switch in.Stack.Text() {
160	case "define":
161		in.define()
162	case "else":
163		in.else_()
164	case "endif":
165		in.endif()
166	case "ifdef":
167		in.ifdef(true)
168	case "ifndef":
169		in.ifdef(false)
170	case "include":
171		in.include()
172	case "line":
173		in.line()
174	case "undef":
175		in.undef()
176	default:
177		in.Error("unexpected token after '#':", in.Stack.Text())
178	}
179	return true
180}
181
182// macroName returns the name for the macro being referenced.
183func (in *Input) macroName() string {
184	// We use the Stack's input method; no macro processing at this stage.
185	tok := in.Stack.Next()
186	if tok != scanner.Ident {
187		in.expectText("expected identifier after # directive")
188	}
189	// Name is alphanumeric by definition.
190	return in.Stack.Text()
191}
192
193// #define processing.
194func (in *Input) define() {
195	name := in.macroName()
196	args, tokens := in.macroDefinition(name)
197	in.defineMacro(name, args, tokens)
198}
199
200// defineMacro stores the macro definition in the Input.
201func (in *Input) defineMacro(name string, args []string, tokens []Token) {
202	if in.macros[name] != nil {
203		in.Error("redefinition of macro:", name)
204	}
205	in.macros[name] = &Macro{
206		name:   name,
207		args:   args,
208		tokens: tokens,
209	}
210}
211
212// macroDefinition returns the list of formals and the tokens of the definition.
213// The argument list is nil for no parens on the definition; otherwise a list of
214// formal argument names.
215func (in *Input) macroDefinition(name string) ([]string, []Token) {
216	prevCol := in.Stack.Col()
217	tok := in.Stack.Next()
218	if tok == '\n' || tok == scanner.EOF {
219		return nil, nil // No definition for macro
220	}
221	var args []string
222	// The C preprocessor treats
223	//	#define A(x)
224	// and
225	//	#define A (x)
226	// distinctly: the first is a macro with arguments, the second without.
227	// Distinguish these cases using the column number, since we don't
228	// see the space itself. Note that text/scanner reports the position at the
229	// end of the token. It's where you are now, and you just read this token.
230	if tok == '(' && in.Stack.Col() == prevCol+1 {
231		// Macro has arguments. Scan list of formals.
232		acceptArg := true
233		args = []string{} // Zero length but not nil.
234	Loop:
235		for {
236			tok = in.Stack.Next()
237			switch tok {
238			case ')':
239				tok = in.Stack.Next() // First token of macro definition.
240				break Loop
241			case ',':
242				if acceptArg {
243					in.Error("bad syntax in definition for macro:", name)
244				}
245				acceptArg = true
246			case scanner.Ident:
247				if !acceptArg {
248					in.Error("bad syntax in definition for macro:", name)
249				}
250				arg := in.Stack.Text()
251				if i := lookup(args, arg); i >= 0 {
252					in.Error("duplicate argument", arg, "in definition for macro:", name)
253				}
254				args = append(args, arg)
255				acceptArg = false
256			default:
257				in.Error("bad definition for macro:", name)
258			}
259		}
260	}
261	var tokens []Token
262	// Scan to newline. Backslashes escape newlines.
263	for tok != '\n' {
264		if tok == scanner.EOF {
265			in.Error("missing newline in definition for macro:", name)
266		}
267		if tok == '\\' {
268			tok = in.Stack.Next()
269			if tok != '\n' && tok != '\\' {
270				in.Error(`can only escape \ or \n in definition for macro:`, name)
271			}
272		}
273		tokens = append(tokens, Make(tok, in.Stack.Text()))
274		tok = in.Stack.Next()
275	}
276	return args, tokens
277}
278
// lookup returns the index of the first element of args equal to arg,
// or -1 if there is none.
func lookup(args []string, arg string) int {
	for i := 0; i < len(args); i++ {
		if args[i] == arg {
			return i
		}
	}
	return -1
}
287
288// invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual
289// parameters substituted for the formals.
290// Invoking a macro does not touch the PC/line history.
291func (in *Input) invokeMacro(macro *Macro) {
292	// If the macro has no arguments, just substitute the text.
293	if macro.args == nil {
294		in.Push(NewSlice(in.Base(), in.Line(), macro.tokens))
295		return
296	}
297	tok := in.Stack.Next()
298	if tok != '(' {
299		// If the macro has arguments but is invoked without them, all we push is the macro name.
300		// First, put back the token.
301		in.peekToken = tok
302		in.peekText = in.text
303		in.peek = true
304		in.Push(NewSlice(in.Base(), in.Line(), []Token{Make(macroName, macro.name)}))
305		return
306	}
307	actuals := in.argsFor(macro)
308	var tokens []Token
309	for _, tok := range macro.tokens {
310		if tok.ScanToken != scanner.Ident {
311			tokens = append(tokens, tok)
312			continue
313		}
314		substitution := actuals[tok.text]
315		if substitution == nil {
316			tokens = append(tokens, tok)
317			continue
318		}
319		tokens = append(tokens, substitution...)
320	}
321	in.Push(NewSlice(in.Base(), in.Line(), tokens))
322}
323
324// argsFor returns a map from formal name to actual value for this argumented macro invocation.
325// The opening parenthesis has been absorbed.
326func (in *Input) argsFor(macro *Macro) map[string][]Token {
327	var args [][]Token
328	// One macro argument per iteration. Collect them all and check counts afterwards.
329	for argNum := 0; ; argNum++ {
330		tokens, tok := in.collectArgument(macro)
331		args = append(args, tokens)
332		if tok == ')' {
333			break
334		}
335	}
336	// Zero-argument macros are tricky.
337	if len(macro.args) == 0 && len(args) == 1 && args[0] == nil {
338		args = nil
339	} else if len(args) != len(macro.args) {
340		in.Error("wrong arg count for macro", macro.name)
341	}
342	argMap := make(map[string][]Token)
343	for i, arg := range args {
344		argMap[macro.args[i]] = arg
345	}
346	return argMap
347}
348
349// collectArgument returns the actual tokens for a single argument of a macro.
350// It also returns the token that terminated the argument, which will always
351// be either ',' or ')'. The starting '(' has been scanned.
352func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) {
353	nesting := 0
354	var tokens []Token
355	for {
356		tok := in.Stack.Next()
357		if tok == scanner.EOF || tok == '\n' {
358			in.Error("unterminated arg list invoking macro:", macro.name)
359		}
360		if nesting == 0 && (tok == ')' || tok == ',') {
361			return tokens, tok
362		}
363		if tok == '(' {
364			nesting++
365		}
366		if tok == ')' {
367			nesting--
368		}
369		tokens = append(tokens, Make(tok, in.Stack.Text()))
370	}
371}
372
373// #ifdef and #ifndef processing.
374func (in *Input) ifdef(truth bool) {
375	name := in.macroName()
376	in.expectNewline("#if[n]def")
377	if !in.enabled() {
378		truth = false
379	} else if _, defined := in.macros[name]; !defined {
380		truth = !truth
381	}
382	in.ifdefStack = append(in.ifdefStack, truth)
383}
384
385// #else processing
386func (in *Input) else_() {
387	in.expectNewline("#else")
388	if len(in.ifdefStack) == 0 {
389		in.Error("unmatched #else")
390	}
391	if len(in.ifdefStack) == 1 || in.ifdefStack[len(in.ifdefStack)-2] {
392		in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1]
393	}
394}
395
396// #endif processing.
397func (in *Input) endif() {
398	in.expectNewline("#endif")
399	if len(in.ifdefStack) == 0 {
400		in.Error("unmatched #endif")
401	}
402	in.ifdefStack = in.ifdefStack[:len(in.ifdefStack)-1]
403}
404
405// #include processing.
406func (in *Input) include() {
407	// Find and parse string.
408	tok := in.Stack.Next()
409	if tok != scanner.String {
410		in.expectText("expected string after #include")
411	}
412	name, err := strconv.Unquote(in.Stack.Text())
413	if err != nil {
414		in.Error("unquoting include file name: ", err)
415	}
416	in.expectNewline("#include")
417	// Push tokenizer for file onto stack.
418	fd, err := os.Open(name)
419	if err != nil {
420		for _, dir := range in.includes {
421			fd, err = os.Open(filepath.Join(dir, name))
422			if err == nil {
423				break
424			}
425		}
426		if err != nil {
427			in.Error("#include:", err)
428		}
429	}
430	in.Push(NewTokenizer(name, fd, fd))
431}
432
433// #line processing.
434func (in *Input) line() {
435	// Only need to handle Plan 9 format: #line 337 "filename"
436	tok := in.Stack.Next()
437	if tok != scanner.Int {
438		in.expectText("expected line number after #line")
439	}
440	line, err := strconv.Atoi(in.Stack.Text())
441	if err != nil {
442		in.Error("error parsing #line (cannot happen):", err)
443	}
444	tok = in.Stack.Next()
445	if tok != scanner.String {
446		in.expectText("expected file name in #line")
447	}
448	file, err := strconv.Unquote(in.Stack.Text())
449	if err != nil {
450		in.Error("unquoting #line file name: ", err)
451	}
452	tok = in.Stack.Next()
453	if tok != '\n' {
454		in.Error("unexpected token at end of #line: ", tok)
455	}
456	pos := src.MakePos(in.Base(), uint(in.Line()), uint(in.Col()))
457	in.Stack.SetBase(src.NewLinePragmaBase(pos, file, uint(line)))
458}
459
460// #undef processing
461func (in *Input) undef() {
462	name := in.macroName()
463	if in.macros[name] == nil {
464		in.Error("#undef for undefined macro:", name)
465	}
466	// Newline must be next.
467	tok := in.Stack.Next()
468	if tok != '\n' {
469		in.Error("syntax error in #undef for macro:", name)
470	}
471	delete(in.macros, name)
472}
473
474func (in *Input) Push(r TokenReader) {
475	if len(in.tr) > 100 {
476		in.Error("input recursion")
477	}
478	in.Stack.Push(r)
479}
480
// Close does nothing.
func (in *Input) Close() {
}
483