1package scanner
2
import (
	"bytes"
	"fmt"
	"strings"
	"testing"

	"github.com/hashicorp/hcl/json/token"
)
10
// f100 is a 100-character string used to exercise scanning of long
// string literals (see the "string" entry in tokenLists).
var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"

// tokenPair couples an expected token type with the exact source text
// that should scan to it.
type tokenPair struct {
	tok  token.Type
	text string
}
17
18var tokenLists = map[string][]tokenPair{
19	"operator": []tokenPair{
20		{token.LBRACK, "["},
21		{token.LBRACE, "{"},
22		{token.COMMA, ","},
23		{token.PERIOD, "."},
24		{token.RBRACK, "]"},
25		{token.RBRACE, "}"},
26	},
27	"bool": []tokenPair{
28		{token.BOOL, "true"},
29		{token.BOOL, "false"},
30	},
31	"string": []tokenPair{
32		{token.STRING, `" "`},
33		{token.STRING, `"a"`},
34		{token.STRING, `"本"`},
35		{token.STRING, `"${file(\"foo\")}"`},
36		{token.STRING, `"\a"`},
37		{token.STRING, `"\b"`},
38		{token.STRING, `"\f"`},
39		{token.STRING, `"\n"`},
40		{token.STRING, `"\r"`},
41		{token.STRING, `"\t"`},
42		{token.STRING, `"\v"`},
43		{token.STRING, `"\""`},
44		{token.STRING, `"\000"`},
45		{token.STRING, `"\777"`},
46		{token.STRING, `"\x00"`},
47		{token.STRING, `"\xff"`},
48		{token.STRING, `"\u0000"`},
49		{token.STRING, `"\ufA16"`},
50		{token.STRING, `"\U00000000"`},
51		{token.STRING, `"\U0000ffAB"`},
52		{token.STRING, `"` + f100 + `"`},
53	},
54	"number": []tokenPair{
55		{token.NUMBER, "0"},
56		{token.NUMBER, "1"},
57		{token.NUMBER, "9"},
58		{token.NUMBER, "42"},
59		{token.NUMBER, "1234567890"},
60		{token.NUMBER, "-0"},
61		{token.NUMBER, "-1"},
62		{token.NUMBER, "-9"},
63		{token.NUMBER, "-42"},
64		{token.NUMBER, "-1234567890"},
65	},
66	"float": []tokenPair{
67		{token.FLOAT, "0."},
68		{token.FLOAT, "1."},
69		{token.FLOAT, "42."},
70		{token.FLOAT, "01234567890."},
71		{token.FLOAT, ".0"},
72		{token.FLOAT, ".1"},
73		{token.FLOAT, ".42"},
74		{token.FLOAT, ".0123456789"},
75		{token.FLOAT, "0.0"},
76		{token.FLOAT, "1.0"},
77		{token.FLOAT, "42.0"},
78		{token.FLOAT, "01234567890.0"},
79		{token.FLOAT, "0e0"},
80		{token.FLOAT, "1e0"},
81		{token.FLOAT, "42e0"},
82		{token.FLOAT, "01234567890e0"},
83		{token.FLOAT, "0E0"},
84		{token.FLOAT, "1E0"},
85		{token.FLOAT, "42E0"},
86		{token.FLOAT, "01234567890E0"},
87		{token.FLOAT, "0e+10"},
88		{token.FLOAT, "1e-10"},
89		{token.FLOAT, "42e+10"},
90		{token.FLOAT, "01234567890e-10"},
91		{token.FLOAT, "0E+10"},
92		{token.FLOAT, "1E-10"},
93		{token.FLOAT, "42E+10"},
94		{token.FLOAT, "01234567890E-10"},
95		{token.FLOAT, "01.8e0"},
96		{token.FLOAT, "1.4e0"},
97		{token.FLOAT, "42.2e0"},
98		{token.FLOAT, "01234567890.12e0"},
99		{token.FLOAT, "0.E0"},
100		{token.FLOAT, "1.12E0"},
101		{token.FLOAT, "42.123E0"},
102		{token.FLOAT, "01234567890.213E0"},
103		{token.FLOAT, "0.2e+10"},
104		{token.FLOAT, "1.2e-10"},
105		{token.FLOAT, "42.54e+10"},
106		{token.FLOAT, "01234567890.98e-10"},
107		{token.FLOAT, "0.1E+10"},
108		{token.FLOAT, "1.1E-10"},
109		{token.FLOAT, "42.1E+10"},
110		{token.FLOAT, "01234567890.1E-10"},
111		{token.FLOAT, "-0.0"},
112		{token.FLOAT, "-1.0"},
113		{token.FLOAT, "-42.0"},
114		{token.FLOAT, "-01234567890.0"},
115		{token.FLOAT, "-0e0"},
116		{token.FLOAT, "-1e0"},
117		{token.FLOAT, "-42e0"},
118		{token.FLOAT, "-01234567890e0"},
119		{token.FLOAT, "-0E0"},
120		{token.FLOAT, "-1E0"},
121		{token.FLOAT, "-42E0"},
122		{token.FLOAT, "-01234567890E0"},
123		{token.FLOAT, "-0e+10"},
124		{token.FLOAT, "-1e-10"},
125		{token.FLOAT, "-42e+10"},
126		{token.FLOAT, "-01234567890e-10"},
127		{token.FLOAT, "-0E+10"},
128		{token.FLOAT, "-1E-10"},
129		{token.FLOAT, "-42E+10"},
130		{token.FLOAT, "-01234567890E-10"},
131		{token.FLOAT, "-01.8e0"},
132		{token.FLOAT, "-1.4e0"},
133		{token.FLOAT, "-42.2e0"},
134		{token.FLOAT, "-01234567890.12e0"},
135		{token.FLOAT, "-0.E0"},
136		{token.FLOAT, "-1.12E0"},
137		{token.FLOAT, "-42.123E0"},
138		{token.FLOAT, "-01234567890.213E0"},
139		{token.FLOAT, "-0.2e+10"},
140		{token.FLOAT, "-1.2e-10"},
141		{token.FLOAT, "-42.54e+10"},
142		{token.FLOAT, "-01234567890.98e-10"},
143		{token.FLOAT, "-0.1E+10"},
144		{token.FLOAT, "-1.1E-10"},
145		{token.FLOAT, "-42.1E+10"},
146		{token.FLOAT, "-01234567890.1E-10"},
147	},
148}
149
// orderedTokenLists fixes the order in which TestPosition walks the
// token lists, so the expected offset/line/column sequence is
// deterministic.
//
// NOTE(review): "comment" has no entry in tokenLists, so indexing it
// yields a nil slice and it contributes nothing here — confirm whether
// a comment list was intended for the JSON scanner.
var orderedTokenLists = []string{
	"comment",
	"operator",
	"bool",
	"string",
	"number",
	"float",
}
158
// TestPosition writes every token from tokenLists on its own line,
// preceded by four tabs, then rescans the buffer and checks that the
// scanner's reported token position (offset, line, column) advances
// exactly as the layout predicts for each token.
func TestPosition(t *testing.T) {
	// create artificial source code
	buf := new(bytes.Buffer)

	for _, listName := range orderedTokenLists {
		for _, ident := range tokenLists[listName] {
			fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text)
		}
	}

	s := New(buf.Bytes())

	// the first token starts after the four leading tabs:
	// offset 4, line 1, column 5
	pos := token.Pos{"", 4, 1, 5}
	s.Scan()
	for _, listName := range orderedTokenLists {

		for _, k := range tokenLists[listName] {
			curPos := s.tokPos
			// fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column)

			if curPos.Offset != pos.Offset {
				t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text)
			}
			if curPos.Line != pos.Line {
				t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text)
			}
			if curPos.Column != pos.Column {
				t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text)
			}
			pos.Offset += 4 + len(k.text) + 1     // 4 tabs + token bytes + newline
			pos.Line += countNewlines(k.text) + 1 // each token is on a new line

			// attribute any scanner error to the token currently being scanned
			s.Error = func(pos token.Pos, msg string) {
				t.Errorf("error %q for %q", msg, k.text)
			}

			s.Scan()
		}
	}
	// make sure there were no token-internal errors reported by scanner
	if s.ErrorCount != 0 {
		t.Errorf("%d errors", s.ErrorCount)
	}
}
203
// TestComment runs the shared token-list check for the "comment" list.
//
// NOTE(review): tokenLists has no "comment" entry, so this passes a nil
// slice and the test is vacuous — confirm whether a comment list was
// intended for the JSON scanner.
func TestComment(t *testing.T) {
	testTokenList(t, tokenLists["comment"])
}
207
208func TestOperator(t *testing.T) {
209	testTokenList(t, tokenLists["operator"])
210}
211
212func TestBool(t *testing.T) {
213	testTokenList(t, tokenLists["bool"])
214}
215
// TestIdent runs the shared token-list check for the "ident" list.
//
// NOTE(review): tokenLists has no "ident" entry, so this passes a nil
// slice and the test is vacuous — confirm whether an identifier list
// was intended for the JSON scanner.
func TestIdent(t *testing.T) {
	testTokenList(t, tokenLists["ident"])
}
219
220func TestString(t *testing.T) {
221	testTokenList(t, tokenLists["string"])
222}
223
224func TestNumber(t *testing.T) {
225	testTokenList(t, tokenLists["number"])
226}
227
228func TestFloat(t *testing.T) {
229	testTokenList(t, tokenLists["float"])
230}
231
232func TestRealExample(t *testing.T) {
233	complexReal := `
234{
235    "variable": {
236        "foo": {
237            "default": "bar",
238            "description": "bar",
239            "depends_on": ["something"]
240        }
241    }
242}`
243
244	literals := []struct {
245		tokenType token.Type
246		literal   string
247	}{
248		{token.LBRACE, `{`},
249		{token.STRING, `"variable"`},
250		{token.COLON, `:`},
251		{token.LBRACE, `{`},
252		{token.STRING, `"foo"`},
253		{token.COLON, `:`},
254		{token.LBRACE, `{`},
255		{token.STRING, `"default"`},
256		{token.COLON, `:`},
257		{token.STRING, `"bar"`},
258		{token.COMMA, `,`},
259		{token.STRING, `"description"`},
260		{token.COLON, `:`},
261		{token.STRING, `"bar"`},
262		{token.COMMA, `,`},
263		{token.STRING, `"depends_on"`},
264		{token.COLON, `:`},
265		{token.LBRACK, `[`},
266		{token.STRING, `"something"`},
267		{token.RBRACK, `]`},
268		{token.RBRACE, `}`},
269		{token.RBRACE, `}`},
270		{token.RBRACE, `}`},
271		{token.EOF, ``},
272	}
273
274	s := New([]byte(complexReal))
275	for _, l := range literals {
276		tok := s.Scan()
277		if l.tokenType != tok.Type {
278			t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String())
279		}
280
281		if l.literal != tok.Text {
282			t.Errorf("got: %s want %s\n", tok, l.literal)
283		}
284	}
285
286}
287
288func TestError(t *testing.T) {
289	testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
290	testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
291
292	testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
293	testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
294
295	testError(t, `01238`, "1:7", "numbers cannot start with 0", token.NUMBER)
296	testError(t, `01238123`, "1:10", "numbers cannot start with 0", token.NUMBER)
297	testError(t, `'aa'`, "1:1", "illegal char: '", token.ILLEGAL)
298
299	testError(t, `"`, "1:2", "literal not terminated", token.STRING)
300	testError(t, `"abc`, "1:5", "literal not terminated", token.STRING)
301	testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING)
302}
303
304func testError(t *testing.T, src, pos, msg string, tok token.Type) {
305	s := New([]byte(src))
306
307	errorCalled := false
308	s.Error = func(p token.Pos, m string) {
309		if !errorCalled {
310			if pos != p.String() {
311				t.Errorf("pos = %q, want %q for %q", p, pos, src)
312			}
313
314			if m != msg {
315				t.Errorf("msg = %q, want %q for %q", m, msg, src)
316			}
317			errorCalled = true
318		}
319	}
320
321	tk := s.Scan()
322	if tk.Type != tok {
323		t.Errorf("tok = %s, want %s for %q", tk, tok, src)
324	}
325	if !errorCalled {
326		t.Errorf("error handler not called for %q", src)
327	}
328	if s.ErrorCount == 0 {
329		t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
330	}
331}
332
333func testTokenList(t *testing.T, tokenList []tokenPair) {
334	// create artifical source code
335	buf := new(bytes.Buffer)
336	for _, ident := range tokenList {
337		fmt.Fprintf(buf, "%s\n", ident.text)
338	}
339
340	s := New(buf.Bytes())
341	for _, ident := range tokenList {
342		tok := s.Scan()
343		if tok.Type != ident.tok {
344			t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text)
345		}
346
347		if tok.Text != ident.text {
348			t.Errorf("text = %q want %q", tok.String(), ident.text)
349		}
350
351	}
352}
353
// countNewlines reports the number of '\n' bytes in s. It delegates to
// strings.Count instead of the original hand-rolled rune loop; the two
// are equivalent here because '\n' is a single byte in UTF-8.
func countNewlines(s string) int {
	return strings.Count(s, "\n")
}
363