1package json
2
3import (
4	"reflect"
5	"testing"
6)
7
8type token struct {
9	delim Delim
10	value RawValue
11	err   error
12	depth int
13	index int
14	isKey bool
15}
16
17func delim(s string, depth, index int) token {
18	return token{
19		delim: Delim(s[0]),
20		value: RawValue(s),
21		depth: depth,
22		index: index,
23	}
24}
25
26func key(v string, depth, index int) token {
27	return token{
28		value: RawValue(v),
29		depth: depth,
30		index: index,
31		isKey: true,
32	}
33}
34
35func value(v string, depth, index int) token {
36	return token{
37		value: RawValue(v),
38		depth: depth,
39		index: index,
40	}
41}
42
43func tokenize(b []byte) (tokens []token) {
44	t := NewTokenizer(b)
45
46	for t.Next() {
47		tokens = append(tokens, token{
48			delim: t.Delim,
49			value: t.Value,
50			err:   t.Err,
51			depth: t.Depth,
52			index: t.Index,
53			isKey: t.IsKey,
54		})
55	}
56
57	if t.Err != nil {
58		panic(t.Err)
59	}
60
61	return
62}
63
64func TestTokenizer(t *testing.T) {
65	tests := []struct {
66		input  []byte
67		tokens []token
68	}{
69		{
70			input: []byte(`null`),
71			tokens: []token{
72				value(`null`, 0, 0),
73			},
74		},
75
76		{
77			input: []byte(`true`),
78			tokens: []token{
79				value(`true`, 0, 0),
80			},
81		},
82
83		{
84			input: []byte(`false`),
85			tokens: []token{
86				value(`false`, 0, 0),
87			},
88		},
89
90		{
91			input: []byte(`""`),
92			tokens: []token{
93				value(`""`, 0, 0),
94			},
95		},
96
97		{
98			input: []byte(`"Hello World!"`),
99			tokens: []token{
100				value(`"Hello World!"`, 0, 0),
101			},
102		},
103
104		{
105			input: []byte(`-0.1234`),
106			tokens: []token{
107				value(`-0.1234`, 0, 0),
108			},
109		},
110
111		{
112			input: []byte(` { } `),
113			tokens: []token{
114				delim(`{`, 0, 0),
115				delim(`}`, 0, 0),
116			},
117		},
118
119		{
120			input: []byte(`{ "answer": 42 }`),
121			tokens: []token{
122				delim(`{`, 0, 0),
123				key(`"answer"`, 1, 0),
124				delim(`:`, 1, 0),
125				value(`42`, 1, 0),
126				delim(`}`, 0, 0),
127			},
128		},
129
130		{
131			input: []byte(`{ "sub": { "key-A": 1, "key-B": 2, "key-C": 3 } }`),
132			tokens: []token{
133				delim(`{`, 0, 0),
134				key(`"sub"`, 1, 0),
135				delim(`:`, 1, 0),
136				delim(`{`, 1, 0),
137				key(`"key-A"`, 2, 0),
138				delim(`:`, 2, 0),
139				value(`1`, 2, 0),
140				delim(`,`, 2, 0),
141				key(`"key-B"`, 2, 1),
142				delim(`:`, 2, 1),
143				value(`2`, 2, 1),
144				delim(`,`, 2, 1),
145				key(`"key-C"`, 2, 2),
146				delim(`:`, 2, 2),
147				value(`3`, 2, 2),
148				delim(`}`, 1, 0),
149				delim(`}`, 0, 0),
150			},
151		},
152
153		{
154			input: []byte(` [ ] `),
155			tokens: []token{
156				delim(`[`, 0, 0),
157				delim(`]`, 0, 0),
158			},
159		},
160
161		{
162			input: []byte(`[1, 2, 3]`),
163			tokens: []token{
164				delim(`[`, 0, 0),
165				value(`1`, 1, 0),
166				delim(`,`, 1, 0),
167				value(`2`, 1, 1),
168				delim(`,`, 1, 1),
169				value(`3`, 1, 2),
170				delim(`]`, 0, 0),
171			},
172		},
173	}
174
175	for _, test := range tests {
176		t.Run(string(test.input), func(t *testing.T) {
177			tokens := tokenize(test.input)
178
179			if !reflect.DeepEqual(tokens, test.tokens) {
180				t.Error("tokens mismatch")
181				t.Logf("expected: %+v", test.tokens)
182				t.Logf("found:    %+v", tokens)
183			}
184		})
185	}
186}
187
188// Regression test for syntax that caused panics in Next.
189func TestTokenizer_invalidInput(t *testing.T) {
190	tests := []struct {
191		scenario string
192		payload  []byte
193	}{
194		{
195			scenario: "bare comma",
196			payload:  []byte(","),
197		},
198		{
199			scenario: "comma after array",
200			payload:  []byte("[],"),
201		},
202		{
203			scenario: "comma after object",
204			payload:  []byte("{},"),
205		},
206	}
207
208	for _, test := range tests {
209		t.Run(test.scenario, func(t *testing.T) {
210			tkn := NewTokenizer(test.payload)
211
212			// This shouldn't panic
213			for tkn.Next() {
214			}
215
216			if tkn.Err == nil {
217				t.Error("expected Err to be set, got nil")
218			}
219		})
220	}
221}
222
223func BenchmarkTokenizer(b *testing.B) {
224	values := []struct {
225		scenario string
226		payload  []byte
227	}{
228		{
229			scenario: "null",
230			payload:  []byte(`null`),
231		},
232
233		{
234			scenario: "true",
235			payload:  []byte(`true`),
236		},
237
238		{
239			scenario: "false",
240			payload:  []byte(`false`),
241		},
242
243		{
244			scenario: "number",
245			payload:  []byte(`-1.23456789`),
246		},
247
248		{
249			scenario: "string",
250			payload:  []byte(`"1234567890"`),
251		},
252
253		{
254			scenario: "object",
255			payload: []byte(`{
256    "timestamp": "2019-01-09T18:59:57.456Z",
257    "channel": "server",
258    "type": "track",
259    "event": "Test",
260    "userId": "test-user-whatever",
261    "messageId": "test-message-whatever",
262    "integrations": {
263        "whatever": {
264            "debugMode": false
265        },
266        "myIntegration": {
267            "debugMode": true
268        }
269    },
270    "properties": {
271        "trait1": 1,
272        "trait2": "test",
273        "trait3": true
274    },
275    "settings": {
276        "apiKey": "1234567890",
277        "debugMode": false,
278        "directChannels": [
279            "server",
280            "client"
281        ],
282        "endpoint": "https://somewhere.com/v1/integrations/segment"
283    }
284}`),
285		},
286	}
287
288	benchmarks := []struct {
289		scenario string
290		function func(*testing.B, []byte)
291	}{
292		{
293			scenario: "github.com/segmentio/encoding/json",
294			function: func(b *testing.B, json []byte) {
295				t := NewTokenizer(nil)
296
297				for i := 0; i < b.N; i++ {
298					t.Reset(json)
299
300					for t.Next() {
301						// Does nothing other than iterating over each token to measure the
302						// CPU and memory footprint.
303					}
304
305					if t.Err != nil {
306						b.Error(t.Err)
307					}
308				}
309			},
310		},
311	}
312
313	for _, bechmark := range benchmarks {
314		b.Run(bechmark.scenario, func(b *testing.B) {
315			for _, value := range values {
316				b.Run(value.scenario, func(b *testing.B) {
317					bechmark.function(b, value.payload)
318					b.SetBytes(int64(len(value.payload)))
319				})
320			}
321		})
322	}
323}
324