1package json 2 3import ( 4 "reflect" 5 "testing" 6) 7 8type token struct { 9 delim Delim 10 value RawValue 11 err error 12 depth int 13 index int 14 isKey bool 15} 16 17func delim(s string, depth, index int) token { 18 return token{ 19 delim: Delim(s[0]), 20 value: RawValue(s), 21 depth: depth, 22 index: index, 23 } 24} 25 26func key(v string, depth, index int) token { 27 return token{ 28 value: RawValue(v), 29 depth: depth, 30 index: index, 31 isKey: true, 32 } 33} 34 35func value(v string, depth, index int) token { 36 return token{ 37 value: RawValue(v), 38 depth: depth, 39 index: index, 40 } 41} 42 43func tokenize(b []byte) (tokens []token) { 44 t := NewTokenizer(b) 45 46 for t.Next() { 47 tokens = append(tokens, token{ 48 delim: t.Delim, 49 value: t.Value, 50 err: t.Err, 51 depth: t.Depth, 52 index: t.Index, 53 isKey: t.IsKey, 54 }) 55 } 56 57 if t.Err != nil { 58 panic(t.Err) 59 } 60 61 return 62} 63 64func TestTokenizer(t *testing.T) { 65 tests := []struct { 66 input []byte 67 tokens []token 68 }{ 69 { 70 input: []byte(`null`), 71 tokens: []token{ 72 value(`null`, 0, 0), 73 }, 74 }, 75 76 { 77 input: []byte(`true`), 78 tokens: []token{ 79 value(`true`, 0, 0), 80 }, 81 }, 82 83 { 84 input: []byte(`false`), 85 tokens: []token{ 86 value(`false`, 0, 0), 87 }, 88 }, 89 90 { 91 input: []byte(`""`), 92 tokens: []token{ 93 value(`""`, 0, 0), 94 }, 95 }, 96 97 { 98 input: []byte(`"Hello World!"`), 99 tokens: []token{ 100 value(`"Hello World!"`, 0, 0), 101 }, 102 }, 103 104 { 105 input: []byte(`-0.1234`), 106 tokens: []token{ 107 value(`-0.1234`, 0, 0), 108 }, 109 }, 110 111 { 112 input: []byte(` { } `), 113 tokens: []token{ 114 delim(`{`, 0, 0), 115 delim(`}`, 0, 0), 116 }, 117 }, 118 119 { 120 input: []byte(`{ "answer": 42 }`), 121 tokens: []token{ 122 delim(`{`, 0, 0), 123 key(`"answer"`, 1, 0), 124 delim(`:`, 1, 0), 125 value(`42`, 1, 0), 126 delim(`}`, 0, 0), 127 }, 128 }, 129 130 { 131 input: []byte(`{ "sub": { "key-A": 1, "key-B": 2, "key-C": 3 } }`), 132 tokens: []token{ 133 delim(`{`, 0, 0), 134 key(`"sub"`, 1, 0), 135 delim(`:`, 1, 0), 136 delim(`{`, 1, 0), 137 key(`"key-A"`, 2, 0), 138 delim(`:`, 2, 0), 139 value(`1`, 2, 0), 140 delim(`,`, 2, 0), 141 key(`"key-B"`, 2, 1), 142 delim(`:`, 2, 1), 143 value(`2`, 2, 1), 144 delim(`,`, 2, 1), 145 key(`"key-C"`, 2, 2), 146 delim(`:`, 2, 2), 147 value(`3`, 2, 2), 148 delim(`}`, 1, 0), 149 delim(`}`, 0, 0), 150 }, 151 }, 152 153 { 154 input: []byte(` [ ] `), 155 tokens: []token{ 156 delim(`[`, 0, 0), 157 delim(`]`, 0, 0), 158 }, 159 }, 160 161 { 162 input: []byte(`[1, 2, 3]`), 163 tokens: []token{ 164 delim(`[`, 0, 0), 165 value(`1`, 1, 0), 166 delim(`,`, 1, 0), 167 value(`2`, 1, 1), 168 delim(`,`, 1, 1), 169 value(`3`, 1, 2), 170 delim(`]`, 0, 0), 171 }, 172 }, 173 } 174 175 for _, test := range tests { 176 t.Run(string(test.input), func(t *testing.T) { 177 tokens := tokenize(test.input) 178 179 if !reflect.DeepEqual(tokens, test.tokens) { 180 t.Error("tokens mismatch") 181 t.Logf("expected: %+v", test.tokens) 182 t.Logf("found: %+v", tokens) 183 } 184 }) 185 } 186} 187 188// Regression test for syntax that caused panics in Next. 189func TestTokenizer_invalidInput(t *testing.T) { 190 tests := []struct { 191 scenario string 192 payload []byte 193 }{ 194 { 195 scenario: "bare comma", 196 payload: []byte(","), 197 }, 198 { 199 scenario: "comma after array", 200 payload: []byte("[],"), 201 }, 202 { 203 scenario: "comma after object", 204 payload: []byte("{},"), 205 }, 206 } 207 208 for _, test := range tests { 209 t.Run(test.scenario, func(t *testing.T) { 210 tkn := NewTokenizer(test.payload) 211 212 // This shouldn't panic 213 for tkn.Next() { 214 } 215 216 if tkn.Err == nil { 217 t.Error("expected Err to be set, got nil") 218 } 219 }) 220 } 221} 222 223func BenchmarkTokenizer(b *testing.B) { 224 values := []struct { 225 scenario string 226 payload []byte 227 }{ 228 { 229 scenario: "null", 230 payload: []byte(`null`), 231 }, 232 233 { 234 scenario: "true", 235 payload: []byte(`true`), 236 }, 237 238 { 239 scenario: "false", 240 payload: []byte(`false`), 241 }, 242 243 { 244 scenario: "number", 245 payload: []byte(`-1.23456789`), 246 }, 247 248 { 249 scenario: "string", 250 payload: []byte(`"1234567890"`), 251 }, 252 253 { 254 scenario: "object", 255 payload: []byte(`{ 256 "timestamp": "2019-01-09T18:59:57.456Z", 257 "channel": "server", 258 "type": "track", 259 "event": "Test", 260 "userId": "test-user-whatever", 261 "messageId": "test-message-whatever", 262 "integrations": { 263 "whatever": { 264 "debugMode": false 265 }, 266 "myIntegration": { 267 "debugMode": true 268 } 269 }, 270 "properties": { 271 "trait1": 1, 272 "trait2": "test", 273 "trait3": true 274 }, 275 "settings": { 276 "apiKey": "1234567890", 277 "debugMode": false, 278 "directChannels": [ 279 "server", 280 "client" 281 ], 282 "endpoint": "https://somewhere.com/v1/integrations/segment" 283 } 284}`), 285 }, 286 } 287 288 benchmarks := []struct { 289 scenario string 290 function func(*testing.B, []byte) 291 }{ 292 { 293 scenario: "github.com/segmentio/encoding/json", 294 function: func(b *testing.B, json []byte) { 295 t := NewTokenizer(nil) 296 297 for i := 0; i < b.N; i++ { 298 t.Reset(json) 299 300 for t.Next() { 301 // Does nothing other than iterating over each token to measure the 302 // CPU and memory footprint. 303 } 304 305 if t.Err != nil { 306 b.Error(t.Err) 307 } 308 } 309 }, 310 }, 311 } 312 313 for _, bechmark := range benchmarks { 314 b.Run(bechmark.scenario, func(b *testing.B) { 315 for _, value := range values { 316 b.Run(value.scenario, func(b *testing.B) { 317 bechmark.function(b, value.payload) 318 b.SetBytes(int64(len(value.payload))) 319 }) 320 } 321 }) 322 } 323} 324