encoding/json/token_test.go

package json

import (
	"reflect"
	"testing"
)

type token struct {
	delim Delim
	value RawValue
	err   error
	depth int
	index int
	isKey bool
}

func delim(s string, depth, index int) token {
	return token{
		delim: Delim(s[0]),
		value: RawValue(s),
		depth: depth,
		index: index,
	}
}

func key(v string, depth, index int) token {
	return token{
		value: RawValue(v),
		depth: depth,
		index: index,
		isKey: true,
	}
}

func value(v string, depth, index int) token {
	return token{
		value: RawValue(v),
		depth: depth,
		index: index,
	}
}

func tokenize(b []byte) (tokens []token) {
	t := NewTokenizer(b)

	for t.Next() {
		tokens = append(tokens, token{
			delim: t.Delim,
			value: t.Value,
			err:   t.Err,
			depth: t.Depth,
			index: t.Index,
			isKey: t.IsKey,
		})
	}

	if t.Err != nil {
		panic(t.Err)
	}

	return
}

func TestTokenizer(t *testing.T) {
	tests := []struct {
		input  []byte
		tokens []token
	}{
		{
			input: []byte(`null`),
			tokens: []token{
				value(`null`, 0, 0),
			},
		},

		{
			input: []byte(`true`),
			tokens: []token{
				value(`true`, 0, 0),
			},
		},

		{
			input: []byte(`false`),
			tokens: []token{
				value(`false`, 0, 0),
			},
		},

		{
			input: []byte(`""`),
			tokens: []token{
				value(`""`, 0, 0),
			},
		},

		{
			input: []byte(`"Hello World!"`),
			tokens: []token{
				value(`"Hello World!"`, 0, 0),
			},
		},

		{
			input: []byte(`-0.1234`),
			tokens: []token{
				value(`-0.1234`, 0, 0),
			},
		},

		{
			input: []byte(` { } `),
			tokens: []token{
				delim(`{`, 0, 0),
				delim(`}`, 0, 0),
			},
		},

		{
			input: []byte(`{ "answer": 42 }`),
			tokens: []token{
				delim(`{`, 0, 0),
				key(`"answer"`, 1, 0),
				delim(`:`, 1, 0),
				value(`42`, 1, 0),
				delim(`}`, 0, 0),
			},
		},

		{
			input: []byte(`{ "sub": { "key-A": 1, "key-B": 2, "key-C": 3 } }`),
			tokens: []token{
				delim(`{`, 0, 0),
				key(`"sub"`, 1, 0),
				delim(`:`, 1, 0),
				delim(`{`, 1, 0),
				key(`"key-A"`, 2, 0),
				delim(`:`, 2, 0),
				value(`1`, 2, 0),
				delim(`,`, 2, 0),
				key(`"key-B"`, 2, 1),
				delim(`:`, 2, 1),
				value(`2`, 2, 1),
				delim(`,`, 2, 1),
				key(`"key-C"`, 2, 2),
				delim(`:`, 2, 2),
				value(`3`, 2, 2),
				delim(`}`, 1, 0),
				delim(`}`, 0, 0),
			},
		},

		{
			input: []byte(` [ ] `),
			tokens: []token{
				delim(`[`, 0, 0),
				delim(`]`, 0, 0),
			},
		},

		{
			input: []byte(`[1, 2, 3]`),
			tokens: []token{
				delim(`[`, 0, 0),
				value(`1`, 1, 0),
				delim(`,`, 1, 0),
				value(`2`, 1, 1),
				delim(`,`, 1, 1),
				value(`3`, 1, 2),
				delim(`]`, 0, 0),
			},
		},
	}

	for _, test := range tests {
		t.Run(string(test.input), func(t *testing.T) {
			tokens := tokenize(test.input)

			if !reflect.DeepEqual(tokens, test.tokens) {
				t.Error("tokens mismatch")
				t.Logf("expected: %+v", test.tokens)
				t.Logf("found:    %+v", tokens)
			}
		})
	}
}

// Regression test for syntax that caused panics in Next.
func TestTokenizer_invalidInput(t *testing.T) {
	tests := []struct {
		scenario string
		payload  []byte
	}{
		{
			scenario: "bare comma",
			payload:  []byte(","),
		},
		{
			scenario: "comma after array",
			payload:  []byte("[],"),
		},
		{
			scenario: "comma after object",
			payload:  []byte("{},"),
		},
	}

	for _, test := range tests {
		t.Run(test.scenario, func(t *testing.T) {
			tkn := NewTokenizer(test.payload)

			// This shouldn't panic
			for tkn.Next() {
			}

			if tkn.Err == nil {
				t.Error("expected Err to be set, got nil")
			}
		})
	}
}

func BenchmarkTokenizer(b *testing.B) {
	values := []struct {
		scenario string
		payload  []byte
	}{
		{
			scenario: "null",
			payload:  []byte(`null`),
		},

		{
			scenario: "true",
			payload:  []byte(`true`),
		},

		{
			scenario: "false",
			payload:  []byte(`false`),
		},

		{
			scenario: "number",
			payload:  []byte(`-1.23456789`),
		},

		{
			scenario: "string",
			payload:  []byte(`"1234567890"`),
		},

		{
			scenario: "object",
			payload: []byte(`{
    "timestamp": "2019-01-09T18:59:57.456Z",
    "channel": "server",
    "type": "track",
    "event": "Test",
    "userId": "test-user-whatever",
    "messageId": "test-message-whatever",
    "integrations": {
        "whatever": {
            "debugMode": false
        },
        "myIntegration": {
            "debugMode": true
        }
    },
    "properties": {
        "trait1": 1,
        "trait2": "test",
        "trait3": true
    },
    "settings": {
        "apiKey": "1234567890",
        "debugMode": false,
        "directChannels": [
            "server",
            "client"
        ],
        "endpoint": "https://somewhere.com/v1/integrations/segment"
    }
}`),
		},
	}

	benchmarks := []struct {
		scenario string
		function func(*testing.B, []byte)
	}{
		{
			scenario: "github.com/segmentio/encoding/json",
			function: func(b *testing.B, json []byte) {
				t := NewTokenizer(nil)

				for i := 0; i < b.N; i++ {
					t.Reset(json)

					for t.Next() {
						// Does nothing other than iterating over each token to measure the
						// CPU and memory footprint.
					}

					if t.Err != nil {
						b.Error(t.Err)
					}
				}
			},
		},
	}

	for _, bechmark := range benchmarks {
		b.Run(bechmark.scenario, func(b *testing.B) {
			for _, value := range values {
				b.Run(value.scenario, func(b *testing.B) {
					bechmark.function(b, value.payload)
					b.SetBytes(int64(len(value.payload)))
				})
			}
		})
	}
}