1package robotstxt
2
3import (
4	"fmt"
5	"testing"
6
7	"github.com/stretchr/testify/assert"
8)
9
10func TestScanner(t *testing.T) {
11	t.Parallel()
12
13	type tcase struct {
14		input    string
15		expect   []string
16		errCount int
17	}
18	cases := []tcase{
19		{"foo", []string{"foo"}, 0},
20		{"\u2010", []string{"‐"}, 0},
21		{"# comment \r\nSomething: Somewhere\r\n", []string{tokEOL, "Something", "Somewhere", tokEOL}, 0},
22		{"# comment \r\n# more comments\n\nDisallow:\r", []string{tokEOL, tokEOL, "Disallow", tokEOL}, 0},
23		{"\xef\xbb\xbfUser-agent: *\n", []string{"User-agent", "*", tokEOL}, 0},
24		{"\xd9\xd9", []string{"\uFFFD\uFFFD"}, 2},
25	}
26	for i, c := range cases {
27		tag := fmt.Sprintf("test-%d", i)
28		t.Run(tag, func(t *testing.T) {
29			sc := newByteScanner(tag, true)
30			sc.feed([]byte(c.input), true)
31			tokens := sc.scanAll()
32			assert.Equal(t, c.errCount, sc.ErrorCount)
33			assert.Equal(t, c.expect, tokens)
34		})
35	}
36}
37