1package robotstxt 2 3import ( 4 "fmt" 5 "testing" 6 7 "github.com/stretchr/testify/assert" 8) 9 10func TestScanner(t *testing.T) { 11 t.Parallel() 12 13 type tcase struct { 14 input string 15 expect []string 16 errCount int 17 } 18 cases := []tcase{ 19 {"foo", []string{"foo"}, 0}, 20 {"\u2010", []string{"‐"}, 0}, 21 {"# comment \r\nSomething: Somewhere\r\n", []string{tokEOL, "Something", "Somewhere", tokEOL}, 0}, 22 {"# comment \r\n# more comments\n\nDisallow:\r", []string{tokEOL, tokEOL, "Disallow", tokEOL}, 0}, 23 {"\xef\xbb\xbfUser-agent: *\n", []string{"User-agent", "*", tokEOL}, 0}, 24 {"\xd9\xd9", []string{"\uFFFD\uFFFD"}, 2}, 25 } 26 for i, c := range cases { 27 tag := fmt.Sprintf("test-%d", i) 28 t.Run(tag, func(t *testing.T) { 29 sc := newByteScanner(tag, true) 30 sc.feed([]byte(c.input), true) 31 tokens := sc.scanAll() 32 assert.Equal(t, c.errCount, sc.ErrorCount) 33 assert.Equal(t, c.expect, tokens) 34 }) 35 } 36} 37