1package xml 2 3import ( 4 "fmt" 5 "io" 6 "testing" 7 8 "github.com/tdewolff/parse/v2" 9 "github.com/tdewolff/test" 10) 11 12type TTs []TokenType 13 14func TestTokens(t *testing.T) { 15 var tokenTests = []struct { 16 xml string 17 expected []TokenType 18 }{ 19 {"", TTs{}}, 20 {"<!-- comment -->", TTs{CommentToken}}, 21 {"<!-- comment \n multi \r line -->", TTs{CommentToken}}, 22 {"<foo/>", TTs{StartTagToken, StartTagCloseVoidToken}}, 23 {"<foo \t\r\n/>", TTs{StartTagToken, StartTagCloseVoidToken}}, 24 {"<foo:bar.qux-norf/>", TTs{StartTagToken, StartTagCloseVoidToken}}, 25 {"<foo></foo>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}}, 26 {"<foo>text</foo>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}}, 27 {"<foo/> text", TTs{StartTagToken, StartTagCloseVoidToken, TextToken}}, 28 {"<a> <b> <c>text</c> </b> </a>", TTs{StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken, TextToken, EndTagToken}}, 29 {"<foo a='a' b=\"b\" c=c/>", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}}, 30 {"<foo a=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}}, 31 {"<foo a-b=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}}, 32 {"<foo \nchecked \r\n value\r=\t'=/>\"' />", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}}, 33 {"<?xml?>", TTs{StartTagPIToken, StartTagClosePIToken}}, 34 {"<?xml a=\"a\" ?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}}, 35 {"<?xml a=a?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}}, 36 {"<![CDATA[ test ]]>", TTs{CDATAToken}}, 37 {"<!DOCTYPE>", TTs{DOCTYPEToken}}, 38 {"<!DOCTYPE note SYSTEM \"Note.dtd\">", TTs{DOCTYPEToken}}, 39 {`<!DOCTYPE note [<!ENTITY nbsp " "><!ENTITY writer "Writer: Donald Duck."><!ENTITY copyright "Copyright:]> W3Schools.">]>`, TTs{DOCTYPEToken}}, 40 {"<!foo>", TTs{StartTagToken, StartTagCloseToken}}, 41 42 // early endings 43 {"<!-- comment", TTs{CommentToken}}, 44 {"<foo", TTs{StartTagToken}}, 45 {"</foo", TTs{EndTagToken}}, 46 {"<foo x", TTs{StartTagToken, AttributeToken}}, 47 {"<foo x=", TTs{StartTagToken, AttributeToken}}, 48 {"<foo x='", TTs{StartTagToken, AttributeToken}}, 49 {"<foo x=''", TTs{StartTagToken, AttributeToken}}, 50 {"<?xml", TTs{StartTagPIToken}}, 51 {"<![CDATA[ test", TTs{CDATAToken}}, 52 {"<!DOCTYPE note SYSTEM", TTs{DOCTYPEToken}}, 53 54 // go fuzz 55 {"</", TTs{EndTagToken}}, 56 {"</\n", TTs{EndTagToken}}, 57 } 58 for _, tt := range tokenTests { 59 t.Run(tt.xml, func(t *testing.T) { 60 l := NewLexer(parse.NewInputString(tt.xml)) 61 i := 0 62 for { 63 token, _ := l.Next() 64 if token == ErrorToken { 65 test.T(t, l.Err(), io.EOF) 66 test.T(t, i, len(tt.expected), "when error occurred we must be at the end") 67 break 68 } 69 test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected)) 70 if i < len(tt.expected) { 71 test.T(t, token, tt.expected[i], "token types must match") 72 } 73 i++ 74 } 75 }) 76 } 77 78 // coverage 79 for i := 0; ; i++ { 80 if TokenType(i).String() == fmt.Sprintf("Invalid(%d)", i) { 81 break 82 } 83 } 84} 85 86func TestTags(t *testing.T) { 87 var tagTests = []struct { 88 xml string 89 expected string 90 }{ 91 {"<foo:bar.qux-norf/>", "foo:bar.qux-norf"}, 92 {"<?xml?>", "xml"}, 93 {"<foo?bar/qux>", "foo?bar/qux"}, 94 {"<!DOCTYPE note SYSTEM \"Note.dtd\">", " note SYSTEM \"Note.dtd\""}, 95 96 // early endings 97 {"<foo ", "foo"}, 98 } 99 for _, tt := range tagTests { 100 t.Run(tt.xml, func(t *testing.T) { 101 l := NewLexer(parse.NewInputString(tt.xml)) 102 for { 103 token, _ := l.Next() 104 if token == ErrorToken { 105 test.T(t, l.Err(), io.EOF) 106 test.Fail(t, "when error occurred we must be at the end") 107 break 108 } else if token == StartTagToken || token == StartTagPIToken || token == EndTagToken || token == DOCTYPEToken { 109 test.String(t, string(l.Text()), tt.expected, "tags must match") 110 break 111 } 112 } 113 }) 114 } 115} 116 117func TestAttributes(t *testing.T) { 118 var attributeTests = []struct { 119 attr string 120 expected []string 121 }{ 122 {"<foo a=\"b\" />", []string{"a", "\"b\""}}, 123 {"<foo \nchecked \r\n value\r=\t'=/>\"' />", []string{"checked", "", "value", "'=/>\"'"}}, 124 {"<foo bar=\" a \n\t\r b \" />", []string{"bar", "\" a b \""}}, 125 {"<?xml a=b?>", []string{"a", "b"}}, 126 {"<foo /=? >", []string{"/", "?"}}, 127 128 // early endings 129 {"<foo x", []string{"x", ""}}, 130 {"<foo x=", []string{"x", ""}}, 131 {"<foo x='", []string{"x", "'"}}, 132 } 133 for _, tt := range attributeTests { 134 t.Run(tt.attr, func(t *testing.T) { 135 l := NewLexer(parse.NewInputString(tt.attr)) 136 i := 0 137 for { 138 token, _ := l.Next() 139 if token == ErrorToken { 140 test.T(t, l.Err(), io.EOF) 141 test.T(t, i, len(tt.expected), "when error occurred we must be at the end") 142 break 143 } else if token == AttributeToken { 144 test.That(t, i+1 < len(tt.expected), "index", i+1, "must not exceed expected attributes size", len(tt.expected)) 145 if i+1 < len(tt.expected) { 146 test.String(t, string(l.Text()), tt.expected[i], "attribute keys must match") 147 test.String(t, string(l.AttrVal()), tt.expected[i+1], "attribute keys must match") 148 i += 2 149 } 150 } 151 } 152 }) 153 } 154} 155 156func TestErrors(t *testing.T) { 157 var errorTests = []struct { 158 xml string 159 col int 160 }{ 161 {"a\x00b", 2}, 162 {"<\x00 b='5'>", 2}, 163 {"<a\x00b='5'>", 3}, 164 {"<a \x00='5'>", 4}, 165 {"<a b\x00'5'>", 5}, 166 {"<a b=\x005'>", 6}, 167 {"<a b='\x00'>", 7}, 168 {"<a b='5\x00>", 8}, 169 {"<a b='5'\x00", 9}, 170 {"</\x00a>", 3}, 171 {"</ \x00>", 4}, 172 {"</ a\x00", 5}, 173 {"<!\x00", 3}, 174 {"<![CDATA[\x00", 10}, 175 {"/*\x00", 3}, 176 } 177 for _, tt := range errorTests { 178 t.Run(tt.xml, func(t *testing.T) { 179 l := NewLexer(parse.NewInputString(tt.xml)) 180 for { 181 token, _ := l.Next() 182 if token == ErrorToken { 183 if perr, ok := l.Err().(*parse.Error); ok { 184 _, col, _ := perr.Position() 185 test.T(t, col, tt.col) 186 } else { 187 test.Fail(t, "bad error:", l.Err()) 188 } 189 break 190 } 191 } 192 }) 193 } 194} 195 196func TestTextAndAttrVal(t *testing.T) { 197 l := NewLexer(parse.NewInputString(`<xml attr="val" >text<!--comment--><!DOCTYPE doctype><![CDATA[cdata]]>`)) 198 _, data := l.Next() 199 test.Bytes(t, data, []byte("<xml")) 200 test.Bytes(t, l.Text(), []byte("xml")) 201 test.Bytes(t, l.AttrVal(), nil) 202 203 _, data = l.Next() 204 test.Bytes(t, data, []byte(` attr="val"`)) 205 test.Bytes(t, l.Text(), []byte("attr")) 206 test.Bytes(t, l.AttrVal(), []byte(`"val"`)) 207 208 _, data = l.Next() 209 test.Bytes(t, data, []byte(">")) 210 test.Bytes(t, l.Text(), nil) 211 test.Bytes(t, l.AttrVal(), nil) 212 213 _, data = l.Next() 214 test.Bytes(t, data, []byte("text")) 215 test.Bytes(t, l.Text(), []byte("text")) 216 test.Bytes(t, l.AttrVal(), nil) 217 218 _, data = l.Next() 219 test.Bytes(t, data, []byte("<!--comment-->")) 220 test.Bytes(t, l.Text(), []byte("comment")) 221 test.Bytes(t, l.AttrVal(), nil) 222 223 _, data = l.Next() 224 test.Bytes(t, data, []byte("<!DOCTYPE doctype>")) 225 test.Bytes(t, l.Text(), []byte(" doctype")) 226 test.Bytes(t, l.AttrVal(), nil) 227 228 _, data = l.Next() 229 test.Bytes(t, data, []byte("<![CDATA[cdata]]>")) 230 test.Bytes(t, l.Text(), []byte("cdata")) 231 test.Bytes(t, l.AttrVal(), nil) 232} 233 234func TestOffset(t *testing.T) { 235 z := parse.NewInputString(`<div attr="val">text</div>`) 236 l := NewLexer(z) 237 test.T(t, z.Offset(), 0) 238 _, _ = l.Next() 239 test.T(t, z.Offset(), 4) // <div 240 _, _ = l.Next() 241 test.T(t, z.Offset(), 15) // attr="val" 242 _, _ = l.Next() 243 test.T(t, z.Offset(), 16) // > 244 _, _ = l.Next() 245 test.T(t, z.Offset(), 20) // text 246 _, _ = l.Next() 247 test.T(t, z.Offset(), 26) // </div> 248} 249 250//////////////////////////////////////////////////////////////// 251 252func ExampleNewLexer() { 253 l := NewLexer(parse.NewInputString("<span class='user'>John Doe</span>")) 254 out := "" 255 for { 256 tt, data := l.Next() 257 if tt == ErrorToken { 258 break 259 } 260 out += string(data) 261 } 262 fmt.Println(out) 263 // Output: <span class='user'>John Doe</span> 264} 265