1from __future__ import absolute_import, division, unicode_literals 2 3import itertools 4import re 5import warnings 6from difflib import unified_diff 7 8import pytest 9 10from .support import TestData, convert, convertExpected, treeTypes 11from html5lib import html5parser, constants, treewalkers 12from html5lib.filters.lint import Filter as Lint 13 14_attrlist_re = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M) 15 16 17def sortattrs(s): 18 def replace(m): 19 lines = m.group(0).split("\n") 20 lines.sort() 21 return "\n".join(lines) 22 return _attrlist_re.sub(replace, s) 23 24 25class TreeConstructionFile(pytest.File): 26 def collect(self): 27 tests = TestData(str(self.fspath), "data") 28 for i, test in enumerate(tests): 29 yield TreeConstructionTest(str(i), self, testdata=test) 30 31 32class TreeConstructionTest(pytest.Collector): 33 def __init__(self, name, parent=None, config=None, session=None, testdata=None): 34 super(TreeConstructionTest, self).__init__(name, parent, config, session) 35 self.testdata = testdata 36 37 def collect(self): 38 for treeName, treeAPIs in sorted(treeTypes.items()): 39 for x in itertools.chain(self._getParserTests(treeName, treeAPIs), 40 self._getTreeWalkerTests(treeName, treeAPIs)): 41 yield x 42 43 def _getParserTests(self, treeName, treeAPIs): 44 if treeAPIs is not None and "adapter" in treeAPIs: 45 return 46 for namespaceHTMLElements in (True, False): 47 if namespaceHTMLElements: 48 nodeid = "%s::parser::namespaced" % treeName 49 else: 50 nodeid = "%s::parser::void-namespace" % treeName 51 item = ParserTest(nodeid, 52 self, 53 self.testdata, 54 treeAPIs["builder"] if treeAPIs is not None else None, 55 namespaceHTMLElements) 56 item.add_marker(getattr(pytest.mark, treeName)) 57 item.add_marker(pytest.mark.parser) 58 if namespaceHTMLElements: 59 item.add_marker(pytest.mark.namespaced) 60 yield item 61 62 def _getTreeWalkerTests(self, treeName, treeAPIs): 63 nodeid = "%s::treewalker" % treeName 64 item = TreeWalkerTest(nodeid, 65 self, 66 self.testdata, 67 treeAPIs) 68 item.add_marker(getattr(pytest.mark, treeName)) 69 item.add_marker(pytest.mark.treewalker) 70 yield item 71 72 73def convertTreeDump(data): 74 return "\n".join(convert(3)(data).split("\n")[1:]) 75 76 77namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub 78 79 80class ParserTest(pytest.Item): 81 def __init__(self, name, parent, test, treeClass, namespaceHTMLElements): 82 super(ParserTest, self).__init__(name, parent) 83 self.test = test 84 self.treeClass = treeClass 85 self.namespaceHTMLElements = namespaceHTMLElements 86 87 def runtest(self): 88 if self.treeClass is None: 89 pytest.skip("Treebuilder not loaded") 90 91 p = html5parser.HTMLParser(tree=self.treeClass, 92 namespaceHTMLElements=self.namespaceHTMLElements) 93 94 input = self.test['data'] 95 fragmentContainer = self.test['document-fragment'] 96 expected = convertExpected(self.test['document']) 97 expectedErrors = self.test['errors'].split("\n") if self.test['errors'] else [] 98 99 scripting = False 100 if 'script-on' in self.test: 101 scripting = True 102 103 with warnings.catch_warnings(): 104 warnings.simplefilter("error") 105 try: 106 if fragmentContainer: 107 document = p.parseFragment(input, fragmentContainer, scripting=scripting) 108 else: 109 document = p.parse(input, scripting=scripting) 110 except constants.DataLossWarning: 111 pytest.skip("data loss warning") 112 113 output = convertTreeDump(p.tree.testSerializer(document)) 114 115 expected = expected 116 if self.namespaceHTMLElements: 117 expected = namespaceExpected(r"\1<html \2>", expected) 118 119 errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, 120 "\nReceived:", output]) 121 assert expected == output, errorMsg 122 123 errStr = [] 124 for (line, col), errorcode, datavars in p.errors: 125 assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) 126 errStr.append("Line: %i Col: %i %s" % (line, col, 127 constants.E[errorcode] % datavars)) 128 129 errorMsg2 = "\n".join(["\n\nInput:", input, 130 "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors), 131 "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) 132 if False: # we're currently not testing parse errors 133 assert len(p.errors) == len(expectedErrors), errorMsg2 134 135 def repr_failure(self, excinfo): 136 traceback = excinfo.traceback 137 ntraceback = traceback.cut(path=__file__) 138 excinfo.traceback = ntraceback.filter() 139 140 return excinfo.getrepr(funcargs=True, 141 showlocals=False, 142 style="short", tbfilter=False) 143 144 145class TreeWalkerTest(pytest.Item): 146 def __init__(self, name, parent, test, treeAPIs): 147 super(TreeWalkerTest, self).__init__(name, parent) 148 self.test = test 149 self.treeAPIs = treeAPIs 150 151 def runtest(self): 152 if self.treeAPIs is None: 153 pytest.skip("Treebuilder not loaded") 154 155 p = html5parser.HTMLParser(tree=self.treeAPIs["builder"]) 156 157 input = self.test['data'] 158 fragmentContainer = self.test['document-fragment'] 159 expected = convertExpected(self.test['document']) 160 161 scripting = False 162 if 'script-on' in self.test: 163 scripting = True 164 165 with warnings.catch_warnings(): 166 warnings.simplefilter("error") 167 try: 168 if fragmentContainer: 169 document = p.parseFragment(input, fragmentContainer, scripting=scripting) 170 else: 171 document = p.parse(input, scripting=scripting) 172 except constants.DataLossWarning: 173 pytest.skip("data loss warning") 174 175 poutput = convertTreeDump(p.tree.testSerializer(document)) 176 namespace_expected = namespaceExpected(r"\1<html \2>", expected) 177 if poutput != namespace_expected: 178 pytest.skip("parser output incorrect") 179 180 document = self.treeAPIs.get("adapter", lambda x: x)(document) 181 182 try: 183 output = treewalkers.pprint(Lint(self.treeAPIs["walker"](document))) 184 output = sortattrs(output) 185 expected = sortattrs(expected) 186 diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()], 187 [line + "\n" for line in output.splitlines()], 188 "Expected", "Received")) 189 assert expected == output, "\n".join([ 190 "", "Input:", input, 191 "", "Expected:", expected, 192 "", "Received:", output, 193 "", "Diff:", diff, 194 ]) 195 except NotImplementedError: 196 pytest.skip("tree walker NotImplementedError") 197 198 def repr_failure(self, excinfo): 199 traceback = excinfo.traceback 200 ntraceback = traceback.cut(path=__file__) 201 excinfo.traceback = ntraceback.filter() 202 203 return excinfo.getrepr(funcargs=True, 204 showlocals=False, 205 style="short", tbfilter=False) 206