1from __future__ import absolute_import, division, unicode_literals
2
3import itertools
4import re
5import warnings
6from difflib import unified_diff
7
8import pytest
9
10from .support import TestData, convert, convertExpected, treeTypes
11from html5lib import html5parser, constants, treewalkers
12from html5lib.filters.lint import Filter as Lint
13
# Matches a run of two or more consecutive lines that each start with the
# same captured whitespace prefix followed by ``\w+=``.
_attrlist_re = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)


def sortattrs(s):
    """Return ``s`` with every run matched by ``_attrlist_re`` sorted.

    Each matched run of ``name=...`` lines is split on newlines, sorted
    lexicographically (leading whitespace included) and joined back
    together.  Text outside such runs is returned unchanged.
    """
    def _sorted_run(match):
        return "\n".join(sorted(match.group(0).split("\n")))

    return _attrlist_re.sub(_sorted_run, s)
23
24
class TreeConstructionFile(pytest.File):
    """Pytest file collector that yields one collector per test in the file."""

    def collect(self):
        """Yield a ``TreeConstructionTest`` for each entry read from the file.

        Entries are read with ``TestData(path, "data")`` and each collector
        is named after the entry's zero-based position in the file.
        """
        path = str(self.fspath)
        for index, case in enumerate(TestData(path, "data")):
            yield TreeConstructionTest(str(index), self, testdata=case)
30
31
class TreeConstructionTest(pytest.Collector):
    """Collector expanding one test-data entry into items for each tree type."""

    def __init__(self, name, parent=None, config=None, session=None, testdata=None):
        super(TreeConstructionTest, self).__init__(name, parent, config, session)
        # The single test-data entry shared by every item this collector yields.
        self.testdata = testdata

    def collect(self):
        """Yield the parser items, then the tree walker item, per tree type.

        Tree types are visited in the sorted order of ``treeTypes.items()``.
        """
        for treeName, treeAPIs in sorted(treeTypes.items()):
            parserItems = self._getParserTests(treeName, treeAPIs)
            walkerItems = self._getTreeWalkerTests(treeName, treeAPIs)
            for item in itertools.chain(parserItems, walkerItems):
                yield item

    def _getParserTests(self, treeName, treeAPIs):
        """Yield a namespaced and a void-namespace ``ParserTest``.

        Nothing is yielded when ``treeAPIs`` contains an "adapter" entry.
        When ``treeAPIs`` is None the items are still created, with ``None``
        as their tree builder.
        """
        if treeAPIs is None:
            builder = None
        elif "adapter" in treeAPIs:
            return
        else:
            builder = treeAPIs["builder"]

        variants = ((True, "namespaced"), (False, "void-namespace"))
        for namespaceHTMLElements, label in variants:
            parserItem = ParserTest("%s::parser::%s" % (treeName, label),
                                    self,
                                    self.testdata,
                                    builder,
                                    namespaceHTMLElements)
            parserItem.add_marker(getattr(pytest.mark, treeName))
            parserItem.add_marker(pytest.mark.parser)
            if namespaceHTMLElements:
                parserItem.add_marker(pytest.mark.namespaced)
            yield parserItem

    def _getTreeWalkerTests(self, treeName, treeAPIs):
        """Yield the single ``TreeWalkerTest`` item for this tree type."""
        walkerItem = TreeWalkerTest("%s::treewalker" % treeName,
                                    self,
                                    self.testdata,
                                    treeAPIs)
        for marker in (getattr(pytest.mark, treeName), pytest.mark.treewalker):
            walkerItem.add_marker(marker)
        yield walkerItem
71
72
def convertTreeDump(data):
    """Run ``data`` through ``convert(3)`` and return it without its first line.

    If the converted text contains no newline the result is an empty string.
    """
    converted = convert(3)(data)
    _first_line, _newline, remainder = converted.partition("\n")
    return remainder
75
76
# Matches, at the start of any line, optional whitespace followed by
# ``<name>`` where ``name`` contains no whitespace.
_namespaceExpected_re = re.compile(r"^(\s*)<(\S+)>", re.M)

# Bound ``sub`` of the pattern above; call as
# ``namespaceExpected(replacement, text)``.
namespaceExpected = _namespaceExpected_re.sub
78
79
class ParserTest(pytest.Item):
    """Test item that parses one test case and checks the serialized tree.

    ``test`` is the test-data entry (read by key: 'data',
    'document-fragment', 'document', 'errors', 'script-on'), ``treeClass``
    is the tree builder handed to ``HTMLParser`` (``None`` makes the test
    skip) and ``namespaceHTMLElements`` is forwarded to the parser.
    """

    def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
        super(ParserTest, self).__init__(name, parent)
        self.test = test
        self.treeClass = treeClass
        self.namespaceHTMLElements = namespaceHTMLElements

    def runtest(self):
        """Parse the test input and compare the tree dump with the expectation.

        Skips when no tree builder is available or when parsing raises
        ``DataLossWarning`` (warnings are turned into errors while parsing).
        """
        if self.treeClass is None:
            pytest.skip("Treebuilder not loaded")

        p = html5parser.HTMLParser(tree=self.treeClass,
                                   namespaceHTMLElements=self.namespaceHTMLElements)

        data = self.test['data']
        fragmentContainer = self.test['document-fragment']
        expected = convertExpected(self.test['document'])
        expectedErrors = self.test['errors'].split("\n") if self.test['errors'] else []
        scripting = 'script-on' in self.test

        with warnings.catch_warnings():
            # Escalate warnings so DataLossWarning surfaces as an exception.
            warnings.simplefilter("error")
            try:
                if fragmentContainer:
                    document = p.parseFragment(data, fragmentContainer, scripting=scripting)
                else:
                    document = p.parse(data, scripting=scripting)
            except constants.DataLossWarning:
                pytest.skip("data loss warning")

        output = convertTreeDump(p.tree.testSerializer(document))

        if self.namespaceHTMLElements:
            # Rewrite each "<name>" line of the expectation as "<html name>".
            expected = namespaceExpected(r"\1<html \2>", expected)

        # The message expression is only evaluated when the assertion fails.
        assert expected == output, "\n".join(["\n\nInput:", data,
                                              "\nExpected:", expected,
                                              "\nReceived:", output])

        # Still executed although error counts are not compared: it checks
        # that every reported error carries a dict and that its message
        # template in ``constants.E`` formats with that dict.
        errStr = []
        for (line, col), errorcode, datavars in p.errors:
            assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
            errStr.append("Line: %i Col: %i %s" % (line, col,
                                                   constants.E[errorcode] % datavars))

        if False:  # we're currently not testing parse errors
            errorMsg2 = "\n".join(["\n\nInput:", data,
                                   "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors),
                                   "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
            assert len(p.errors) == len(expectedErrors), errorMsg2

    def repr_failure(self, excinfo):
        """Return a short-style failure report.

        The traceback is first cut with ``cut(path=__file__)`` and then
        passed through its ``filter()``.
        """
        traceback = excinfo.traceback
        ntraceback = traceback.cut(path=__file__)
        excinfo.traceback = ntraceback.filter()

        return excinfo.getrepr(funcargs=True,
                               showlocals=False,
                               style="short", tbfilter=False)
143
144
class TreeWalkerTest(pytest.Item):
    """Test item that walks a parsed tree and checks the pretty-printed output.

    ``test`` is the test-data entry (read by key: 'data',
    'document-fragment', 'document', 'script-on').  ``treeAPIs`` is a
    mapping providing "builder" and "walker" and optionally "adapter";
    ``None`` makes the test skip.
    """

    def __init__(self, name, parent, test, treeAPIs):
        super(TreeWalkerTest, self).__init__(name, parent)
        self.test = test
        self.treeAPIs = treeAPIs

    def runtest(self):
        """Parse the input, walk the resulting tree and compare the output.

        Skips when no tree APIs are available, when parsing raises
        ``DataLossWarning``, when the parser's own tree dump does not match
        the expectation, or when the tree walker raises
        ``NotImplementedError``.
        """
        if self.treeAPIs is None:
            pytest.skip("Treebuilder not loaded")

        p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])

        data = self.test['data']
        fragmentContainer = self.test['document-fragment']
        expected = convertExpected(self.test['document'])
        scripting = 'script-on' in self.test

        with warnings.catch_warnings():
            # Escalate warnings so DataLossWarning surfaces as an exception.
            warnings.simplefilter("error")
            try:
                if fragmentContainer:
                    document = p.parseFragment(data, fragmentContainer, scripting=scripting)
                else:
                    document = p.parse(data, scripting=scripting)
            except constants.DataLossWarning:
                pytest.skip("data loss warning")

        # Only exercise the walker on trees the parser built as expected.
        poutput = convertTreeDump(p.tree.testSerializer(document))
        namespace_expected = namespaceExpected(r"\1<html \2>", expected)
        if poutput != namespace_expected:
            pytest.skip("parser output incorrect")

        document = self.treeAPIs.get("adapter", lambda x: x)(document)

        # Only building and consuming the walker is guarded; the comparison
        # below cannot raise NotImplementedError.
        try:
            output = treewalkers.pprint(Lint(self.treeAPIs["walker"](document)))
        except NotImplementedError:
            pytest.skip("tree walker NotImplementedError")

        output = sortattrs(output)
        expected = sortattrs(expected)
        # The message (including the diff) is only built when the assert fails.
        assert expected == output, self._failureMessage(data, expected, output)

    @staticmethod
    def _failureMessage(data, expected, output):
        """Build the failure text: input, both outputs and a unified diff."""
        diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
                                    [line + "\n" for line in output.splitlines()],
                                    "Expected", "Received"))
        return "\n".join([
            "", "Input:", data,
            "", "Expected:", expected,
            "", "Received:", output,
            "", "Diff:", diff,
        ])

    def repr_failure(self, excinfo):
        """Return a short-style failure report.

        The traceback is first cut with ``cut(path=__file__)`` and then
        passed through its ``filter()``.
        """
        traceback = excinfo.traceback
        ntraceback = traceback.cut(path=__file__)
        excinfo.traceback = ntraceback.filter()

        return excinfo.getrepr(funcargs=True,
                               showlocals=False,
                               style="short", tbfilter=False)
206