1#!/usr/bin/env python3
2
3# Copyright(C) 2017  Vincent A
4#
5# This file is part of weboob.
6#
7# weboob is free software: you can redistribute it and/or modify
8# it under the terms of the GNU Lesser General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# weboob is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU Lesser General Public License for more details.
16#
17# You should have received a copy of the GNU Lesser General Public License
18# along with weboob. If not, see <http://www.gnu.org/licenses/>.
19
20from __future__ import print_function
21
22import ast
23import fnmatch
24import os
25import traceback
26
27import lxml.etree
28from weboob.browser.filters import standard
29
30
class Error(SyntaxError):
    """Problem found at a given line of a given file.

    The exception message has the form ``file:line: message``; the location
    is also kept in the ``file`` and ``line`` attributes.
    """

    def __init__(self, file, line, message):
        text = '%s:%s: %s' % (file, line, message)
        super(Error, self).__init__(text)
        self.file, self.line = file, line
36
37
def do_visits(*funcs):
    """Build a ``visit_*`` method that chains several checks.

    The returned function calls every function of *funcs* with
    ``(self, node)``, in the order given, and then calls
    ``self.generic_visit(node)``.
    """
    def wrapper(self, node):
        for check in funcs:
            check(self, node)
        self.generic_visit(node)

    return wrapper
44
45
class Visitor(ast.NodeVisitor):
    """Check the XPath expressions written as string literals in a module.

    The following string literals are compiled with ``lxml.etree.XPath``:

    - the value of an ``item_xpath = '...'`` assignment located directly in a
      class deriving from ``TableElement`` or ``ListElement``;
    - the first positional argument of any ``<something>.xpath('...')`` call;
    - the first positional argument of a call to one of the names listed in
      ``self.filters``.

    An invalid expression raises :class:`Error`. When warnings are enabled,
    a message is also printed for expressions that probably lack a leading
    ``.`` (see :meth:`check_xpath`).
    """

    def __init__(self, file, *args, **kwargs):
        """Create a visitor for the module *file*.

        :param file: file name used as prefix in error and warning messages
        :param warnings: keyword-only flag (default ``False``), enables the
                         "probable missing '.'" warnings
        """
        self.warnings = kwargs.pop('warnings', False)
        super(Visitor, self).__init__(*args, **kwargs)
        self.file = file

        # Names whose first argument is checked as an XPath: every class of
        # the ``standard`` module deriving from CleanText (CleanText
        # included), plus a few hard-coded names.
        self.filters = []
        for name in dir(standard):
            obj = getattr(standard, name)
            if isinstance(obj, type) and issubclass(obj, standard.CleanText):
                self.filters.append(name)
        self.filters.extend(['Regexp', 'XPath', 'Attr', 'Link'])

        # Stack of the *Element base class names of the classes currently
        # being visited, outermost first.
        self.element_context = []

    @staticmethod
    def _literal_string(node):
        """Return the text of a string literal node, or None for other nodes.

        ``ast.Str`` was deprecated in Python 3.8 and later removed, so it is
        never accessed here: current parsers produce ``ast.Constant`` nodes,
        and nodes from older parsers are recognized by their class name.
        """
        constant_cls = getattr(ast, 'Constant', None)
        if constant_cls is not None and isinstance(node, constant_cls):
            value = node.value
            return value if isinstance(value, str) else None
        if type(node).__name__ == 'Str':
            return node.s
        return None

    def check_xpath(self, s, lineno):
        """Compile the XPath *s* found at line *lineno*.

        :raises Error: if lxml rejects the expression

        If warnings are enabled, a message is printed when the expression
        (ignoring leading parentheses) does not start with ``.`` and the
        innermost element class is an ``ItemElement`` directly nested in a
        ``TableElement`` or a ``ListElement``.
        """
        try:
            lxml.etree.XPath(s)
        except lxml.etree.XPathSyntaxError as exc:
            raise Error(self.file, lineno, exc)

        if not self.warnings or len(self.element_context) < 2:
            return
        if s.lstrip('(').startswith('.'):
            return

        inner, outer = self.element_context[-1], self.element_context[-2]
        if inner == 'ItemElement' and outer in ('TableElement', 'ListElement'):
            print('%s:%s: probable missing "." at start of XPath' % (self.file, lineno))

    def _item_xpath(self, node):
        """Check ``item_xpath = '...'`` in a TableElement/ListElement class."""
        # Only plain single-target assignments are considered.
        if len(node.targets) != 1:
            return
        target = node.targets[0]
        if not isinstance(target, ast.Name) or target.id != 'item_xpath':
            return
        if not self.element_context:
            return
        if self.element_context[-1] not in ('TableElement', 'ListElement'):
            return

        xpath = self._literal_string(node.value)
        # Compare with None: an empty literal must still be checked.
        if xpath is None:
            return

        self.check_xpath(xpath, node.lineno)

    visit_Assign = do_visits(_item_xpath)

    def _check_first_arg(self, node):
        """Check the first positional argument of call *node* if it is a string literal."""
        if not node.args:
            return
        xpath = self._literal_string(node.args[0])
        if xpath is None:
            return

        self.check_xpath(xpath, node.lineno)

    def _xpath_call(self, node):
        """Check calls of the form ``<expr>.xpath('...')``."""
        if not isinstance(node.func, ast.Attribute):
            return
        if node.func.attr != 'xpath':
            return

        self._check_first_arg(node)

    def _filter_call(self, node):
        """Check calls of the form ``Filter('...')`` for names in ``self.filters``."""
        if not isinstance(node.func, ast.Name):
            return
        if node.func.id not in self.filters:
            return

        self._check_first_arg(node)

    visit_Call = do_visits(_xpath_call, _filter_call)

    def visit_ClassDef(self, node):
        """Visit a class body, tracking the *Element class it derives from.

        Only the first base written as a bare name among ``ListElement``,
        ``ItemElement`` and ``TableElement`` is recorded.
        """
        element_base = None
        for basenode in node.bases:
            if isinstance(basenode, ast.Name) and basenode.id in ('ListElement', 'ItemElement', 'TableElement'):
                element_base = basenode.id
                break

        if element_base is None:
            self.generic_visit(node)
            return

        self.element_context.append(element_base)
        try:
            self.generic_visit(node)
        finally:
            # Keep the stack balanced even if a check raised.
            self.element_context.pop()
129
130
def search_py(root):
    """Yield the path of every ``*.py`` file found under *root*.

    Both the sub-directories and the files of each directory are handled in
    sorted order, so the traversal order does not depend on the order in
    which the file system lists entries.

    :param root: directory to walk recursively
    :return: generator of paths, each one built by joining the walked
             directory path and the file name
    """
    for path, dirs, files in os.walk(root):
        # Sorting in place matters: os.walk descends into ``dirs`` in the
        # order the list is left in.
        dirs.sort()
        for f in sorted(fnmatch.filter(files, '*.py')):
            yield os.path.join(path, f)
136
137
if __name__ == '__main__':
    # Command-line entry point: check every ``*.py`` file found under the
    # modules directory and print the problems found.
    import argparse

    parser = argparse.ArgumentParser(description="Check XPath definitions")
    parser.add_argument('-w', '--warnings', action='store_true')
    args = parser.parse_args()

    # Default location: the "modules" directory beside this script's parent
    # directory. os.path.join keeps the path relative when __file__ has no
    # directory part (plain string concatenation would produce "/modules").
    default_modpath = os.path.normpath(
        os.path.join(os.path.dirname(__file__), os.pardir, 'modules'))
    modpath = os.getenv('WEBOOB_MODULES', default_modpath)

    for fn in search_py(modpath):
        # Read bytes so that the parser itself honours the encoding
        # declaration of the file instead of the locale's default encoding.
        with open(fn, 'rb') as fd:
            source = fd.read()

        try:
            node = ast.parse(source, fn)
        except SyntaxError:
            print('In file', fn)
            traceback.print_exc()
            # Nothing to visit for this file; do not reuse a previous tree.
            continue

        try:
            Visitor(fn, warnings=args.warnings).visit(node)
        except SyntaxError as exc:
            # Error (invalid XPath) derives from SyntaxError.
            print(exc)
156