#!/usr/bin/env python3

# Copyright(C) 2017 Vincent A
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

"""Statically check XPath string literals found in Python modules.

Every ``*.py`` file under the modules directory is parsed with ``ast`` and
string literals that are used as XPath expressions (``item_xpath``
assignments, ``.xpath(...)`` calls, known filter calls) are compiled with
``lxml.etree.XPath`` to detect syntax errors.
"""

from __future__ import print_function

import ast
import fnmatch
import os
import traceback

import lxml.etree
from weboob.browser.filters import standard


class Error(SyntaxError):
    """XPath syntax error located at a given file and line.

    The exception message is formatted as ``file:line: message``; the
    location is also kept in the ``file`` and ``line`` attributes.
    """

    def __init__(self, file, line, message):
        super(Error, self).__init__('%s:%s: %s' % (file, line, message))
        self.file = file
        self.line = line


def do_visits(*funcs):
    """Build a ``visit_*`` method that runs several checks on a node.

    Each function in `funcs` is called as ``func(self, node)``, in order,
    then ``self.generic_visit(node)`` is called so children are visited too.
    """
    def wrapper(self, node):
        for func in funcs:
            func(self, node)
        self.generic_visit(node)
    return wrapper


def _literal_str(node):
    """Return the text of a string-literal AST node, or None otherwise.

    ``ast.Str`` is deprecated since Python 3.8 (string literals are parsed as
    ``ast.Constant``) and is being removed, so it is never referenced
    directly here.
    """
    constant_cls = getattr(ast, 'Constant', None)
    if constant_cls is not None and isinstance(node, constant_cls):
        return node.value if isinstance(node.value, str) else None
    # Parsers older than Python 3.8 still emit "Str" nodes; match them by
    # class name to avoid touching the deprecated ast.Str attribute.
    if type(node).__name__ == 'Str':
        return node.s
    return None


class Visitor(ast.NodeVisitor):
    """AST visitor validating XPath string literals of one source file.

    :param file: file name used in error and warning messages
    :param warnings: (keyword) if true, also print heuristic warnings
    """

    def __init__(self, file, *args, **kwargs):
        self.warnings = kwargs.pop('warnings', False)
        super(Visitor, self).__init__(*args, **kwargs)
        self.file = file

        # Names of callables whose first positional string argument is
        # checked as an XPath: every class exposed by the "standard" filters
        # module that derives from CleanText, plus a few explicit names.
        self.filters = []
        self.filters.extend(
            f for f in dir(standard)
            if isinstance(getattr(standard, f), type)
            and issubclass(getattr(standard, f), standard.CleanText)
        )
        self.filters.extend(['Regexp', 'XPath', 'Attr', 'Link'])

        # Stack of element base-class names for the classes being visited.
        self.element_context = []

    def check_xpath(self, s, lineno):
        """Compile `s` as an XPath expression.

        :raises Error: if lxml rejects the expression syntax

        When warnings are enabled, also print a message if the expression
        (ignoring leading parentheses) does not start with "." while the
        current class is an ItemElement directly nested in a TableElement
        or ListElement.
        """
        try:
            lxml.etree.XPath(s)
        except lxml.etree.XPathSyntaxError as exc:
            raise Error(self.file, lineno, exc)

        if self.warnings:
            if not s.lstrip('(').startswith('.') and len(self.element_context) >= 2:
                if self.element_context[-1] == 'ItemElement' and self.element_context[-2] in ('TableElement', 'ListElement'):
                    print('%s:%s: probable missing "." at start of XPath' % (self.file, lineno))

    def _item_xpath(self, node):
        """Check ``item_xpath = '<literal>'`` in a Table/ListElement class."""
        try:
            target, = node.targets
        except ValueError:
            # Not a single-target assignment (e.g. "a = b = ...").
            return
        if not isinstance(target, ast.Name) or target.id != 'item_xpath':
            return
        if not self.element_context:
            return
        if self.element_context[-1] not in ('TableElement', 'ListElement'):
            return

        xpath = _literal_str(node.value)
        if xpath is None:
            return

        self.check_xpath(xpath, node.lineno)

    visit_Assign = do_visits(_item_xpath)

    def _xpath_call(self, node):
        """Check the first argument of ``<expr>.xpath('<literal>', ...)``."""
        if not isinstance(node.func, ast.Attribute):
            return
        if node.func.attr != 'xpath':
            return
        if not node.args:
            return

        xpath = _literal_str(node.args[0])
        if xpath is None:
            return

        self.check_xpath(xpath, node.lineno)

    def _filter_call(self, node):
        """Check the first argument of calls to a known filter name."""
        if not isinstance(node.func, ast.Name):
            return
        if node.func.id not in self.filters:
            return
        if not node.args:
            return

        xpath = _literal_str(node.args[0])
        if xpath is None:
            return

        self.check_xpath(xpath, node.lineno)

    visit_Call = do_visits(_xpath_call, _filter_call)

    def visit_ClassDef(self, node):
        """Track the element kind of a class while visiting its body.

        The first base written as a bare name among ListElement, ItemElement
        and TableElement is pushed on ``element_context`` for the duration of
        the visit of the class.
        """
        has_element = False

        for basenode in node.bases:
            if isinstance(basenode, ast.Name) and basenode.id in ('ListElement', 'ItemElement', 'TableElement'):
                self.element_context.append(basenode.id)
                has_element = True
                break

        self.generic_visit(node)

        if has_element:
            self.element_context.pop()


def search_py(root):
    """Yield the path of every ``*.py`` file under `root`.

    Directories are walked in sorted order.
    """
    for path, dirs, files in os.walk(root):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        for f in fnmatch.filter(files, '*.py'):
            yield os.path.join(path, f)


def main():
    """Check every module file and print the problems found."""
    import argparse
    parser = argparse.ArgumentParser(description="Check XPath definitions")
    parser.add_argument('-w', '--warnings', action='store_true')
    args = parser.parse_args()

    modpath = os.getenv('WEBOOB_MODULES', os.path.normpath(os.path.dirname(__file__) + '/../modules'))
    for fn in search_py(modpath):
        # Read bytes so that ast.parse honours the file's own encoding
        # declaration instead of depending on the locale encoding.
        with open(fn, 'rb') as fd:
            source = fd.read()

        try:
            node = ast.parse(source, fn)
        except SyntaxError:
            print('In file', fn)
            traceback.print_exc()
            # There is no tree to visit for this file: go to the next one
            # rather than visiting an unbound or stale "node".
            continue

        try:
            Visitor(fn, warnings=args.warnings).visit(node)
        except SyntaxError as exc:
            print(exc)


if __name__ == '__main__':
    main()