# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import inspect
import tokenize
from contextlib import contextmanager

try:
    from numpydoc.validate import Docstring, validate
except ImportError:
    have_numpydoc = False
else:
    have_numpydoc = True

from ..utils.logger import logger
from ..utils.command import Command, capture_stdout, default_bin


class Flake8(Command):
    """Command whose binary is resolved via ``default_bin(..., "flake8")``."""

    def __init__(self, flake8_bin=None):
        self.bin = default_bin(flake8_bin, "flake8")


class Autopep8(Command):
    """Command whose binary is resolved via ``default_bin(..., "autopep8")``."""

    def __init__(self, autopep8_bin=None):
        self.bin = default_bin(autopep8_bin, "autopep8")

    @capture_stdout()
    def run_captured(self, *args, **kwargs):
        """Forward all arguments to ``run`` under ``capture_stdout``."""
        return self.run(*args, **kwargs)


def _tokenize_signature(s):
    """
    Tokenize a signature string with the standard ``tokenize`` module.

    Parameters
    ----------
    s : str
        Signature text; it is encoded as ASCII, so non-ASCII input raises
        UnicodeEncodeError.

    Returns
    -------
    generator of tokenize.TokenInfo
    """
    lines = s.encode('ascii').splitlines()
    # tokenize.tokenize expects a readline-like callable returning bytes
    generator = iter(lines).__next__
    return tokenize.tokenize(generator)


def _convert_typehint(tokens):
    """
    Strip cython typehints from a tokenized signature.

    Everything before the first opening bracket is dropped. Afterwards
    consecutive NAME tokens are buffered: a single name is passed through,
    for a pair only the second one (the argument name) is kept.

    Parameters
    ----------
    tokens : iterable of tokenize.TokenInfo

    Yields
    ------
    tuple of (int, str)
        Token type and token string, suitable for ``tokenize.untokenize``.

    Raises
    ------
    ValueError
        If more than two NAME tokens follow each other.
    """
    names = []
    opening_bracket_reached = False
    for token in tokens:
        # omit the tokens before the opening bracket
        if not opening_bracket_reached:
            if token.string == '(':
                opening_bracket_reached = True
            else:
                continue

        if token.type == tokenize.NAME:
            names.append(token)
        else:
            # a non-NAME token terminates the current run of names
            if len(names) == 1:
                yield (names[0].type, names[0].string)
            elif len(names) == 2:
                # two "NAME" tokens follow each other which means a cython
                # typehint like `bool argument`, so remove the typehint
                # note that we could convert it to python typehints, but hints
                # are not supported by _signature_fromstr
                yield (names[1].type, names[1].string)
            elif len(names) > 2:
                raise ValueError('More than two NAME tokens follow each other')
            names = []
            yield (token.type, token.string)


def inspect_signature(obj):
    """
    Custom signature inspection primarily for cython generated callables.

    Cython puts the signatures to the first line of the docstrings, which we
    can reuse to parse the python signature from, but some gymnastics are
    required, like removing the cython typehints.

    It converts the cython signature:
        array(obj, type=None, mask=None, size=None, from_pandas=None,
              bool safe=True, MemoryPool memory_pool=None)
    To:
        <Signature (obj, type=None, mask=None, size=None, from_pandas=None,
                    safe=True, memory_pool=None)>

    Parameters
    ----------
    obj : Any
        Object whose ``__doc__`` starts with a signature line. A missing or
        empty docstring makes this function raise.

    Returns
    -------
    inspect.Signature
    """
    cython_signature = obj.__doc__.splitlines()[0]
    cython_tokens = _tokenize_signature(cython_signature)
    python_tokens = _convert_typehint(cython_tokens)
    python_signature = tokenize.untokenize(python_tokens)
    return inspect._signature_fromstr(inspect.Signature, obj, python_signature)


class NumpyDoc:
    """
    Run numpydoc validation over the members of a set of symbols.

    Parameters
    ----------
    symbols : iterable of str, optional
        Symbols to load and traverse; defaults to ``{'pyarrow'}``.

    Raises
    ------
    RuntimeError
        If numpydoc could not be imported.
    """

    def __init__(self, symbols=None):
        if not have_numpydoc:
            raise RuntimeError(
                'Numpydoc is not available, install the development version '
                'with command: pip install numpydoc==1.1.0'
            )
        self.symbols = set(symbols or {'pyarrow'})

    def traverse(self, fn, obj, from_package):
        """Apply a function on publicly exposed API components.

        Recursively iterates over the members of the passed object. It omits
        any '_' prefixed symbols and any member whose ``__module__`` does not
        start with `from_package`.

        Parameters
        ----------
        fn : callable
            Called once with every visited object, including `obj` itself.
        obj : Any
            Root object of the traversal.
        from_package : string
            Predicate to only consider objects from this package.
        """
        todo = [obj]
        seen = set()

        while todo:
            obj = todo.pop()
            if obj in seen:
                continue
            seen.add(obj)

            fn(obj)

            for name in dir(obj):
                if name.startswith('_'):
                    continue

                member = getattr(obj, name)
                module = getattr(member, '__module__', None)
                if not (module and module.startswith(from_package)):
                    continue

                todo.append(member)

    @contextmanager
    def _apply_patches(self):
        """
        Patch Docstring class to bypass loading already loaded python objects.

        ``Docstring._load_obj`` and ``inspect.signature`` are replaced for
        the duration of the context and restored afterwards.
        """
        # Keep the raw class attribute (without triggering the descriptor
        # protocol) so that restoring it preserves a staticmethod wrapper;
        # re-assigning the unwrapped function would turn it into a regular
        # method expecting `self`.
        orig_load_obj_attr = inspect.getattr_static(Docstring, '_load_obj')
        orig_load_obj = Docstring._load_obj
        orig_signature = inspect.signature

        @staticmethod
        def _load_obj(obj):
            # By default it expects a qualname and import the object, but we
            # have already loaded object after the API traversal.
            if isinstance(obj, str):
                return orig_load_obj(obj)
            else:
                return obj

        def signature(obj):
            # inspect.signature tries to parse __text_signature__ if other
            # properties like __signature__ don't exist, but cython
            # doesn't set that property despite that embedsignature cython
            # directive is set. The only way to inspect a cython compiled
            # callable's signature to parse it from __doc__ while
            # embedsignature directive is set during the build phase.
            # So patch inspect.signature function to attempt to parse the
            # first line of callable.__doc__ as a signature.
            try:
                return orig_signature(obj)
            except Exception as orig_error:
                try:
                    return inspect_signature(obj)
                except Exception:
                    # the fallback failed too, report the original problem
                    raise orig_error

        try:
            Docstring._load_obj = _load_obj
            inspect.signature = signature
            yield
        finally:
            Docstring._load_obj = orig_load_obj_attr
            inspect.signature = orig_signature

    def validate(self, from_package='', allow_rules=None,
                 disallow_rules=None):
        """
        Validate the docstrings reachable from the configured symbols.

        Parameters
        ----------
        from_package : string, default ''
            Forwarded to `traverse` to restrict the visited members.
        allow_rules : container of str, optional
            If given, only errors whose code is in it are kept.
        disallow_rules : container of str, optional
            If given, errors whose code is in it are dropped.

        Returns
        -------
        list of tuple
            ``(obj, result)`` pairs for every object having at least one
            error left after filtering; ``result['errors']`` holds the
            filtered ``(errcode, errmsg)`` pairs.
        """
        results = []

        def callback(obj):
            try:
                result = validate(obj)
            except OSError as e:
                # modules have no __module__ attribute, so don't let the
                # error reporting itself fail with AttributeError
                module = getattr(obj, '__module__', None)
                name = getattr(obj, '__name__', repr(obj))
                symbol = f"{module}.{name}" if module else name
                logger.warning(f"Unable to validate `{symbol}` due to `{e}`")
                return

            errors = []
            for errcode, errmsg in result.get('errors', []):
                if allow_rules and errcode not in allow_rules:
                    continue
                if disallow_rules and errcode in disallow_rules:
                    continue
                errors.append((errcode, errmsg))

            if errors:
                result['errors'] = errors
                results.append((obj, result))

        with self._apply_patches():
            for symbol in self.symbols:
                try:
                    obj = Docstring._load_obj(symbol)
                except (ImportError, AttributeError):
                    print('{} is not available for import'.format(symbol))
                else:
                    self.traverse(callback, obj, from_package=from_package)

        return results