1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18import inspect
19import tokenize
20from contextlib import contextmanager
21
22try:
23    from numpydoc.validate import Docstring, validate
24except ImportError:
25    have_numpydoc = False
26else:
27    have_numpydoc = True
28
29from ..utils.logger import logger
30from ..utils.command import Command, capture_stdout, default_bin
31
32
class Flake8(Command):
    """Command bound to the ``flake8`` executable.

    Parameters
    ----------
    flake8_bin : optional
        Explicit executable to use; it is handed to ``default_bin`` together
        with the fallback name ``"flake8"`` and the result is stored as
        ``self.bin``.
    """

    def __init__(self, flake8_bin=None):
        executable = default_bin(flake8_bin, "flake8")
        self.bin = executable
36
37
class Autopep8(Command):
    """Command bound to the ``autopep8`` executable.

    Parameters
    ----------
    autopep8_bin : optional
        Explicit executable to use; it is handed to ``default_bin`` together
        with the fallback name ``"autopep8"`` and the result is stored as
        ``self.bin``.
    """

    def __init__(self, autopep8_bin=None):
        executable = default_bin(autopep8_bin, "autopep8")
        self.bin = executable

    @capture_stdout()
    def run_captured(self, *args, **kwargs):
        """Forward all arguments to ``run`` under ``capture_stdout``.

        NOTE(review): what is ultimately returned is decided by the
        ``capture_stdout`` decorator -- confirm against its definition.
        """
        outcome = self.run(*args, **kwargs)
        return outcome
45
46
def _tokenize_signature(s):
    """
    Tokenize a signature string with the standard library tokenizer.

    Parameters
    ----------
    s : str
        Text of the signature, for example ``"array(obj, bool safe=True)"``.

    Returns
    -------
    generator
        The token stream produced by ``tokenize.tokenize``. Its first item is
        the ENCODING token, followed by the tokens of the signature.
    """
    # tokenize.tokenize() expects a readline callable that returns bytes and
    # assumes UTF-8 when it finds no BOM or coding cookie. Encoding as UTF-8
    # (instead of ASCII) keeps plain ASCII input unchanged while no longer
    # failing on non-ASCII characters, e.g. inside a default value.
    lines = s.encode('utf-8').splitlines()
    readline = iter(lines).__next__
    return tokenize.tokenize(readline)
51
52
def _convert_typehint(tokens):
    """
    Strip cython-style typehints from a tokenized signature.

    Everything before the first ``(`` token is dropped. From there on, a run
    of consecutive NAME tokens is collapsed to its last member, so that
    ``bool safe`` becomes ``safe``; all other tokens pass through unchanged.

    Parameters
    ----------
    tokens : iterable
        Tokens exposing ``type`` and ``string`` attributes.

    Yields
    ------
    tuple
        ``(type, string)`` pairs suitable for ``tokenize.untokenize``.

    Raises
    ------
    ValueError
        If more than two NAME tokens follow each other.
    """
    stream = iter(tokens)

    # Phase one: skip the leading tokens (callable name etc.) and emit the
    # opening bracket itself.
    for tok in stream:
        if tok.string == '(':
            yield (tok.type, tok.string)
            break

    # Phase two: buffer NAME tokens and flush the buffer whenever a
    # non-NAME token shows up.
    pending = []
    for tok in stream:
        if tok.type == tokenize.NAME:
            pending.append(tok)
            continue

        if len(pending) > 2:
            raise ValueError('More than two NAME tokens follow each other')
        if pending:
            # With two buffered names the first one is the cython typehint;
            # it is dropped because hints are not supported by
            # _signature_fromstr. With a single name there is nothing to drop.
            kept = pending[-1]
            yield (kept.type, kept.string)
        pending = []
        yield (tok.type, tok.string)
79
80
def inspect_signature(obj):
    """
    Custom signature inspection primarily for cython generated callables.

    Cython puts the signatures to the first line of the docstrings, which we
    can reuse to parse the python signature from, but some gymnastics are
    required, like removing the cython typehints.

    It converts the cython signature:
        array(obj, type=None, mask=None, size=None, from_pandas=None,
              bool safe=True, MemoryPool memory_pool=None)
    To:
        <Signature (obj, type=None, mask=None, size=None, from_pandas=None,
                    safe=True, memory_pool=None)>

    Parameters
    ----------
    obj : Any
        Object whose ``__doc__`` starts with a signature line.

    Returns
    -------
    inspect.Signature
        Signature parsed from the first docstring line.

    Raises
    ------
    ValueError
        If the object has no docstring (``None`` or empty) to parse a
        signature from. Errors raised while tokenizing or parsing the first
        line are propagated unchanged.
    """
    doc = getattr(obj, '__doc__', None)
    doc_lines = doc.splitlines() if isinstance(doc, str) else []
    if not doc_lines:
        # Fail with the exception type inspect.signature itself uses when no
        # signature can be provided, rather than an incidental
        # AttributeError/IndexError.
        raise ValueError(
            'no docstring to parse a signature from for {!r}'.format(obj)
        )

    cython_signature = doc_lines[0]
    cython_tokens = _tokenize_signature(cython_signature)
    python_tokens = _convert_typehint(cython_tokens)
    python_signature = tokenize.untokenize(python_tokens)
    # NOTE: _signature_fromstr is a private helper of the inspect module.
    return inspect._signature_fromstr(inspect.Signature, obj, python_signature)
101
102
class NumpyDoc:
    """
    Run numpydoc validation over the publicly exposed members of symbols.

    Parameters
    ----------
    symbols : iterable of str, optional
        Names of the symbols to load and traverse, ``{'pyarrow'}`` if omitted
        or empty.

    Raises
    ------
    RuntimeError
        If numpydoc could not be imported.
    """

    def __init__(self, symbols=None):
        if not have_numpydoc:
            raise RuntimeError(
                'Numpydoc is not available, install the development version '
                'with command: pip install numpydoc==1.1.0'
            )
        self.symbols = set(symbols or {'pyarrow'})

    def traverse(self, fn, obj, from_package):
        """Apply a function on publicly exposed API components.

        Recursively iterates over the members of the passed object. It omits
        any '_' prefixed members and members whose ``__module__`` does not
        start with `from_package`. Every object is visited at most once.

        Parameters
        ----------
        fn : callable
            Called with each visited object, starting with `obj` itself.
        obj : Any
            Root object of the traversal.
        from_package : str
            Predicate to only consider objects from this package; members
            are kept when their ``__module__`` starts with this prefix.
        """
        todo = [obj]
        seen = set()
        # Unhashable objects cannot be stored in a set, so they are tracked
        # by identity. The object itself is kept as the value so that its
        # id() cannot be recycled for another object during the traversal.
        seen_unhashable = {}

        while todo:
            obj = todo.pop()
            try:
                if obj in seen:
                    continue
                seen.add(obj)
            except TypeError:
                if id(obj) in seen_unhashable:
                    continue
                seen_unhashable[id(obj)] = obj

            fn(obj)

            for name in dir(obj):
                if name.startswith('_'):
                    continue

                member = getattr(obj, name)
                module = getattr(member, '__module__', None)
                # __module__ is not guaranteed to be a string, only string
                # values can be matched against the package prefix
                if not (isinstance(module, str) and module and
                        module.startswith(from_package)):
                    continue

                todo.append(member)

    @contextmanager
    def _apply_patches(self):
        """
        Patch Docstring class to bypass loading already loaded python objects.

        While the context is active ``Docstring._load_obj`` and
        ``inspect.signature`` are replaced; both are restored on exit, even
        if the body raises.
        """
        orig_load_obj = Docstring._load_obj
        orig_signature = inspect.signature

        @staticmethod
        def _load_obj(obj):
            # By default it expects a qualname and imports the object, but we
            # have already loaded the object after the API traversal.
            if isinstance(obj, str):
                return orig_load_obj(obj)
            else:
                return obj

        def signature(obj):
            # inspect.signature tries to parse __text_signature__ if other
            # properties like __signature__ don't exist, but cython
            # doesn't set that property despite that embedsignature cython
            # directive is set. The only way to inspect a cython compiled
            # callable's signature is to parse it from __doc__ while
            # embedsignature directive is set during the build phase.
            # So patch inspect.signature function to attempt to parse the
            # first line of callable.__doc__ as a signature.
            try:
                return orig_signature(obj)
            except Exception as orig_error:
                try:
                    return inspect_signature(obj)
                except Exception:
                    # the fallback failed as well, report the original error
                    raise orig_error

        try:
            Docstring._load_obj = _load_obj
            inspect.signature = signature
            yield
        finally:
            Docstring._load_obj = orig_load_obj
            inspect.signature = orig_signature

    def validate(self, from_package='', allow_rules=None,
                 disallow_rules=None):
        """Validate the docstrings reachable from the configured symbols.

        Parameters
        ----------
        from_package : str, default ''
            Prefix passed to `traverse` to restrict the visited members.
        allow_rules : container of str, optional
            If given and non-empty, only errors whose code is contained in it
            are reported.
        disallow_rules : container of str, optional
            If given and non-empty, errors whose code is contained in it are
            dropped.

        Returns
        -------
        list of tuple
            ``(obj, result)`` pairs for every object left with at least one
            error after filtering; ``result['errors']`` holds the filtered
            ``(code, message)`` pairs.
        """
        results = []

        def callback(obj):
            try:
                result = validate(obj)
            except OSError as e:
                try:
                    symbol = f"{obj.__module__}.{obj.__name__}"
                except AttributeError:
                    # not every traversed object has a __name__, do not let
                    # the error reporting itself fail
                    symbol = repr(obj)
                logger.warning(f"Unable to validate `{symbol}` due to `{e}`")
                return

            errors = []
            for errcode, errmsg in result.get('errors', []):
                if allow_rules and errcode not in allow_rules:
                    continue
                if disallow_rules and errcode in disallow_rules:
                    continue
                errors.append((errcode, errmsg))

            if errors:
                result['errors'] = errors
                results.append((obj, result))

        with self._apply_patches():
            for symbol in self.symbols:
                try:
                    obj = Docstring._load_obj(symbol)
                except (ImportError, AttributeError):
                    print('{} is not available for import'.format(symbol))
                else:
                    self.traverse(callback, obj, from_package=from_package)

        return results
224