1#!/usr/bin/env python
2"""
3Analyze docstrings to detect errors.
4
5Call ``validate(object_name_to_validate)`` to get a dictionary
6with all the detected errors.
7"""
8import ast
9import collections
10import importlib
11import inspect
12import pydoc
13import re
14import textwrap
15from .docscrape import NumpyDocString
16
17
# Sphinx directives that are recognised inside docstrings.
DIRECTIVES = ["versionadded", "versionchanged", "deprecated"]
# Matches a line that starts (after optional whitespace) with one of the
# directives above when the directive name is NOT followed by "::".
# The single capture group is the directive name as written.
DIRECTIVE_PATTERN = re.compile(
    r"^\s*\.\. ({})(?!::)".format("|".join(DIRECTIVES)),
    flags=re.IGNORECASE | re.MULTILINE,
)
# Section titles accepted in a docstring. The order of this list matters:
# validate() filters it down to the sections actually present and compares
# the result with the order found in the docstring (error GL07). Titles not
# listed here are reported as GL06.
ALLOWED_SECTIONS = [
    "Parameters",
    "Attributes",
    "Methods",
    "Returns",
    "Yields",
    "Other Parameters",
    "Raises",
    "Warns",
    "See Also",
    "Notes",
    "References",
    "Examples",
]
# Message templates keyed by error code. Placeholders in curly braces
# (e.g. ``{param_name}``) are filled in with ``str.format`` by ``error()``,
# so the keyword arguments passed to ``error()`` must match the placeholder
# names of the selected template.
ERROR_MSGS = {
    "GL01": "Docstring text (summary) should start in the line immediately "
    "after the opening quotes (not in the same line, or leaving a "
    "blank line in between)",
    "GL02": "Closing quotes should be placed in the line after the last text "
    "in the docstring (do not close the quotes in the same line as "
    "the text, or leave a blank line between the last text and the "
    "quotes)",
    "GL03": "Double line break found; please use only one blank line to "
    "separate sections or paragraphs, and do not leave blank lines "
    "at the end of docstrings",
    "GL05": 'Tabs found at the start of line "{line_with_tabs}", please use '
    "whitespace only",
    "GL06": 'Found unknown section "{section}". Allowed sections are: '
    "{allowed_sections}",
    "GL07": "Sections are in the wrong order. Correct order is: {correct_sections}",
    "GL08": "The object does not have a docstring",
    "GL09": "Deprecation warning should precede extended summary",
    "GL10": "reST directives {directives} must be followed by two colons",
    "SS01": "No summary found (a short summary in a single line should be "
    "present at the beginning of the docstring)",
    "SS02": "Summary does not start with a capital letter",
    "SS03": "Summary does not end with a period",
    "SS04": "Summary contains heading whitespaces",
    "SS05": "Summary must start with infinitive verb, not third person "
    '(e.g. use "Generate" instead of "Generates")',
    "SS06": "Summary should fit in a single line",
    "ES01": "No extended summary found",
    "PR01": "Parameters {missing_params} not documented",
    "PR02": "Unknown parameters {unknown_params}",
    "PR03": "Wrong parameters order. Actual: {actual_params}. "
    "Documented: {documented_params}",
    "PR04": 'Parameter "{param_name}" has no type',
    "PR05": 'Parameter "{param_name}" type should not finish with "."',
    "PR06": 'Parameter "{param_name}" type should use "{right_type}" instead '
    'of "{wrong_type}"',
    "PR07": 'Parameter "{param_name}" has no description',
    "PR08": 'Parameter "{param_name}" description should start with a '
    "capital letter",
    "PR09": 'Parameter "{param_name}" description should finish with "."',
    "PR10": 'Parameter "{param_name}" requires a space before the colon '
    "separating the parameter name and type",
    "RT01": "No Returns section found",
    "RT02": "The first line of the Returns section should contain only the "
    "type, unless multiple values are being returned",
    "RT03": "Return value has no description",
    "RT04": "Return value description should start with a capital letter",
    "RT05": 'Return value description should finish with "."',
    "YD01": "No Yields section found",
    "SA01": "See Also section not found",
    "SA02": "Missing period at end of description for See Also "
    '"{reference_name}" reference',
    "SA03": "Description should be capitalized for See Also "
    '"{reference_name}" reference',
    "SA04": 'Missing description for See Also "{reference_name}" reference',
    "EX01": "No examples section found",
}
92
# Line prefixes (indentation or list-bullet markers) for which the
# "description should finish with a period" check is skipped: when the last
# line of a description starts with one of these, _check_desc() does not
# report a missing final ".".
IGNORE_STARTS = (" ", "* ", "- ")
95
96
def error(code, **kwargs):
    """
    Build the ``(code, message)`` pair for a validation error.

    This is a shortcut so that instead of writing:
    - `('PR02', ERROR_MSGS['PR02'].format(unknown_params=params))`

    one can simply write:
    - `error('PR02', unknown_params=params)`

    Parameters
    ----------
    code : str
        Error code; must be a key of ``ERROR_MSGS``.
    **kwargs
        Values for the placeholders of the selected message template.

    Returns
    -------
    code : str
        Error code.
    message : str
        Error message with its placeholders replaced.
    """
    template = ERROR_MSGS[code]
    message = template.format(**kwargs)
    return code, message
122
123
class Docstring:
    """
    Importable object together with its raw, cleaned and parsed docstring.

    The object is looked up from its dotted name; the properties of this
    class expose the pieces of information the validation needs.

    Parameters
    ----------
    name : str
        Dotted name of the object to load (e.g. ``datetime.datetime``).
    """

    # TODO Can all this class be merged into NumpyDocString?
    def __init__(self, name):
        self.name = name
        obj = self._load_obj(name)
        self.obj = obj
        # Follow ``__wrapped__`` so source lookups hit the real implementation.
        self.code_obj = inspect.unwrap(obj)
        self.raw_doc = obj.__doc__ or ""
        self.clean_doc = pydoc.getdoc(obj)
        self.doc = NumpyDocString(self.clean_doc)

    @staticmethod
    def _load_obj(name):
        """
        Import Python object from its name as string.

        Parameters
        ----------
        name : str
            Object name to import (e.g. pandas.Series.str.upper)

        Returns
        -------
        object
            Python object that can be a class, method, function...

        Raises
        ------
        ImportError
            If no prefix of ``name`` can be imported as a module.

        Examples
        --------
        >>> Docstring._load_obj('datetime.datetime')
        <class 'datetime.datetime'>
        """
        # Try the longest module path first and progressively treat more of
        # the trailing components as attributes instead of modules.
        for maxsplit in range(0, name.count(".") + 1):
            module, *func_parts = name.rsplit(".", maxsplit)
            try:
                obj = importlib.import_module(module)
            except ImportError:
                pass
            else:
                break
        else:
            raise ImportError('No module can be imported from "{}"'.format(name))

        for part in func_parts:
            obj = getattr(obj, part)
        return obj

    @property
    def type(self):
        """
        Name of the type of the loaded object.
        """
        return type(self.obj).__name__

    @property
    def is_function_or_method(self):
        """
        Whether the loaded object is a Python function.
        """
        return inspect.isfunction(self.obj)

    @property
    def source_file_name(self):
        """
        File name where the object is implemented (e.g. pandas/core/frame.py).
        """
        try:
            return inspect.getsourcefile(self.code_obj)
        except TypeError:
            # In some cases the object is something complex like a cython
            # object that can't be easily introspected. And it's better to
            # return the source code file of the object as None, than crash
            return None

    @property
    def source_file_def_line(self):
        """
        Number of line where the object is defined in its file.
        """
        try:
            return inspect.getsourcelines(self.code_obj)[-1]
        except (OSError, TypeError):
            # In some cases the object is something complex like a cython
            # object that can't be easily introspected. And it's better to
            # return the line number as None, than crash
            return None

    @property
    def start_blank_lines(self):
        """
        Index of the first non-blank line of the raw docstring.

        ``None`` when the raw docstring is empty.
        """
        i = None
        if self.raw_doc:
            for i, row in enumerate(self.raw_doc.split("\n")):
                if row.strip():
                    break
        return i

    @property
    def end_blank_lines(self):
        """
        Index, counting from the end, of the last non-blank line.

        ``None`` when the raw docstring is empty.
        """
        i = None
        if self.raw_doc:
            for i, row in enumerate(reversed(self.raw_doc.split("\n"))):
                if row.strip():
                    break
        return i

    @property
    def double_blank_lines(self):
        """
        Whether the raw docstring contains two consecutive blank lines.
        """
        # Start as "previous line had content" so a leading blank line alone
        # is not reported.
        prev = True
        for row in self.raw_doc.split("\n"):
            if not prev and not row.strip():
                return True
            prev = row.strip()
        return False

    @property
    def section_titles(self):
        """
        Titles of the underlined sections, in the order they appear.
        """
        sections = []
        self.doc._doc.reset()
        while not self.doc._doc.eof():
            content = self.doc._read_to_next_section()
            # A section is a title line followed by a line of dashes of
            # exactly the same length.
            if (
                len(content) > 1
                and len(content[0]) == len(content[1])
                and set(content[1]) == {"-"}
            ):
                sections.append(content[0])
        return sections

    @property
    def summary(self):
        """
        Lines of the parsed summary joined with single spaces.
        """
        return " ".join(self.doc["Summary"])

    @property
    def num_summary_lines(self):
        """
        Number of lines in the parsed summary.
        """
        return len(self.doc["Summary"])

    @property
    def extended_summary(self):
        """
        Extended summary joined with single spaces.

        When there is no extended summary but the summary spans several
        lines, the joined summary is returned instead.
        """
        if not self.doc["Extended Summary"] and len(self.doc["Summary"]) > 1:
            return " ".join(self.doc["Summary"])
        return " ".join(self.doc["Extended Summary"])

    @property
    def doc_parameters(self):
        """
        Ordered mapping of documented parameter name to ``(type, desc)``.

        Entries documenting several names at once (``a, b : int``) are
        expanded so that every name gets its own key.
        """
        parameters = collections.OrderedDict()
        for names, type_, desc in self.doc["Parameters"]:
            for name in names.split(", "):
                parameters[name] = (type_, desc)
        return parameters

    @property
    def signature_parameters(self):
        """
        Tuple with the parameter names of the object's signature.

        ``*args`` / ``**kwargs`` style parameters keep their stars, a leading
        ``self`` or ``cls`` is dropped, and an empty tuple is returned for
        accessor classes and for objects whose signature can't be inspected.
        """

        def add_stars(param_name, info):
            """
            Add stars to *args and **kwargs parameters
            """
            if info.kind == inspect.Parameter.VAR_POSITIONAL:
                return "*{}".format(param_name)
            elif info.kind == inspect.Parameter.VAR_KEYWORD:
                return "**{}".format(param_name)
            else:
                return param_name

        if inspect.isclass(self.obj):
            if hasattr(self.obj, "_accessors") and (
                self.name.split(".")[-1] in self.obj._accessors
            ):
                # accessor classes have a signature but don't want to show this
                return tuple()
        try:
            sig = inspect.signature(self.obj)
        except (TypeError, ValueError):
            # Some objects, mainly in C extensions do not support introspection
            # of the signature
            return tuple()

        params = tuple(
            add_stars(parameter, sig.parameters[parameter])
            for parameter in sig.parameters
        )
        if params and params[0] in ("self", "cls"):
            return params[1:]
        return params

    @property
    def parameter_mismatches(self):
        """
        List of PR01/PR02/PR03 errors between signature and documentation.

        PR03 (wrong order) is only reported when no parameter is missing or
        unknown.
        """
        errs = []
        signature_params = self.signature_parameters
        doc_params = tuple(self.doc_parameters)
        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append(error("PR01", missing_params=str(missing)))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append(error("PR02", unknown_params=str(extra)))
        if (
            not missing
            and not extra
            and signature_params != doc_params
            and not (not signature_params and not doc_params)
        ):
            errs.append(
                error(
                    "PR03", actual_params=signature_params, documented_params=doc_params
                )
            )

        return errs

    @property
    def directives_without_two_colons(self):
        """
        Names of the known directives in the raw docstring lacking ``::``.
        """
        return DIRECTIVE_PATTERN.findall(self.raw_doc)

    def parameter_type(self, param):
        """
        Return the documented type of the parameter named ``param``.
        """
        return self.doc_parameters[param][0]

    @property
    def see_also(self):
        """
        Ordered mapping of See Also reference name to its description.
        """
        result = collections.OrderedDict()
        for funcs, desc in self.doc["See Also"]:
            for func, _ in funcs:
                result[func] = "".join(desc)

        return result

    @property
    def examples(self):
        """
        Parsed content of the Examples section.
        """
        return self.doc["Examples"]

    @property
    def returns(self):
        """
        Parsed content of the Returns section.
        """
        return self.doc["Returns"]

    @property
    def yields(self):
        """
        Parsed content of the Yields section.
        """
        return self.doc["Yields"]

    @property
    def method_source(self):
        """
        Dedented source code of the object, or "" when it is unavailable.
        """
        try:
            source = inspect.getsource(self.obj)
        except (OSError, TypeError):
            # TypeError: built-in / extension objects. OSError: the source
            # can't be retrieved (e.g. dynamically generated code). Same
            # best-effort policy as ``source_file_def_line``.
            return ""
        return textwrap.dedent(source)

    @property
    def method_returns_something(self):
        """
        Check if the docstrings method can return something.

        Bare returns, returns valued None and returns from nested functions
        are disregarded.

        Returns
        -------
        bool
            Whether the docstrings method can return something.
        """
        # Definitions whose own ``return`` statements must not be attributed
        # to the enclosing function.
        nested_defs = (ast.FunctionDef, ast.AsyncFunctionDef)

        def get_returns_not_on_nested_functions(node):
            returns = [node] if isinstance(node, ast.Return) else []
            for child in ast.iter_child_nodes(node):
                # Ignore nested functions and their subtrees.
                if not isinstance(child, nested_defs):
                    returns.extend(get_returns_not_on_nested_functions(child))
            return returns

        def returns_none(value):
            # ``value`` is None for a bare ``return``. An explicit
            # ``return None`` is an ``ast.Constant`` holding None (the parser
            # emits ``ast.Constant`` since Python 3.8; the deprecated
            # ``ast.NameConstant`` alias was removed in Python 3.14).
            if value is None:
                return True
            return isinstance(value, ast.Constant) and value.value is None

        tree = ast.parse(self.method_source).body
        if not tree:
            return False
        returns = get_returns_not_on_nested_functions(tree[0])
        return any(not returns_none(r.value) for r in returns)

    @property
    def deprecated(self):
        """
        Whether a ``.. deprecated::`` directive is in the (extended) summary.
        """
        return ".. deprecated:: " in (self.summary + self.extended_summary)
403
def _check_desc(desc, code_no_desc, code_no_upper, code_no_period, **kwargs):
    """
    Check that a description exists, is capitalized and ends with a period.

    Anything from the first known sphinx directive onwards is ignored.

    Parameters
    ----------
    desc : list of str
        Lines of the description to check.
    code_no_desc : str
        Error code reported when the description is empty.
    code_no_upper : str
        Error code reported when the description starts with a lowercase
        letter.
    code_no_period : str
        Error code reported when the last line does not end with a period
        (unless that line starts with one of ``IGNORE_STARTS``).
    **kwargs
        Values for the placeholders of the error messages.

    Returns
    -------
    list of tuple
        The ``(code, message)`` pairs of the detected errors.
    """
    # Find and strip out any sphinx directives
    text = "\n".join(desc)
    for directive in DIRECTIVES:
        position = text.find(".. {}".format(directive))
        if position != -1:
            # Only retain any description before the directive
            text = text[:position].rstrip("\n")
    lines = text.split("\n")

    errs = []
    if not "".join(lines):
        errs.append(error(code_no_desc, **kwargs))
        return errs

    # Slice instead of indexing: the first line may be empty while later
    # lines are not, and that must not raise IndexError.
    first_char = lines[0][:1]
    if first_char.isalpha() and not first_char.isupper():
        errs.append(error(code_no_upper, **kwargs))
    # Not ending in "." is only an error if the last bit is not
    # indented (e.g., quote or code block)
    last_line = lines[-1]
    if not last_line.endswith(".") and not last_line.startswith(IGNORE_STARTS):
        errs.append(error(code_no_period, **kwargs))
    return errs
426
427
def validate(func_name):
    """
    Validate the docstring.

    Parameters
    ----------
    func_name : str
        Dotted name of the object whose docstring will be evaluated
        (e.g. pandas.read_csv).

    Returns
    -------
    dict
        A dictionary containing all the information obtained from validating
        the docstring. It has the keys ``type``, ``docstring``,
        ``deprecated``, ``file``, ``file_line`` and ``errors`` (a list of
        ``(code, message)`` tuples). When the object has no docstring the
        only error is GL08 and an extra ``examples_errors`` key (empty
        string) is present.

    Notes
    -----
    The errors codes are defined as:
    - First two characters: Section where the error happens:
       * GL: Global (no section, like section ordering errors)
       * SS: Short summary
       * ES: Extended summary
       * PR: Parameters
       * RT: Returns
       * YD: Yields
       * RS: Raises
       * WN: Warns
       * SA: See Also
       * NT: Notes
       * RF: References
       * EX: Examples
    - Last two characters: Numeric error code inside the section

    For example, PR02 is the second codified error in the Parameters section
    (which in this case is assigned to the error when unknown parameters are
    documented).

    The error codes, their corresponding error messages, and the details on how
    they are validated, are not documented more than in the source code of this
    function.
    """
    doc = Docstring(func_name)

    errs = []
    if not doc.raw_doc:
        errs.append(error("GL08"))
        return {
            "type": doc.type,
            "docstring": doc.clean_doc,
            "deprecated": doc.deprecated,
            "file": doc.source_file_name,
            "file_line": doc.source_file_def_line,
            "errors": errs,
            "examples_errors": "",
        }

    # Single-line docstrings are exempt from the opening/closing quotes checks.
    if doc.start_blank_lines != 1 and "\n" in doc.raw_doc:
        errs.append(error("GL01"))
    if doc.end_blank_lines != 1 and "\n" in doc.raw_doc:
        errs.append(error("GL02"))
    if doc.double_blank_lines:
        errs.append(error("GL03"))
    for line in doc.raw_doc.splitlines():
        if re.match("^ *\t", line):
            errs.append(error("GL05", line_with_tabs=line.lstrip()))

    # ``section_titles`` re-reads the docstring on every access, so read it
    # once and reuse the result.
    section_titles = doc.section_titles
    for section in section_titles:
        if section not in ALLOWED_SECTIONS:
            errs.append(
                error(
                    "GL06",
                    section=section,
                    allowed_sections=", ".join(ALLOWED_SECTIONS),
                )
            )

    correct_order = [
        section for section in ALLOWED_SECTIONS if section in section_titles
    ]
    if correct_order != section_titles:
        errs.append(error("GL07", correct_sections=", ".join(correct_order)))

    if doc.deprecated and not doc.extended_summary.startswith(".. deprecated:: "):
        errs.append(error("GL09"))

    directives_without_two_colons = doc.directives_without_two_colons
    if directives_without_two_colons:
        errs.append(error("GL10", directives=directives_without_two_colons))

    summary = doc.summary
    if not summary:
        errs.append(error("SS01"))
    else:
        if summary[0].isalpha() and not summary[0].isupper():
            errs.append(error("SS02"))
        if summary[-1] != ".":
            errs.append(error("SS03"))
        if summary != summary.lstrip():
            errs.append(error("SS04"))
        elif doc.is_function_or_method and summary.split(" ")[0][-1] == "s":
            errs.append(error("SS05"))
        if doc.num_summary_lines > 1:
            errs.append(error("SS06"))

    if not doc.extended_summary:
        errs.append(error("ES01"))

    # PR01: Parameters not documented
    # PR02: Unknown parameters
    # PR03: Wrong parameters order
    errs += doc.parameter_mismatches

    common_type_errors = [
        ("integer", "int"),
        ("boolean", "bool"),
        ("string", "str"),
    ]
    for param, (param_type, param_desc) in doc.doc_parameters.items():
        if not param.startswith("*"):  # Check can ignore var / kwargs
            if not param_type:
                if ":" in param:
                    errs.append(error("PR10", param_name=param.split(":")[0]))
                else:
                    errs.append(error("PR04", param_name=param))
            else:
                if param_type[-1] == ".":
                    errs.append(error("PR05", param_name=param))
                # Compare whole words only, so that e.g. "string" is not
                # reported for a type such as "substring".
                type_words = set(re.split(r"\W", param_type))
                for wrong_type, right_type in common_type_errors:
                    if wrong_type in type_words:
                        errs.append(
                            error(
                                "PR06",
                                param_name=param,
                                right_type=right_type,
                                wrong_type=wrong_type,
                            )
                        )
        errs.extend(_check_desc(param_desc, "PR07", "PR08", "PR09", param_name=param))

    if doc.is_function_or_method:
        returns = doc.returns
        if not returns:
            if doc.method_returns_something:
                errs.append(error("RT01"))
        else:
            if len(returns) == 1 and returns[0].name:
                errs.append(error("RT02"))
            for name_or_type, type_, desc in returns:
                errs.extend(_check_desc(desc, "RT03", "RT04", "RT05"))

        # Ask the interpreter whether the (unwrapped) object is a generator
        # instead of searching the source text for "yield", which also
        # matches comments, docstrings and nested generator functions.
        is_generator = inspect.isgeneratorfunction(
            doc.code_obj
        ) or inspect.isasyncgenfunction(doc.code_obj)
        if not doc.yields and is_generator:
            errs.append(error("YD01"))

    see_also = doc.see_also
    if not see_also:
        errs.append(error("SA01"))
    else:
        for rel_name, rel_desc in see_also.items():
            if rel_desc:
                if not rel_desc.endswith("."):
                    errs.append(error("SA02", reference_name=rel_name))
                if rel_desc[0].isalpha() and not rel_desc[0].isupper():
                    errs.append(error("SA03", reference_name=rel_name))
            else:
                errs.append(error("SA04", reference_name=rel_name))

    if not doc.examples:
        errs.append(error("EX01"))
    return {
        "type": doc.type,
        "docstring": doc.clean_doc,
        "deprecated": doc.deprecated,
        "file": doc.source_file_name,
        "file_line": doc.source_file_def_line,
        "errors": errs,
    }
599