#!/usr/bin/env python
"""
Analyze docstrings to detect errors.

Call ``validate(object_name_to_validate)`` to get a dictionary
with all the detected errors.
"""
import ast
import collections
import importlib
import inspect
import pydoc
import re
import textwrap
from .docscrape import NumpyDocString


DIRECTIVES = ["versionadded", "versionchanged", "deprecated"]
DIRECTIVE_PATTERN = re.compile(r"^\s*\.\. ({})(?!::)".format('|'.join(DIRECTIVES)),
                               re.I | re.M)
ALLOWED_SECTIONS = [
    "Parameters",
    "Attributes",
    "Methods",
    "Returns",
    "Yields",
    "Other Parameters",
    "Raises",
    "Warns",
    "See Also",
    "Notes",
    "References",
    "Examples",
]
ERROR_MSGS = {
    "GL01": "Docstring text (summary) should start in the line immediately "
    "after the opening quotes (not in the same line, or leaving a "
    "blank line in between)",
    "GL02": "Closing quotes should be placed in the line after the last text "
    "in the docstring (do not close the quotes in the same line as "
    "the text, or leave a blank line between the last text and the "
    "quotes)",
    "GL03": "Double line break found; please use only one blank line to "
    "separate sections or paragraphs, and do not leave blank lines "
    "at the end of docstrings",
    "GL05": 'Tabs found at the start of line "{line_with_tabs}", please use '
    "whitespace only",
    "GL06": 'Found unknown section "{section}". Allowed sections are: '
    "{allowed_sections}",
    "GL07": "Sections are in the wrong order. "
    "Correct order is: {correct_sections}",
    "GL08": "The object does not have a docstring",
    "GL09": "Deprecation warning should precede extended summary",
    "GL10": "reST directives {directives} must be followed by two colons",
    "SS01": "No summary found (a short summary in a single line should be "
    "present at the beginning of the docstring)",
    "SS02": "Summary does not start with a capital letter",
    "SS03": "Summary does not end with a period",
    "SS04": "Summary contains heading whitespaces",
    "SS05": "Summary must start with infinitive verb, not third person "
    '(e.g. use "Generate" instead of "Generates")',
    "SS06": "Summary should fit in a single line",
    "ES01": "No extended summary found",
    "PR01": "Parameters {missing_params} not documented",
    "PR02": "Unknown parameters {unknown_params}",
    "PR03": "Wrong parameters order. Actual: {actual_params}. "
    "Documented: {documented_params}",
    "PR04": 'Parameter "{param_name}" has no type',
    "PR05": 'Parameter "{param_name}" type should not finish with "."',
    "PR06": 'Parameter "{param_name}" type should use "{right_type}" instead '
    'of "{wrong_type}"',
    "PR07": 'Parameter "{param_name}" has no description',
    "PR08": 'Parameter "{param_name}" description should start with a '
    "capital letter",
    "PR09": 'Parameter "{param_name}" description should finish with "."',
    "PR10": 'Parameter "{param_name}" requires a space before the colon '
    "separating the parameter name and type",
    "RT01": "No Returns section found",
    "RT02": "The first line of the Returns section should contain only the "
    "type, unless multiple values are being returned",
    "RT03": "Return value has no description",
    "RT04": "Return value description should start with a capital letter",
    "RT05": 'Return value description should finish with "."',
    "YD01": "No Yields section found",
    "SA01": "See Also section not found",
    "SA02": "Missing period at end of description for See Also "
    '"{reference_name}" reference',
    "SA03": "Description should be capitalized for See Also "
    '"{reference_name}" reference',
    "SA04": 'Missing description for See Also "{reference_name}" reference',
    "EX01": "No examples section found",
}

# Ignore these when evaluating end-of-line-"." checks
IGNORE_STARTS = (" ", "* ", "- ")


def error(code, **kwargs):
    """
    Return a tuple with the error code and the message with variables replaced.

    This is syntactic sugar so instead of:
    - `('PR02', ERROR_MSGS['PR02'].format(unknown_params=params))`

    We can simply use:
    - `error('PR02', unknown_params=params)`

    Parameters
    ----------
    code : str
        Error code.
    **kwargs
        Values for the variables in the error messages.

    Returns
    -------
    code : str
        Error code.
    message : str
        Error message with variables replaced.

    Raises
    ------
    KeyError
        If ``code`` is not a key of ``ERROR_MSGS``, or if the message
        references a variable that is not present in ``kwargs``.
    """
    return (code, ERROR_MSGS[code].format(**kwargs))


class Docstring:
    """
    Wrap an importable object and expose the pieces of its docstring.

    The object is looked up from its dotted name, and its docstring is parsed
    with ``NumpyDocString``. The properties of this class are what
    ``validate`` inspects to produce its list of errors.

    Parameters
    ----------
    name : str
        Dotted name of the object to load (e.g. pandas.Series.str.upper).
    """

    # TODO Can all this class be merged into NumpyDocString?
    def __init__(self, name):
        self.name = name
        obj = self._load_obj(name)
        self.obj = obj
        # Object with decorator wrappers removed, used for source lookups.
        self.code_obj = inspect.unwrap(obj)
        self.raw_doc = obj.__doc__ or ""
        self.clean_doc = pydoc.getdoc(obj)
        self.doc = NumpyDocString(self.clean_doc)

    @staticmethod
    def _load_obj(name):
        """
        Import Python object from its name as string.

        Parameters
        ----------
        name : str
            Object name to import (e.g. pandas.Series.str.upper)

        Returns
        -------
        object
            Python object that can be a class, method, function...

        Raises
        ------
        ImportError
            If no prefix of ``name`` can be imported as a module.

        Examples
        --------
        >>> Docstring._load_obj('datetime.datetime')
        <class 'datetime.datetime'>
        """
        # Try the longest dotted prefix first, then progressively move
        # trailing components from the module path to the attribute path.
        for maxsplit in range(0, name.count(".") + 1):
            module, *func_parts = name.rsplit(".", maxsplit)
            try:
                obj = importlib.import_module(module)
            except ImportError:
                pass
            else:
                break
        else:
            raise ImportError("No module can be imported " 'from "{}"'.format(name))

        for part in func_parts:
            obj = getattr(obj, part)
        return obj

    @property
    def type(self):
        """
        Name of the type of the wrapped object.
        """
        return type(self.obj).__name__

    @property
    def is_function_or_method(self):
        """
        Whether ``inspect.isfunction`` is true for the wrapped object.
        """
        return inspect.isfunction(self.obj)

    @property
    def source_file_name(self):
        """
        File name where the object is implemented (e.g. pandas/core/frame.py).
        """
        try:
            return inspect.getsourcefile(self.code_obj)
        except TypeError:
            # In some cases the object is something complex like a cython
            # object that can't be easily introspected. And it's better to
            # return the source code file of the object as None, than crash
            return None

    @property
    def source_file_def_line(self):
        """
        Number of line where the object is defined in its file.
        """
        try:
            return inspect.getsourcelines(self.code_obj)[-1]
        except (OSError, TypeError):
            # In some cases the object is something complex like a cython
            # object that can't be easily introspected. And it's better to
            # return the line number as None, than crash
            return None

    @property
    def start_blank_lines(self):
        """
        Index of the first non-blank line of the raw docstring.

        ``None`` when the raw docstring is empty.
        """
        i = None
        if self.raw_doc:
            for i, row in enumerate(self.raw_doc.split("\n")):
                if row.strip():
                    break
        return i

    @property
    def end_blank_lines(self):
        """
        Index, counting from the end, of the last non-blank line.

        ``None`` when the raw docstring is empty.
        """
        i = None
        if self.raw_doc:
            for i, row in enumerate(reversed(self.raw_doc.split("\n"))):
                if row.strip():
                    break
        return i

    @property
    def double_blank_lines(self):
        """
        Whether the raw docstring has two consecutive blank lines.
        """
        # Start as "previous line had text" so a leading blank line alone
        # is not reported.
        prev = True
        for row in self.raw_doc.split("\n"):
            if not prev and not row.strip():
                return True
            prev = row.strip()
        return False

    @property
    def section_titles(self):
        """
        Titles of the sections found in the docstring, in order of appearance.

        A section is recognised by a title line followed by a line of dashes
        of the same length.
        """
        sections = []
        self.doc._doc.reset()
        while not self.doc._doc.eof():
            content = self.doc._read_to_next_section()
            if (
                len(content) > 1
                and len(content[0]) == len(content[1])
                and set(content[1]) == {"-"}
            ):
                sections.append(content[0])
        return sections

    @property
    def summary(self):
        """
        Summary lines joined with single spaces.
        """
        return " ".join(self.doc["Summary"])

    @property
    def num_summary_lines(self):
        """
        Number of lines in the summary.
        """
        return len(self.doc["Summary"])

    @property
    def extended_summary(self):
        """
        Extended summary lines joined with single spaces.

        When there is no extended summary but the summary spans more than one
        line, the joined summary is returned instead.
        """
        if not self.doc["Extended Summary"] and len(self.doc["Summary"]) > 1:
            return " ".join(self.doc["Summary"])
        return " ".join(self.doc["Extended Summary"])

    @property
    def doc_parameters(self):
        """
        Ordered mapping of documented parameter name to ``(type, description)``.

        Entries documenting several names at once (``a, b : int``) are
        expanded to one item per name.
        """
        parameters = collections.OrderedDict()
        for names, type_, desc in self.doc["Parameters"]:
            for name in names.split(", "):
                parameters[name] = (type_, desc)
        return parameters

    @property
    def signature_parameters(self):
        """
        Tuple with the parameter names of the object signature.

        ``*args`` and ``**kwargs`` style parameters keep their stars, and a
        leading ``self`` or ``cls`` is dropped. An empty tuple is returned
        when the signature can't be introspected.
        """

        def add_stars(param_name, info):
            """
            Add stars to *args and **kwargs parameters
            """
            if info.kind == inspect.Parameter.VAR_POSITIONAL:
                return "*{}".format(param_name)
            elif info.kind == inspect.Parameter.VAR_KEYWORD:
                return "**{}".format(param_name)
            else:
                return param_name

        if inspect.isclass(self.obj):
            if hasattr(self.obj, "_accessors") and (
                self.name.split(".")[-1] in self.obj._accessors
            ):
                # accessor classes have a signature but don't want to show this
                return tuple()
        try:
            sig = inspect.signature(self.obj)
        except (TypeError, ValueError):
            # Some objects, mainly in C extensions do not support introspection
            # of the signature
            return tuple()

        params = tuple(
            add_stars(parameter, sig.parameters[parameter])
            for parameter in sig.parameters
        )
        if params and params[0] in ("self", "cls"):
            return params[1:]
        return params

    @property
    def parameter_mismatches(self):
        """
        List of PR01/PR02/PR03 errors comparing signature and documentation.
        """
        errs = []
        signature_params = self.signature_parameters
        doc_params = tuple(self.doc_parameters)
        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append(error("PR01", missing_params=str(missing)))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append(error("PR02", unknown_params=str(extra)))
        # Same names on both sides but in a different order.
        if (
            not missing
            and not extra
            and signature_params != doc_params
            and not (not signature_params and not doc_params)
        ):
            errs.append(
                error(
                    "PR03", actual_params=signature_params, documented_params=doc_params
                )
            )

        return errs

    @property
    def directives_without_two_colons(self):
        """
        Known reST directives in the raw docstring not followed by ``::``.
        """
        return DIRECTIVE_PATTERN.findall(self.raw_doc)

    def parameter_type(self, param):
        """
        Return the documented type of the parameter ``param``.
        """
        return self.doc_parameters[param][0]

    @property
    def see_also(self):
        """
        Ordered mapping of each See Also reference to its description.
        """
        result = collections.OrderedDict()
        for funcs, desc in self.doc["See Also"]:
            for func, _ in funcs:
                result[func] = "".join(desc)

        return result

    @property
    def examples(self):
        """
        Content of the Examples section.
        """
        return self.doc["Examples"]

    @property
    def returns(self):
        """
        Content of the Returns section.
        """
        return self.doc["Returns"]

    @property
    def yields(self):
        """
        Content of the Yields section.
        """
        return self.doc["Yields"]

    @property
    def method_source(self):
        """
        Dedented source code of the object, or "" when it is not available.
        """
        try:
            source = inspect.getsource(self.obj)
        except (OSError, TypeError):
            # getsource raises TypeError for built-in objects and OSError
            # when the source code can't be retrieved; neither should make
            # the validation crash.
            return ""
        return textwrap.dedent(source)

    @property
    def method_returns_something(self):
        """
        Check if the docstrings method can return something.

        Bare returns, returns valued None and returns from nested functions are
        disregarded.

        Returns
        -------
        bool
            Whether the docstrings method can return something.
        """
        nested_defs = (ast.FunctionDef, ast.AsyncFunctionDef)

        def is_none_literal(node):
            # A literal ``None`` is an ast.Constant on Python 3.8+ and an
            # ast.NameConstant before. The latter is deprecated and later
            # removed, so match on the class name instead of referencing it.
            return (
                type(node).__name__ in ("Constant", "NameConstant")
                and node.value is None
            )

        def get_returns_not_on_nested_functions(node):
            returns = [node] if isinstance(node, ast.Return) else []
            for child in ast.iter_child_nodes(node):
                # Ignore nested functions and its subtrees.
                if not isinstance(child, nested_defs):
                    child_returns = get_returns_not_on_nested_functions(child)
                    returns.extend(child_returns)
            return returns

        tree = ast.parse(self.method_source).body
        if not tree:
            return False
        # ``value`` is None for a bare ``return``.
        return any(
            ret.value is not None and not is_none_literal(ret.value)
            for ret in get_returns_not_on_nested_functions(tree[0])
        )

    @property
    def deprecated(self):
        """
        Whether the summary or extended summary has a deprecated directive.
        """
        return ".. deprecated:: " in (self.summary + self.extended_summary)


def _check_desc(desc, code_no_desc, code_no_upper, code_no_period, **kwargs):
    """
    Validate a description and return the list of errors found.

    Parameters
    ----------
    desc : list of str
        Lines of the description.
    code_no_desc : str
        Error code to report when the description is empty.
    code_no_upper : str
        Error code to report when it starts with a lowercase letter.
    code_no_period : str
        Error code to report when it does not end with a period.
    **kwargs
        Values for the variables in the error messages.

    Returns
    -------
    list of tuple
        The ``(code, message)`` errors detected.
    """
    # Find and strip out any sphinx directives
    desc = "\n".join(desc)
    for directive in DIRECTIVES:
        full_directive = ".. {}".format(directive)
        if full_directive in desc:
            # Only retain any description before the directive
            desc = desc[: desc.index(full_directive)].rstrip("\n")
    desc = desc.split("\n")

    errs = list()
    if not "".join(desc):
        errs.append(error(code_no_desc, **kwargs))
    else:
        # Slice instead of index: the first line may be empty even when the
        # description as a whole is not.
        first_char = desc[0][:1]
        if first_char.isalpha() and not first_char.isupper():
            errs.append(error(code_no_upper, **kwargs))
        # Not ending in "." is only an error if the last bit is not
        # indented (e.g., quote or code block)
        if not desc[-1].endswith(".") and \
                not desc[-1].startswith(IGNORE_STARTS):
            errs.append(error(code_no_period, **kwargs))
    return errs


def validate(func_name):
    """
    Validate the docstring.

    Parameters
    ----------
    func_name : str
        Dotted name of the object whose docstring will be evaluated
        (e.g. pandas.read_csv).

    Returns
    -------
    dict
        A dictionary containing all the information obtained from validating
        the docstring. It has the keys "type", "docstring", "deprecated",
        "file", "file_line" and "errors", plus "examples_errors" when the
        object has no docstring.

    Notes
    -----
    The errors codes are defined as:
    - First two characters: Section where the error happens:
       * GL: Global (no section, like section ordering errors)
       * SS: Short summary
       * ES: Extended summary
       * PR: Parameters
       * RT: Returns
       * YD: Yields
       * RS: Raises
       * WN: Warns
       * SA: See Also
       * NT: Notes
       * RF: References
       * EX: Examples
    - Last two characters: Numeric error code inside the section

    For example, PR02 is the second codified error in the Parameters section
    (which in this case is assigned to the error when unknown parameters are
    documented).

    The error codes, their corresponding error messages, and the details on how
    they are validated, are not documented more than in the source code of this
    function.
    """
    doc = Docstring(func_name)

    errs = []
    if not doc.raw_doc:
        errs.append(error("GL08"))
        return {
            "type": doc.type,
            "docstring": doc.clean_doc,
            "deprecated": doc.deprecated,
            "file": doc.source_file_name,
            "file_line": doc.source_file_def_line,
            "errors": errs,
            "examples_errors": "",
        }

    if doc.start_blank_lines != 1 and "\n" in doc.raw_doc:
        errs.append(error("GL01"))
    if doc.end_blank_lines != 1 and "\n" in doc.raw_doc:
        errs.append(error("GL02"))
    if doc.double_blank_lines:
        errs.append(error("GL03"))
    for line in doc.raw_doc.splitlines():
        if re.match("^ *\t", line):
            errs.append(error("GL05", line_with_tabs=line.lstrip()))

    # The property re-reads the docstring on every access, so read it once.
    section_titles = doc.section_titles
    unexpected_sections = [
        section for section in section_titles if section not in ALLOWED_SECTIONS
    ]
    for section in unexpected_sections:
        errs.append(
            error("GL06", section=section, allowed_sections=", ".join(ALLOWED_SECTIONS))
        )

    correct_order = [
        section for section in ALLOWED_SECTIONS if section in section_titles
    ]
    if correct_order != section_titles:
        errs.append(error("GL07", correct_sections=", ".join(correct_order)))

    if doc.deprecated and not doc.extended_summary.startswith(".. deprecated:: "):
        errs.append(error("GL09"))

    directives_without_two_colons = doc.directives_without_two_colons
    if directives_without_two_colons:
        errs.append(error("GL10", directives=directives_without_two_colons))

    if not doc.summary:
        errs.append(error("SS01"))
    else:
        if doc.summary[0].isalpha() and not doc.summary[0].isupper():
            errs.append(error("SS02"))
        if doc.summary[-1] != ".":
            errs.append(error("SS03"))
        if doc.summary != doc.summary.lstrip():
            errs.append(error("SS04"))
        elif doc.is_function_or_method and doc.summary.split(" ")[0][-1] == "s":
            errs.append(error("SS05"))
        if doc.num_summary_lines > 1:
            errs.append(error("SS06"))

    if not doc.extended_summary:
        errs.append(error("ES01"))

    # PR01: Parameters not documented
    # PR02: Unknown parameters
    # PR03: Wrong parameters order
    errs += doc.parameter_mismatches

    common_type_errors = [
        ("integer", "int"),
        ("boolean", "bool"),
        ("string", "str"),
    ]
    for param, (param_type, param_desc) in doc.doc_parameters.items():
        if not param.startswith("*"):  # Check can ignore var / kwargs
            if not param_type:
                if ":" in param:
                    errs.append(error("PR10", param_name=param.split(":")[0]))
                else:
                    errs.append(error("PR04", param_name=param))
            else:
                if param_type[-1] == ".":
                    errs.append(error("PR05", param_name=param))
                for wrong_type, right_type in common_type_errors:
                    if wrong_type in param_type:
                        errs.append(
                            error(
                                "PR06",
                                param_name=param,
                                right_type=right_type,
                                wrong_type=wrong_type,
                            )
                        )
        errs.extend(_check_desc(
            param_desc, "PR07", "PR08", "PR09", param_name=param))

    if doc.is_function_or_method:
        if not doc.returns:
            if doc.method_returns_something:
                errs.append(error("RT01"))
        else:
            if len(doc.returns) == 1 and doc.returns[0].name:
                errs.append(error("RT02"))
            for name_or_type, type_, desc in doc.returns:
                errs.extend(_check_desc(desc, "RT03", "RT04", "RT05"))

        if not doc.yields and "yield" in doc.method_source:
            errs.append(error("YD01"))

    if not doc.see_also:
        errs.append(error("SA01"))
    else:
        for rel_name, rel_desc in doc.see_also.items():
            if rel_desc:
                if not rel_desc.endswith("."):
                    errs.append(error("SA02", reference_name=rel_name))
                if rel_desc[0].isalpha() and not rel_desc[0].isupper():
                    errs.append(error("SA03", reference_name=rel_name))
            else:
                errs.append(error("SA04", reference_name=rel_name))

    if not doc.examples:
        errs.append(error("EX01"))
    return {
        "type": doc.type,
        "docstring": doc.clean_doc,
        "deprecated": doc.deprecated,
        "file": doc.source_file_name,
        "file_line": doc.source_file_def_line,
        "errors": errs,
    }