1#!/usr/bin/env python 2# 3# Natural Language Toolkit: Deprecated Function & Class Finder 4# 5# Copyright (C) 2001-2019 NLTK Project 6# Author: Edward Loper <edloper@gmail.com> 7# URL: <http://nltk.org/> 8# For license information, see LICENSE.TXT 9 10from __future__ import print_function 11 12""" 13This command-line tool takes a list of python files or directories, 14and searches them for calls to deprecated NLTK functions, or uses of 15deprecated NLTK classes. For each use of a deprecated object it 16finds, it will print out a warning containing the offending line, as 17well as its line number and containing file name. If the terminal has 18color support (and if epydoc is installed), then the offending 19identifier will be highlighted in red. 20""" 21 22###################################################################### 23# Imports 24###################################################################### 25 26import os 27import re 28import sys 29import tokenize 30import textwrap 31import nltk.corpus 32from doctest import DocTestParser, register_optionflag 33from cStringIO import StringIO 34from nltk import defaultdict 35 36###################################################################### 37# Regexps 38###################################################################### 39 40#: A little over-simplified, but it'll do. 41STRING_PAT = ( 42 r'\s*[ur]{0,2}(?:' 43 '"""[\s\S]*?"""|' 44 '"[^"\n]+?"|' 45 "'''[\s\S]*?'''|" 46 "'[^'\n]+?'" 47 ")\s*" 48) 49STRING_RE = re.compile(STRING_PAT) 50 51STRINGS_PAT = '{}(?:[+]?{})*'.format(STRING_PAT, STRING_PAT) 52STRINGS_RE = re.compile(STRINGS_PAT) 53 54# Define a regexp to search for deprecated definitions. 
# Group 1: the message string(s) passed to ``@deprecated(...)``.
# Group 2: the name of the decorated function or method.
# Group 3: the name of a class whose base-class list mentions ``Deprecated``.
DEPRECATED_DEF_PAT = (
    r'^\s*@deprecated\s*\(\s*({})\s*\)\s*\n+'.format(STRINGS_PAT)
    + r'\s*def\s*(\w+).*'
    + r'|'
    + r'^\s*class\s+(\w+)\s*\(.*Deprecated.*\):\s*'
)
DEPRECATED_DEF_RE = re.compile(DEPRECATED_DEF_PAT, re.MULTILINE)

# Matches ``<name>.read(`` where <name> is any attribute of ``nltk.corpus``.
CORPUS_READ_METHOD_RE = re.compile(
    r'({})\.read\('.format('|'.join(re.escape(n) for n in dir(nltk.corpus)))
)

# Matches the header line of a class definition; group 1 is the class name.
CLASS_DEF_RE = re.compile(r'^\s*class\s+(\w+)\s*[:\(]')

######################################################################
# Globals
######################################################################
# Yes, it's bad programming practice, but this is a little hack
# script. :)  These get initialized by find_deprecated_defs.
# Each maps a deprecated name to a set of (message, prefix, suffix) tuples.

deprecated_funcs = defaultdict(set)
deprecated_classes = defaultdict(set)
deprecated_methods = defaultdict(set)

try:
    from epydoc.cli import TerminalController
except ImportError:

    class TerminalController:
        """Fallback used without epydoc: every attribute is the empty string."""

        def __getattr__(self, attr):
            return ''


term = TerminalController()

######################################################################
# Code
######################################################################

# If we're using py24, then ignore the +SKIP directive.
if sys.version_info[:2] < (2, 5):
    register_optionflag('SKIP')


def strip_quotes(s):
    """
    Return the string literal source ``s`` without its ``u``/``r`` prefix
    and without its surrounding (single, double or triple) quotes.
    Whitespace around the literal and directly inside the quotes is
    stripped as well.
    """
    s = s.strip()
    # Drop prefix characters, but only while the text still ends in a quote.
    while s and (s[0] in "ur") and (s[-1] in "'\""):
        s = s[1:]
    # Peel matching quote pairs; three passes handle triple-quoted strings.
    while s and (s[0] in "'\"" and (s[0] == s[-1])):
        s = s[1:-1]
    s = s.strip()
    return s


def find_class(s, index):
    """
    Return ``'<ClassName>.'`` for the nearest ``class`` statement that
    appears before offset ``index`` in the source text ``s``, or ``'?.'``
    if no class statement precedes that offset.
    """
    for line in reversed(s[:index].split('\n')):
        m = CLASS_DEF_RE.match(line)
        if m:
            return m.group(1) + '.'
    return '?.'


def find_deprecated_defs(pkg_dir):
    """
    Record every deprecated definition found below ``pkg_dir``.

    Walks ``pkg_dir`` recursively and scans each ``.py`` file for
    functions and methods marked with the ``@deprecated`` decorator, and
    for classes with an immediate ``Deprecated`` base class.  Nothing is
    returned: the results are added to the module-level tables
    ``deprecated_funcs``, ``deprecated_methods`` and
    ``deprecated_classes``.

    :param pkg_dir: root directory of the package to scan.
    """
    # Walk through the directory, finding python files.
    for root, dirs, files in os.walk(pkg_dir):
        for filename in files:
            if not filename.endswith('.py'):
                continue
            # Search the file for any deprecated definitions.
            with open(os.path.join(root, filename)) as infile:
                source = infile.read()
            for m in DEPRECATED_DEF_RE.finditer(source):
                if m.group(2):
                    name = m.group(2)
                    msg = ' '.join(
                        strip_quotes(part) for part in STRING_RE.findall(m.group(1))
                    )
                    msg = ' '.join(msg.split())
                    # The leading ``^\s*`` of the pattern also swallows blank
                    # lines in front of the decorator, so the first matched
                    # character says nothing about indentation.  Look at the
                    # decorator's own line instead: an indented ``@`` means
                    # the definition is a method.
                    at_pos = source.index('@', m.start())
                    line_start = source.rfind('\n', 0, at_pos) + 1
                    if at_pos > line_start:
                        cls = find_class(source, m.start())
                        deprecated_methods[name].add((msg, cls, '()'))
                    else:
                        deprecated_funcs[name].add((msg, '', '()'))
                else:
                    name = m.group(3)
                    # Use the string right after the class header (its
                    # docstring, if there is one) as the message.
                    m2 = STRING_RE.match(source, m.end())
                    if m2:
                        msg = strip_quotes(m2.group())
                    else:
                        msg = ''
                    msg = ' '.join(msg.split())
                    deprecated_classes[name].add((msg, '', ''))


def print_deprecated_uses(paths):
    """
    Report uses of deprecated names in the given files and directories.

    Directories are searched recursively.  ``.py`` files are tokenized
    directly; for ``.doctest`` and ``.txt`` files only the source of each
    doctest example is checked.  Other files are ignored.

    :param paths: iterable of file and directory names.
    :return: the set of deprecated names for which a use was reported.
    """
    dep_names = set()
    dep_files = set()
    for path in sorted(paths):
        if os.path.isdir(path):
            dep_names.update(
                print_deprecated_uses([os.path.join(path, f) for f in os.listdir(path)])
            )
        elif path.endswith('.py'):
            with open(path) as infile:
                print_deprecated_uses_in(
                    infile.readline, path, dep_files, dep_names, 0
                )
        elif path.endswith(('.doctest', '.txt')):
            with open(path) as infile:
                text = infile.read()
            for example in DocTestParser().get_examples(text):
                ex = StringIO(example.source)
                try:
                    print_deprecated_uses_in(
                        ex.readline, path, dep_files, dep_names, example.lineno
                    )
                except tokenize.TokenError:
                    print(
                        term.RED + 'Caught TokenError -- '
                        'malformatted doctest?' + term.NORMAL
                    )
    return dep_names


def print_deprecated_uses_in(readline, path, dep_files, dep_names, lineno_offset):
    """
    Tokenize one source and print every line that uses a deprecated name.

    :param readline: callable handed to ``tokenize.generate_tokens``.
    :param path: file name shown in the header printed before the first
        reported line of a file.
    :param dep_files: set of paths whose header was already printed;
        updated in place.
    :param dep_names: set of deprecated names seen so far; updated in
        place.
    :param lineno_offset: value added to each reported line number (the
        caller passes the example's line number for doctests).
    :raises tokenize.TokenError: propagated from the tokenizer when the
        source cannot be tokenized.
    """
    tokiter = tokenize.generate_tokens(readline)
    context = ['']
    for (typ, tok, start, end, line) in tokiter:
        # Remember the previous lines -- they might contain the
        # @deprecated decorator.  The identity test keeps one entry per
        # physical line; it presumably relies on the tokenizer handing out
        # the same string object for all tokens of a line.
        if line is not context[-1]:
            context.append(line)
            if len(context) > 10:
                del context[0]
        esctok = re.escape(tok)
        # Ignore all tokens except deprecated names.
        if not (
            tok in deprecated_classes
            or (
                tok in deprecated_funcs and re.search(r'\b{}\s*\('.format(esctok), line)
            )
            or (
                tok in deprecated_methods
                # Negative lookbehind: method calls on ``self`` are not
                # reported.  (This used to be spelled ``(?!<\bself)``, a
                # lookahead that always succeeded.)
                and re.search(r'(?<!\bself)[.]\s*{}\s*\('.format(esctok), line)
            )
        ):
            continue
        # Hack: only complain about read if it's used after a corpus.
        if tok == 'read' and not CORPUS_READ_METHOD_RE.search(line):
            continue
        # Ignore deprecated definitions:
        if DEPRECATED_DEF_RE.search(''.join(context)):
            continue
        # Print a header for the first use in a file:
        if path not in dep_files:
            print('\n' + term.BOLD + path + term.NORMAL)
            print(' {}linenum{}'.format(term.YELLOW, term.NORMAL))
            dep_files.add(path)
        # Mark the offending token.
        dep_names.add(tok)
        if term.RED:
            sub = term.RED + tok + term.NORMAL
        elif term.BOLD:
            sub = term.BOLD + tok + term.NORMAL
        else:
            sub = '<<' + tok + '>>'
        # A function replacement inserts ``sub`` verbatim, so a backslash in
        # a terminal control sequence is never read as a regex template
        # escape.
        line = re.sub(r'\b{}\b'.format(esctok), lambda _match: sub, line)
        # Print the offending line.
        print(
            ' {}[{:5d}]{} {}'.format(
                term.YELLOW, start[0] + lineno_offset, term.NORMAL, line.rstrip()
            )
        )


def main():
    """
    Command-line entry point.

    Scans the installed nltk package for deprecated definitions, then
    reports their uses in the paths given on the command line (default:
    the current directory), followed by the replacement advice attached
    to each deprecated name that was found.
    """
    paths = sys.argv[1:] or ['.']

    print('Importing nltk...')
    try:
        import nltk
    except ImportError:
        print('Unable to import nltk -- check your PYTHONPATH.')
        sys.exit(-1)

    print('Finding definitions of deprecated functions & classes in nltk...')
    find_deprecated_defs(nltk.__path__[0])

    print('Looking for possible uses of deprecated funcs & classes...')
    dep_names = print_deprecated_uses(paths)

    if not dep_names:
        print('No deprecated funcs or classes found!')
    else:
        print("\n" + term.BOLD + "What you should use instead:" + term.NORMAL)
        for name in sorted(dep_names):
            msgs = (
                deprecated_funcs[name]
                .union(deprecated_classes[name])
                .union(deprecated_methods[name])
            )
            # Sorted so that the report is identical from run to run.
            for msg, prefix, suffix in sorted(msgs):
                print(
                    textwrap.fill(
                        term.RED + prefix + name + suffix + term.NORMAL + ': ' + msg,
                        width=75,
                        initial_indent=' ' * 2,
                        subsequent_indent=' ' * 6,
                    )
                )


if __name__ == '__main__':
    main()