# -*- coding: utf-8 -*-

# Copyright (c) 2005 - 2021 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Parse a Ruby file and retrieve classes, modules, methods and attributes.

Parse enough of a Ruby file to recognize class, module and method definitions
and to find out the superclasses of a class as well as its attributes.

It is based on the Python class browser found in this package.
"""

import re

import Utilities
import Utilities.ClassBrowsers as ClassBrowsers
from . import ClbrBaseClasses

SUPPORTED_TYPES = [ClassBrowsers.RB_SOURCE]

# Tokenizer for the scanner: each top-level alternative is a named group and
# scan() dispatches on whichever group matched (m.start(<group>) >= 0).
_getnext = re.compile(
    r"""
    (?P<String>
        =begin .*? =end

    |   <<-? (?P<HereMarker1> [a-zA-Z0-9_]+? ) [ \t]* .*? (?P=HereMarker1)

    |   <<-? ['"] (?P<HereMarker2> [^'"]+? ) ['"] [ \t]* .*? (?P=HereMarker2)

    |   " [^"\\\n]* (?: \\. [^"\\\n]*)* "

    |   ' [^'\\\n]* (?: \\. [^'\\\n]*)* '
    )

|   (?P<CodingLine>
        ^ \# \s* [*_-]* \s* coding[:=] \s* (?P<Coding> [-\w_.]+ ) \s* [*_-]* $
    )

|   (?P<Comment>
        ^
        [ \t]* \#+ .*? $
    )

|   (?P<Method>
        ^
        (?P<MethodIndent> [ \t]* )
        def [ \t]+
        (?:
            (?P<MethodName2> [a-zA-Z0-9_]+ (?: \. | :: )
            [a-zA-Z_] [a-zA-Z0-9_?!=]* )
        |
            (?P<MethodName> [a-zA-Z_] [a-zA-Z0-9_?!=]* )
        |
            (?P<MethodName3> [^( \t]{1,3} )
        )
        [ \t]*
        (?:
            \( (?P<MethodSignature> (?: [^)] | \)[ \t]*,? )*? ) \)
        )?
        [ \t]*
    )

|   (?P<Class>
        ^
        (?P<ClassIndent> [ \t]* )
        class
        (?:
            [ \t]+
            (?P<ClassName> [A-Z] [a-zA-Z0-9_]* )
            [ \t]*
            (?P<ClassSupers> < [ \t]* [A-Z] [a-zA-Z0-9_:]* )?
        |
            [ \t]* << [ \t]*
            (?P<ClassName2> [a-zA-Z_] [a-zA-Z0-9_:]* )
        )
        [ \t]*
    )

|   (?P<ClassIgnored>
        \(
        [ \t]*
        class
        .*?
        end
        [ \t]*
        \)
    )

|   (?P<Module>
        ^
        (?P<ModuleIndent> [ \t]* )
        module [ \t]+
        (?P<ModuleName> [A-Z] [a-zA-Z0-9_:]* )
        [ \t]*
    )

|   (?P<AccessControl>
        ^
        (?P<AccessControlIndent> [ \t]* )
        (?:
            (?P<AccessControlType> private | public | protected ) [^_]
        |
            (?P<AccessControlType2>
            private_class_method | public_class_method )
        )
        \(?
        [ \t]*
        (?P<AccessControlList> (?: : [a-zA-Z0-9_]+ , \s* )*
        (?: : [a-zA-Z0-9_]+ )+ )?
        [ \t]*
        \)?
    )

|   (?P<Attribute>
        ^
        (?P<AttributeIndent> [ \t]* )
        (?P<AttributeName> (?: @ | @@ ) [a-zA-Z0-9_]* )
        [ \t]* =
    )

|   (?P<Attr>
        ^
        (?P<AttrIndent> [ \t]* )
        attr
        (?P<AttrType> (?: _accessor | _reader | _writer ) )?
        \(?
        [ \t]*
        (?P<AttrList> (?: : [a-zA-Z0-9_]+ , \s* )*
        (?: : [a-zA-Z0-9_]+ | true | false )+ )
        [ \t]*
        \)?
    )

|   (?P<Begin>
        ^
        [ \t]*
        (?: def | if | unless | case | while | until | for | begin )
        \b [^_]
    |
        [ \t]* do [ \t]* (?: \| .*? \| )? [ \t]* $
    )

|   (?P<BeginEnd>
        \b (?: if ) \b [^_] .*? $
    |
        \b (?: if ) \b [^_] .*? end [ \t]* $
    )

|   (?P<End>
        [ \t]*
        (?:
            end [ \t]* $
        |
            end \b [^_]
        )
    )""",
    re.VERBOSE | re.DOTALL | re.MULTILINE).search

# strips '#' comments up to the end of the line (or the end of the text)
_commentsub = re.compile(r"""#[^\n]*\n|#[^\n]*$""").sub

_modules = {}                           # cache of modules we've seen


class VisibilityMixin(ClbrBaseClasses.ClbrVisibilityMixinBase):
    """
    Mixin class implementing the notion of visibility.
    """
    def __init__(self):
        """
        Constructor

        Objects start out with public visibility.
        """
        self.setPublic()


class Class(ClbrBaseClasses.Class, VisibilityMixin):
    """
    Class to represent a Ruby class.
    """
    def __init__(self, module, name, superClasses, file, lineno):
        """
        Constructor

        @param module name of the module containing this class
        @param name name of this class
        @param superClasses list of class names this class is inherited from
        @param file filename containing this class
        @param lineno linenumber of the class definition
        """
        ClbrBaseClasses.Class.__init__(self, module, name, superClasses, file,
                                       lineno)
        VisibilityMixin.__init__(self)


class Module(ClbrBaseClasses.Module, VisibilityMixin):
    """
    Class to represent a Ruby module.
    """
    def __init__(self, module, name, file, lineno):
        """
        Constructor

        @param module name of the module containing this module
        @param name name of this module
        @param file filename containing this module
        @param lineno linenumber of the module definition
        """
        ClbrBaseClasses.Module.__init__(self, module, name, file, lineno)
        VisibilityMixin.__init__(self)


class Function(ClbrBaseClasses.Function, VisibilityMixin):
    """
    Class to represent a Ruby function.
    """
    def __init__(self, module, name, file, lineno, signature='',
                 separator=','):
        """
        Constructor

        @param module name of the module containing this function
        @param name name of this function
        @param file filename containing this function
        @param lineno linenumber of the function definition
        @param signature parameterlist of the method
        @param separator string separating the parameters
        """
        ClbrBaseClasses.Function.__init__(self, module, name, file, lineno,
                                          signature, separator)
        VisibilityMixin.__init__(self)


class Attribute(ClbrBaseClasses.Attribute, VisibilityMixin):
    """
    Class to represent a class or module attribute.
    """
    def __init__(self, module, name, file, lineno):
        """
        Constructor

        @param module name of the module containing this attribute
        @param name name of this attribute
        @param file filename containing this attribute
        @param lineno linenumber of the attribute definition
        """
        ClbrBaseClasses.Attribute.__init__(self, module, name, file, lineno)
        VisibilityMixin.__init__(self)
        # attributes are private until an attr* declaration widens them
        self.setPrivate()


def readmodule_ex(module, path=None):
    """
    Read a Ruby file and return a dictionary of classes, functions and modules.

    Results are cached per module name in the module level cache, including
    the empty result stored for unsupported or unreadable files.

    @param module name of the Ruby file (string)
    @param path path the file should be searched in (list of strings)
    @return the resulting dictionary
    """
    if module in _modules:
        # we've seen this file before...
        return _modules[module]

    # search the path for the file
    fullpath = [] if path is None else path[:]
    f, file, (_suff, _mode, fileType) = ClassBrowsers.find_module(
        module, fullpath)
    if f:
        # only the located file name is needed; the source is read below
        f.close()
    if fileType not in SUPPORTED_TYPES:
        # not Ruby source, can't do anything with this module
        _modules[module] = {}
        return {}

    try:
        src = Utilities.readEncodedFile(file)[0]
    except (UnicodeError, OSError):
        # can't do anything with this module
        _modules[module] = {}
        return {}

    _modules[module] = scan(src, file, module)
    return _modules[module]


def _close_scopes(classstack, thisindent, lineno):
    """
    Pop all entries of the class stack nested at least as deep as the given
    level and record their end line.

    @param classstack stack of (object, indent) pairs (modified in place)
    @type list
    @param thisindent nesting level of the definition being opened
    @type int
    @param lineno line number of the definition being opened
    @type int
    """
    while classstack and classstack[-1][1] >= thisindent:
        if classstack[-1][0] is not None:
            # record the end line
            classstack[-1][0].setEndLine(lineno - 1)
        del classstack[-1]


def _apply_visibility(obj, actype):
    """
    Set the visibility of an object according to an access control keyword.

    Keywords other than 'private', 'protected' and 'public' are ignored.

    @param obj object offering setPrivate(), setProtected() and setPublic()
    @param actype access control keyword
    @type str
    """
    if actype == "private":
        obj.setPrivate()
    elif actype == "protected":
        obj.setProtected()
    elif actype == "public":
        obj.setPublic()


def _enclosing_container(classstack, indent):
    """
    Find the innermost non-function stack entry nested less deep than the
    given level.

    @param classstack stack of (object, indent) pairs
    @type list
    @param indent current nesting level
    @type int
    @return the matching object or None, if there is none
    """
    for container, containerIndent in reversed(classstack):
        if (
            container is not None and
            not isinstance(container, Function) and
            containerIndent < indent
        ):
            return container
    return None


def scan(src, file, module):
    """
    Function to scan the given source text.

    The text is tokenized with the module level regular expression; nesting
    is tracked by counting block openers and 'end' keywords, not by looking
    at the leading whitespace.

    @param src source text to be scanned
    @type str
    @param file file name associated with the source text
    @type str
    @param module module name associated with the source text
    @type str
    @return dictionary containing the extracted data
    @rtype dict
    """
    # convert eol markers to the Python style
    src = src.replace("\r\n", "\n").replace("\r", "\n")

    dictionary = {}
    dict_counts = {}    # number of redefinitions per global function name

    classstack = []     # stack of (class, indent) pairs
    acstack = []        # stack of (access control, indent) pairs
    indent = 0

    lineno, last_lineno_pos = 1, 0
    cur_obj = None
    lastGlobalEntry = None
    i = 0
    while True:
        m = _getnext(src, i)
        if not m:
            break
        start, i = m.span()

        if m.start("Method") >= 0:
            # found a method definition or function
            thisindent = indent
            indent += 1
            meth_name = (
                m.group("MethodName") or
                m.group("MethodName2") or
                m.group("MethodName3")
            )
            meth_sig = m.group("MethodSignature")
            # join continuation lines; a missing signature becomes ''
            meth_sig = meth_sig.replace('\\\n', '') if meth_sig else ''
            meth_sig = _commentsub('', meth_sig)
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            if meth_name.startswith('self.'):
                meth_name = meth_name[5:]
            elif meth_name.startswith('self::'):
                meth_name = meth_name[6:]
            # close all classes/modules indented at least as much
            _close_scopes(classstack, thisindent, lineno)
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            if classstack:
                # it's a class/module method
                cur_class = classstack[-1][0]
                if isinstance(cur_class, (Class, Module)):
                    # it's a method
                    f = Function(None, meth_name,
                                 file, lineno, meth_sig)
                    cur_class._addmethod(meth_name, f)
                else:
                    # it's a nested def; keep working on the enclosing
                    # function object
                    f = cur_class
                # set access control
                if acstack:
                    _apply_visibility(f, acstack[-1][0])
            else:
                # it's a function
                f = Function(module, meth_name,
                             file, lineno, meth_sig)
                if meth_name in dict_counts:
                    # redefinition: store under a numbered key
                    dict_counts[meth_name] += 1
                    meth_name = "{0}_{1:d}".format(
                        meth_name, dict_counts[meth_name])
                else:
                    dict_counts[meth_name] = 0
                dictionary[meth_name] = f
            if not classstack:
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = f
            if cur_obj and isinstance(cur_obj, Function):
                cur_obj.setEndLine(lineno - 1)
            cur_obj = f
            classstack.append((f, thisindent))  # Marker for nested fns

        elif (
            m.start("String") >= 0 or
            m.start("Comment") >= 0 or
            m.start("ClassIgnored") >= 0 or
            m.start("BeginEnd") >= 0
        ):
            # consumed only to keep their content away from the other rules
            pass

        elif m.start("Class") >= 0:
            # we found a class definition
            thisindent = indent
            indent += 1
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # close all classes/modules indented at least as much
            _close_scopes(classstack, thisindent, lineno)
            class_name = m.group("ClassName") or m.group("ClassName2")
            inherit = m.group("ClassSupers")
            if inherit:
                # the class inherits from other classes
                inherit = inherit[1:].strip()
                inherit = [_commentsub('', inherit)]
            # remember this class
            cur_class = Class(module, class_name, inherit,
                              file, lineno)
            if not classstack:
                if class_name in dictionary:
                    # reopened class: continue with the known object
                    cur_class = dictionary[class_name]
                else:
                    dictionary[class_name] = cur_class
            else:
                cls = classstack[-1][0]
                if class_name in cls.classes:
                    cur_class = cls.classes[class_name]
                elif class_name in (cls.name, "self"):
                    # 'class << self' and the like refer to the parent
                    cur_class = cls
                else:
                    cls._addclass(class_name, cur_class)
            if not classstack:
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = cur_class
            cur_obj = cur_class
            classstack.append((cur_class, thisindent))
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            acstack.append(["public", thisindent])
            # default access control is 'public'

        elif m.start("Module") >= 0:
            # we found a module definition
            thisindent = indent
            indent += 1
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # close all classes/modules indented at least as much
            _close_scopes(classstack, thisindent, lineno)
            module_name = m.group("ModuleName")
            # remember this module
            cur_class = Module(module, module_name, file, lineno)
            if not classstack:
                if module_name in dictionary:
                    # reopened module: continue with the known object
                    cur_class = dictionary[module_name]
                else:
                    dictionary[module_name] = cur_class
            else:
                cls = classstack[-1][0]
                if module_name in cls.classes:
                    cur_class = cls.classes[module_name]
                elif cls.name == module_name:
                    cur_class = cls
                else:
                    cls._addclass(module_name, cur_class)
            if not classstack:
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = cur_class
            cur_obj = cur_class
            classstack.append((cur_class, thisindent))
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            acstack.append(["public", thisindent])
            # default access control is 'public'

        elif m.start("AccessControl") >= 0:
            aclist = m.group("AccessControlList")
            # exactly one of the two type groups matched; the *_class_method
            # variants are reduced to their leading keyword
            actype = (
                m.group("AccessControlType") or
                m.group("AccessControlType2").split('_')[0]
            ).lower()
            if aclist is None:
                # bare keyword: change the mode of the innermost enclosing
                # access control scope
                for entry in reversed(acstack):
                    if entry[1] < indent:
                        entry[0] = actype
                        break
            else:
                # keyword with a symbol list: change the listed methods only
                parent = _enclosing_container(classstack, indent)
                if parent is not None:
                    for name in aclist.split(","):
                        name = name.strip()[1:]     # get rid of leading ':'
                        acmeth = parent._getmethod(name)
                        if acmeth is not None:
                            _apply_visibility(acmeth, actype)

        elif m.start("Attribute") >= 0:
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            for container, containerIndent in reversed(classstack):
                if (
                    container is not None and
                    not isinstance(container, Function) and
                    containerIndent < indent
                ):
                    attr = Attribute(
                        module, m.group("AttributeName"), file, lineno)
                    container._addattribute(attr)
                    break
                # NOTE(review): this bookkeeping runs for every stack entry
                # that is skipped - confirm that this is the intended
                # condition for closing the last global entry
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = None

        elif m.start("Attr") >= 0:
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            parent = _enclosing_container(classstack, indent)
            if parent is not None:
                access = m.group("AttrType")
                if access is None:
                    # plain 'attr :name [, true|false]'
                    nv = m.group("AttrList").split(",")
                    name = nv[0].strip()[1:]    # get rid of leading ':'
                    attr = (
                        parent._getattribute("@" + name) or
                        parent._getattribute("@@" + name) or
                        Attribute(module, "@" + name, file, lineno)
                    )
                    if len(nv) == 1 or nv[1].strip() == "false":
                        attr.setProtected()
                    elif nv[1].strip() == "true":
                        attr.setPublic()
                    parent._addattribute(attr)
                else:
                    for name in m.group("AttrList").split(","):
                        name = name.strip()[1:]     # get rid of leading ':'
                        attr = (
                            parent._getattribute("@" + name) or
                            parent._getattribute("@@" + name) or
                            Attribute(module, "@" + name, file, lineno)
                        )
                        if access == "_accessor":
                            attr.setPublic()
                        elif access in ("_reader", "_writer"):
                            # each of reader/writer widens the visibility
                            # by one step
                            if attr.isPrivate():
                                attr.setProtected()
                            elif attr.isProtected():
                                attr.setPublic()
                        parent._addattribute(attr)

        elif m.start("Begin") >= 0:
            # a begin of a block we are not interested in
            indent += 1

        elif m.start("End") >= 0:
            # an end of a block
            indent -= 1
            if indent < 0:
                # no negative indent allowed
                if classstack:
                    # it's a class/module method
                    indent = classstack[-1][1]
                else:
                    indent = 0

        elif m.start("CodingLine") >= 0:
            # a coding statement
            coding = m.group("Coding")
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            if "@@Coding@@" not in dictionary:
                # only the first coding line is recorded
                dictionary["@@Coding@@"] = ClbrBaseClasses.Coding(
                    module, file, lineno, coding)

    return dictionary