1# -*- coding: utf-8 -*-
3# Copyright (c) 2005 - 2021 Detlev Offenbach <detlev@die-offenbachs.de>
7Parse a Ruby file and retrieve classes, modules, methods and attributes.
9Parse enough of a Ruby file to recognize class, module and method definitions
10and to find out the superclasses of a class as well as its attributes.
12It is based on the Python class browser found in this package.
15import re
17import Utilities
18import Utilities.ClassBrowsers as ClassBrowsers
19from . import ClbrBaseClasses
# Scanner used by scan(): one regular expression whose top-level alternatives
# are the named groups String, CodingLine, Comment, Method, Class,
# ClassIgnored, Module, AccessControl, Attribute, Attr, Begin, BeginEnd and
# End.  The caller finds out which alternative matched by testing
# m.start("<GroupName>") >= 0.
#
# The pattern is compiled with
#   - re.VERBOSE   (whitespace inside the pattern is insignificant, hence the
#                   explicit '[ \t]' classes and the escaped '\#'),
#   - re.DOTALL    ('.' also matches newlines, so '.*?' can span lines, e.g.
#                   in '=begin ... =end' and here documents),
#   - re.MULTILINE ('^' and '$' match at every line start / end).
#
# Only the bound 'search' method is kept, so it is called as
# _getnext(text, startpos).
_getnext = re.compile(
    r"""
    (?P<String>
        =begin .*? =end
    |   <<-? (?P<HereMarker1> [a-zA-Z0-9_]+? ) [ \t]* .*? (?P=HereMarker1)
    |   <<-? ['"] (?P<HereMarker2> [^'"]+? ) ['"] [ \t]* .*? (?P=HereMarker2)
    |   " [^"\\\n]* (?: \\. [^"\\\n]*)* "
    |   ' [^'\\\n]* (?: \\. [^'\\\n]*)* '
    )
|   (?P<CodingLine>
        ^ \# \s* [*_-]* \s* coding[:=] \s* (?P<Coding> [-\w_.]+ ) \s* [*_-]* $
    )
|   (?P<Comment>
        ^
        [ \t]* \#+ .*? $
    )
|   (?P<Method>
        ^
        (?P<MethodIndent> [ \t]* )
        def [ \t]+
        (?:
            (?P<MethodName2> [a-zA-Z0-9_]+ (?: \. | :: )
            [a-zA-Z_] [a-zA-Z0-9_?!=]* )
        |
            (?P<MethodName> [a-zA-Z_] [a-zA-Z0-9_?!=]* )
        |
            (?P<MethodName3> [^( \t]{1,3} )
        )
        [ \t]*
        (?:
            \( (?P<MethodSignature> (?: [^)] | \)[ \t]*,? )*? ) \)
        )?
        [ \t]*
    )
|   (?P<Class>
        ^
        (?P<ClassIndent> [ \t]* )
        class
        (?:
            [ \t]+
            (?P<ClassName> [A-Z] [a-zA-Z0-9_]* )
            [ \t]*
            (?P<ClassSupers> < [ \t]* [A-Z] [a-zA-Z0-9_:]* )?
        |
            [ \t]* << [ \t]*
            (?P<ClassName2> [a-zA-Z_] [a-zA-Z0-9_:]* )
        )
        [ \t]*
    )
|   (?P<ClassIgnored>
        \(
        [ \t]*
        class
        .*?
        end
        [ \t]*
        \)
    )
|   (?P<Module>
        ^
        (?P<ModuleIndent> [ \t]* )
        module [ \t]+
        (?P<ModuleName> [A-Z] [a-zA-Z0-9_:]* )
        [ \t]*
    )
|   (?P<AccessControl>
        ^
        (?P<AccessControlIndent> [ \t]* )
        (?:
            (?P<AccessControlType> private | public | protected ) [^_]
        |
            (?P<AccessControlType2>
            private_class_method | public_class_method )
        )
        \(?
        [ \t]*
        (?P<AccessControlList> (?: : [a-zA-Z0-9_]+ , \s* )*
        (?: : [a-zA-Z0-9_]+ )+ )?
        [ \t]*
        \)?
    )
|   (?P<Attribute>
        ^
        (?P<AttributeIndent> [ \t]* )
        (?P<AttributeName> (?: @ | @@ ) [a-zA-Z0-9_]* )
        [ \t]* =
    )
|   (?P<Attr>
        ^
        (?P<AttrIndent> [ \t]* )
        attr
        (?P<AttrType> (?: _accessor | _reader | _writer ) )?
        \(?
        [ \t]*
        (?P<AttrList> (?: : [a-zA-Z0-9_]+ , \s* )*
        (?: : [a-zA-Z0-9_]+ | true | false )+ )
        [ \t]*
        \)?
    )
|   (?P<Begin>
            ^
            [ \t]*
            (?: def | if | unless | case | while | until | for | begin )
            \b [^_]
        |
            [ \t]* do [ \t]* (?: \| .*? \| )? [ \t]* $
    )
|   (?P<BeginEnd>
        \b (?: if ) \b [^_] .*? $
        |
        \b (?: if ) \b [^_] .*? end [ \t]* $
    )
|   (?P<End>
        [ \t]*
        (?:
            end [ \t]* $
        |
            end \b [^_]
        )
    )""",
    re.VERBOSE | re.DOTALL | re.MULTILINE).search
# substitution function for '#' comments: matches from a '#' through the
# following newline (the newline is part of the match) or, failing that, from
# a '#' to the end of the text; called as _commentsub(replacement, text)
_commentsub = re.compile(r"""#[^\n]*\n|#[^\n]*$""").sub
# cache of modules we've seen; maps the module name given to readmodule_ex()
# to the dictionary returned for it
_modules = {}                           # cache of modules we've seen
class VisibilityMixin(ClbrBaseClasses.ClbrVisibilityMixinBase):
    """
    Mixin class giving the Ruby browser entries a visibility state.
    """
    def __init__(self):
        """
        Constructor

        Every entry starts out with public visibility.
        """
        self.setPublic()
class Class(ClbrBaseClasses.Class, VisibilityMixin):
    """
    Class browser entry describing a Ruby class.
    """
    def __init__(self, module, name, superClasses, file, lineno):
        """
        Constructor

        @param module name of the module containing this class
        @param name name of this class
        @param superClasses list of class names this class is inherited from
        @param file filename containing this class
        @param lineno linenumber of the class definition
        """
        ClbrBaseClasses.Class.__init__(
            self, module, name, superClasses, file, lineno)
        # the mixin is initialized last; it sets the visibility to public
        VisibilityMixin.__init__(self)
class Module(ClbrBaseClasses.Module, VisibilityMixin):
    """
    Class browser entry describing a Ruby module.
    """
    def __init__(self, module, name, file, lineno):
        """
        Constructor

        @param module name of the module containing this Ruby module
        @param name name of this Ruby module
        @param file filename containing this Ruby module
        @param lineno linenumber of the module definition
        """
        ClbrBaseClasses.Module.__init__(
            self, module, name, file, lineno)
        # the mixin is initialized last; it sets the visibility to public
        VisibilityMixin.__init__(self)
class Function(ClbrBaseClasses.Function, VisibilityMixin):
    """
    Class browser entry describing a Ruby function or method.
    """
    def __init__(self, module, name, file, lineno, signature='',
                 separator=','):
        """
        Constructor

        @param module name of the module containing this function
        @param name name of this function
        @param file filename containing this function
        @param lineno linenumber of the function definition
        @param signature parameterlist of the method
        @param separator string separating the parameters
        """
        ClbrBaseClasses.Function.__init__(
            self, module, name, file, lineno, signature, separator)
        # the mixin is initialized last; it sets the visibility to public
        VisibilityMixin.__init__(self)
class Attribute(ClbrBaseClasses.Attribute, VisibilityMixin):
    """
    Class browser entry describing an attribute of a class or module.
    """
    def __init__(self, module, name, file, lineno):
        """
        Constructor

        @param module name of the module containing this attribute
        @param name name of this attribute
        @param file filename containing this attribute
        @param lineno linenumber of the attribute definition
        """
        ClbrBaseClasses.Attribute.__init__(
            self, module, name, file, lineno)
        VisibilityMixin.__init__(self)
        # the mixin made the entry public; attributes start out private
        self.setPrivate()
def readmodule_ex(module, path=None):
    """
    Read a Ruby file and return a dictionary of classes, functions and modules.

    The result is remembered in the module level cache, so each module name
    is located and scanned only once.

    @param module name of the Ruby file
    @type str
    @param path path the file should be searched in
    @type list of str
    @return the resulting dictionary
    @rtype dict
    """
    try:
        # we've seen this file before...
        return _modules[module]
    except KeyError:
        pass

    # search the path for the file (on a copy of the given path)
    searchPath = [] if path is None else path[:]
    handle, file, (_suffix, _mode, sourceType) = ClassBrowsers.find_module(
        module, searchPath)
    if handle:
        # only the location is needed; the file is read again below
        handle.close()

    if sourceType in SUPPORTED_TYPES:
        try:
            src = Utilities.readEncodedFile(file)[0]
        except (UnicodeError, OSError):
            # can't do anything with this module
            _modules[module] = {}
            return {}

        result = scan(src, file, module)
        _modules[module] = result
        return result

    # not Ruby source, can't do anything with this module
    _modules[module] = {}
    return {}
def scan(src, file, module):
    """
    Function to scan the given source text.

    The text is walked with the module level scanner expression; every match
    is dispatched on the named group that matched. Top-level classes and
    modules are stored in the result under their name, top-level functions
    under their name as well (repeated names get a '_<count>' suffix) and
    the first coding line under the key '@@Coding@@'. Methods, nested
    classes/modules and attributes are added to their enclosing entry.

    @param src source text to be scanned
    @type str
    @param file file name associated with the source text
    @type str
    @param module module name associated with the source text
    @type str
    @return dictionary containing the extracted data
    @rtype dict
    """
    # convert eol markers the Python style
    src = src.replace("\r\n", "\n").replace("\r", "\n")

    dictionary = {}
    # number of times a top-level function name was seen again
    dict_counts = {}

    classstack = []  # stack of (class, indent) pairs
    acstack = []    # stack of (access control, indent) pairs
    # 'indent' is a block nesting counter, not a column: it is incremented
    # for each def/class/module/Begin match and decremented for each End
    # match
    indent = 0

    # 'lineno' is the line number of text offset 'last_lineno_pos'; both are
    # only advanced by the branches that need a line number
    lineno, last_lineno_pos = 1, 0
    cur_obj = None
    lastGlobalEntry = None
    i = 0
    while True:
        # look for the next construct starting at offset i
        m = _getnext(src, i)
        if not m:
            break
        # continue the next search right after this match
        start, i = m.span()

        if m.start("Method") >= 0:
            # found a method definition or function
            thisindent = indent
            indent += 1
            meth_name = (
                m.group("MethodName") or
                m.group("MethodName2") or
                m.group("MethodName3")
            )
            meth_sig = m.group("MethodSignature")
            # remove backslash line continuations; no signature gives ''
            meth_sig = meth_sig and meth_sig.replace('\\\n', '') or ''
            # remove comments embedded in the signature
            meth_sig = _commentsub('', meth_sig)
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # strip the receiver of 'def self.name' / 'def self::name'
            if meth_name.startswith('self.'):
                meth_name = meth_name[5:]
            elif meth_name.startswith('self::'):
                meth_name = meth_name[6:]
            # close all classes/modules indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                if classstack[-1][0] is not None:
                    # record the end line
                    classstack[-1][0].setEndLine(lineno - 1)
                del classstack[-1]
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            if classstack:
                # it's a class/module method
                cur_class = classstack[-1][0]
                if isinstance(cur_class, (Class, Module)):
                    # it's a method
                    f = Function(None, meth_name,
                                 file, lineno, meth_sig)
                    cur_class._addmethod(meth_name, f)
                else:
                    # the enclosing entry is not a Class/Module (e.g. the
                    # marker pushed for an enclosing def); no new entry is
                    # created, the enclosing one is reused
                    f = cur_class
                # set access control
                if acstack:
                    accesscontrol = acstack[-1][0]
                    if accesscontrol == "private":
                        f.setPrivate()
                    elif accesscontrol == "protected":
                        f.setProtected()
                    elif accesscontrol == "public":
                        f.setPublic()
                # else it's a nested def
            else:
                # it's a function
                f = Function(module, meth_name,
                             file, lineno, meth_sig)
                if meth_name in dict_counts:
                    # name seen before: store it as '<name>_<count>'
                    dict_counts[meth_name] += 1
                    meth_name = "{0}_{1:d}".format(
                        meth_name, dict_counts[meth_name])
                else:
                    dict_counts[meth_name] = 0
                dictionary[meth_name] = f
            if not classstack:
                # top-level function: the previous global entry ends on the
                # line before it
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = f
            if cur_obj and isinstance(cur_obj, Function):
                # the previously recorded function ends before this def
                cur_obj.setEndLine(lineno - 1)
            cur_obj = f
            classstack.append((f, thisindent))  # Marker for nested fns

        elif (
            m.start("String") >= 0 or
            m.start("Comment") >= 0 or
            m.start("ClassIgnored") >= 0 or
            m.start("BeginEnd") >= 0
        ):
            # these constructs are consumed without changing any state
            pass

        elif m.start("Class") >= 0:
            # we found a class definition
            thisindent = indent
            indent += 1
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # close all classes/modules indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                if classstack[-1][0] is not None:
                    # record the end line
                    classstack[-1][0].setEndLine(lineno - 1)
                del classstack[-1]
            # ClassName2 is the target of a 'class << name' definition
            class_name = m.group("ClassName") or m.group("ClassName2")
            inherit = m.group("ClassSupers")
            if inherit:
                # the class inherits from other classes
                # (drop the leading '<' and surrounding whitespace)
                inherit = inherit[1:].strip()
                inherit = [_commentsub('', inherit)]
            # remember this class
            cur_class = Class(module, class_name, inherit,
                              file, lineno)
            if not classstack:
                # top-level class; a name seen before reuses the old entry
                if class_name in dictionary:
                    cur_class = dictionary[class_name]
                else:
                    dictionary[class_name] = cur_class
            else:
                # nested definition
                # NOTE(review): the enclosing entry may be a Function marker;
                # 'classes', 'name' and '_addclass' are assumed to be
                # provided by ClbrBaseClasses - confirm
                cls = classstack[-1][0]
                if class_name in cls.classes:
                    cur_class = cls.classes[class_name]
                elif class_name in (cls.name, "self"):
                    # 'class << self' or same name as the enclosing entry:
                    # continue with the enclosing entry
                    cur_class = cls
                else:
                    cls._addclass(class_name, cur_class)
            if not classstack:
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = cur_class
            cur_obj = cur_class
            classstack.append((cur_class, thisindent))
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            acstack.append(["public", thisindent])
            # default access control is 'public'

        elif m.start("Module") >= 0:
            # we found a module definition
            thisindent = indent
            indent += 1
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # close all classes/modules indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                if classstack[-1][0] is not None:
                    # record the end line
                    classstack[-1][0].setEndLine(lineno - 1)
                del classstack[-1]
            module_name = m.group("ModuleName")
            # remember this module
            cur_class = Module(module, module_name, file, lineno)
            if not classstack:
                # top-level module; a name seen before reuses the old entry
                if module_name in dictionary:
                    cur_class = dictionary[module_name]
                else:
                    dictionary[module_name] = cur_class
            else:
                # nested definition (same handling as for nested classes)
                cls = classstack[-1][0]
                if module_name in cls.classes:
                    cur_class = cls.classes[module_name]
                elif cls.name == module_name:
                    cur_class = cls
                else:
                    cls._addclass(module_name, cur_class)
            if not classstack:
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = cur_class
            cur_obj = cur_class
            classstack.append((cur_class, thisindent))
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            acstack.append(["public", thisindent])
            # default access control is 'public'

        elif m.start("AccessControl") >= 0:
            aclist = m.group("AccessControlList")
            if aclist is None:
                # access control keyword without a symbol list: change the
                # current access control of the innermost entry of acstack
                # whose nesting level is lower than the current one
                index = -1
                while index >= -len(acstack):
                    if acstack[index][1] < indent:
                        actype = (
                            m.group("AccessControlType") or
                            m.group("AccessControlType2").split('_')[0]
                        )
                        acstack[index][0] = actype.lower()
                        break
                    else:
                        index -= 1
            else:
                # access control keyword with a symbol list: change the
                # visibility of the listed methods of the innermost enclosing
                # entry that is not a Function
                index = -1
                while index >= -len(classstack):
                    if (
                        classstack[index][0] is not None and
                        not isinstance(classstack[index][0], Function) and
                        classstack[index][1] < indent
                    ):
                        parent = classstack[index][0]
                        actype = (
                            m.group("AccessControlType") or
                            m.group("AccessControlType2").split('_')[0]
                        )
                        actype = actype.lower()
                        for name in aclist.split(","):
                            name = name.strip()[1:]   # get rid of leading ':'
                            acmeth = parent._getmethod(name)
                            if acmeth is None:
                                # no method of that name recorded
                                continue
                            if actype == "private":
                                acmeth.setPrivate()
                            elif actype == "protected":
                                acmeth.setProtected()
                            elif actype == "public":
                                acmeth.setPublic()
                        break
                    else:
                        index -= 1

        elif m.start("Attribute") >= 0:
            # assignment to an '@name' or '@@name' variable
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # walk the stack from the innermost entry outwards and add the
            # attribute to the first entry that is not a Function and has a
            # lower nesting level
            index = -1
            while index >= -len(classstack):
                if (
                    classstack[index][0] is not None and
                    not isinstance(classstack[index][0], Function) and
                    classstack[index][1] < indent
                ):
                    attr = Attribute(
                        module, m.group("AttributeName"), file, lineno)
                    classstack[index][0]._addattribute(attr)
                    break
                else:
                    index -= 1
                    # for each stack entry skipped the last global entry is
                    # closed and forgotten
                    if lastGlobalEntry:
                        lastGlobalEntry.setEndLine(lineno - 1)
                    lastGlobalEntry = None

        elif m.start("Attr") >= 0:
            # attr / attr_accessor / attr_reader / attr_writer declaration
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # same stack walk as for the Attribute branch
            index = -1
            while index >= -len(classstack):
                if (
                    classstack[index][0] is not None and
                    not isinstance(classstack[index][0], Function) and
                    classstack[index][1] < indent
                ):
                    parent = classstack[index][0]
                    if m.group("AttrType") is None:
                        # plain 'attr :name [, true | false]'
                        nv = m.group("AttrList").split(",")
                        # NOTE: str.split() with a separator never returns
                        # an empty list, so this guard does not trigger
                        if not nv:
                            break
                        name = nv[0].strip()[1:]    # get rid of leading ':'
                        # reuse an already recorded '@name' / '@@name'
                        attr = (
                            parent._getattribute("@" + name) or
                            parent._getattribute("@@" + name) or
                            Attribute(module, "@" + name, file, lineno)
                        )
                        # no flag or 'false' -> protected, 'true' -> public
                        if len(nv) == 1 or nv[1].strip() == "false":
                            attr.setProtected()
                        elif nv[1].strip() == "true":
                            attr.setPublic()
                        parent._addattribute(attr)
                    else:
                        access = m.group("AttrType")
                        for name in m.group("AttrList").split(","):
                            name = name.strip()[1:]   # get rid of leading ':'
                            attr = (
                                parent._getattribute("@" + name) or
                                parent._getattribute("@@" + name) or
                                Attribute(module, "@" + name, file, lineno)
                            )
                            if access == "_accessor":
                                attr.setPublic()
                            elif access in ("_reader", "_writer"):
                                # raise the visibility by one step:
                                # private -> protected -> public
                                if attr.isPrivate():
                                    attr.setProtected()
                                elif attr.isProtected():
                                    attr.setPublic()
                            parent._addattribute(attr)
                    break
                else:
                    index -= 1

        elif m.start("Begin") >= 0:
            # a begin of a block we are not interested in
            indent += 1

        elif m.start("End") >= 0:
            # an end of a block
            indent -= 1
            if indent < 0:
                # no negative indent allowed
                if classstack:
                    # it's a class/module method
                    # (fall back to the nesting level of the innermost entry)
                    indent = classstack[-1][1]
                else:
                    indent = 0

        elif m.start("CodingLine") >= 0:
            # a coding statement
            coding = m.group("Coding")
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # only the first coding line found is recorded
            if "@@Coding@@" not in dictionary:
                dictionary["@@Coding@@"] = ClbrBaseClasses.Coding(
                    module, file, lineno, coding)

    return dictionary