1# -*- coding: utf-8 -*-
2
3# Copyright (c) 2005 - 2021 Detlev Offenbach <detlev@die-offenbachs.de>
4#
5
6"""
7Parse a Ruby file and retrieve classes, modules, methods and attributes.
8
9Parse enough of a Ruby file to recognize class, module and method definitions
10and to find out the superclasses of a class as well as its attributes.
11
12It is based on the Python class browser found in this package.
13"""
14
15import re
16
17import Utilities
18import Utilities.ClassBrowsers as ClassBrowsers
19from . import ClbrBaseClasses
20
# source types this class browser handles; readmodule_ex() returns an empty
# dictionary for any module whose detected type is not in this list
SUPPORTED_TYPES = [ClassBrowsers.RB_SOURCE]
22
# Search function of the compiled scanner expression used by scan().
#
# The pattern is one big alternation of named groups; scan() checks which
# top level group took part in the match (m.start("<Group>") >= 0) and acts
# accordingly:
#   String         =begin/=end blocks, here documents and quoted strings
#   CodingLine     a '# coding: xyz' style comment line (name in 'Coding')
#   Comment        a line consisting of a comment only
#   Method         a 'def' line (name in MethodName, MethodName2 or
#                  MethodName3, optional parameter list in MethodSignature)
#   Class          a 'class Name [< Super]' or 'class << name' line
#   ClassIgnored   a parenthesized '( class ... end )' expression
#   Module         a 'module Name' line
#   AccessControl  private/public/protected and the *_class_method variants,
#                  optionally followed by a list of symbols
#   Attribute      an assignment to an @ or @@ variable
#   Attr           attr, attr_accessor, attr_reader or attr_writer with a
#                  list of symbols
#   Begin          a keyword or 'do' opening a block that needs an 'end'
#   BeginEnd       an 'if' not located at the beginning of a line
#   End            an 'end' keyword
#
# The expression is compiled with re.VERBOSE (whitespace inside the pattern
# is not significant), re.DOTALL ('.' also matches a newline) and
# re.MULTILINE ('^' and '$' match at line boundaries).
_getnext = re.compile(
    r"""
    (?P<String>
        =begin .*? =end

    |   <<-? (?P<HereMarker1> [a-zA-Z0-9_]+? ) [ \t]* .*? (?P=HereMarker1)

    |   <<-? ['"] (?P<HereMarker2> [^'"]+? ) ['"] [ \t]* .*? (?P=HereMarker2)

    |   " [^"\\\n]* (?: \\. [^"\\\n]*)* "

    |   ' [^'\\\n]* (?: \\. [^'\\\n]*)* '
    )

|   (?P<CodingLine>
        ^ \# \s* [*_-]* \s* coding[:=] \s* (?P<Coding> [-\w_.]+ ) \s* [*_-]* $
    )

|   (?P<Comment>
        ^
        [ \t]* \#+ .*? $
    )

|   (?P<Method>
        ^
        (?P<MethodIndent> [ \t]* )
        def [ \t]+
        (?:
            (?P<MethodName2> [a-zA-Z0-9_]+ (?: \. | :: )
            [a-zA-Z_] [a-zA-Z0-9_?!=]* )
        |
            (?P<MethodName> [a-zA-Z_] [a-zA-Z0-9_?!=]* )
        |
            (?P<MethodName3> [^( \t]{1,3} )
        )
        [ \t]*
        (?:
            \( (?P<MethodSignature> (?: [^)] | \)[ \t]*,? )*? ) \)
        )?
        [ \t]*
    )

|   (?P<Class>
        ^
        (?P<ClassIndent> [ \t]* )
        class
        (?:
            [ \t]+
            (?P<ClassName> [A-Z] [a-zA-Z0-9_]* )
            [ \t]*
            (?P<ClassSupers> < [ \t]* [A-Z] [a-zA-Z0-9_:]* )?
        |
            [ \t]* << [ \t]*
            (?P<ClassName2> [a-zA-Z_] [a-zA-Z0-9_:]* )
        )
        [ \t]*
    )

|   (?P<ClassIgnored>
        \(
        [ \t]*
        class
        .*?
        end
        [ \t]*
        \)
    )

|   (?P<Module>
        ^
        (?P<ModuleIndent> [ \t]* )
        module [ \t]+
        (?P<ModuleName> [A-Z] [a-zA-Z0-9_:]* )
        [ \t]*
    )

|   (?P<AccessControl>
        ^
        (?P<AccessControlIndent> [ \t]* )
        (?:
            (?P<AccessControlType> private | public | protected ) [^_]
        |
            (?P<AccessControlType2>
            private_class_method | public_class_method )
        )
        \(?
        [ \t]*
        (?P<AccessControlList> (?: : [a-zA-Z0-9_]+ , \s* )*
        (?: : [a-zA-Z0-9_]+ )+ )?
        [ \t]*
        \)?
    )

|   (?P<Attribute>
        ^
        (?P<AttributeIndent> [ \t]* )
        (?P<AttributeName> (?: @ | @@ ) [a-zA-Z0-9_]* )
        [ \t]* =
    )

|   (?P<Attr>
        ^
        (?P<AttrIndent> [ \t]* )
        attr
        (?P<AttrType> (?: _accessor | _reader | _writer ) )?
        \(?
        [ \t]*
        (?P<AttrList> (?: : [a-zA-Z0-9_]+ , \s* )*
        (?: : [a-zA-Z0-9_]+ | true | false )+ )
        [ \t]*
        \)?
    )

|   (?P<Begin>
            ^
            [ \t]*
            (?: def | if | unless | case | while | until | for | begin )
            \b [^_]
        |
            [ \t]* do [ \t]* (?: \| .*? \| )? [ \t]* $
    )

|   (?P<BeginEnd>
        \b (?: if ) \b [^_] .*? $
        |
        \b (?: if ) \b [^_] .*? end [ \t]* $
    )

|   (?P<End>
        [ \t]*
        (?:
            end [ \t]* $
        |
            end \b [^_]
        )
    )""",
    re.VERBOSE | re.DOTALL | re.MULTILINE).search
160
# substitution function matching a '#' comment up to (and including) the
# end of its line or up to the end of the text; scan() uses it to strip
# comments from method signatures and superclass names
_commentsub = re.compile(r"""#[^\n]*\n|#[^\n]*$""").sub

# cache of modules we've seen; maps the module name passed to
# readmodule_ex() to the dictionary returned for it
_modules = {}
164
165
class VisibilityMixin(ClbrBaseClasses.ClbrVisibilityMixinBase):
    """
    Mixin class implementing the notion of visibility.

    It is combined with the ClbrBaseClasses entity classes by the Class,
    Module, Function and Attribute classes of this module.
    """
    def __init__(self):
        """
        Constructor

        Sets the initial visibility by calling setPublic().
        """
        self.setPublic()
175
176
class Class(ClbrBaseClasses.Class, VisibilityMixin):
    """
    Class representing a class found in a Ruby source text.
    """
    def __init__(self, module, name, superClasses, file, lineno):
        """
        Constructor

        @param module name of the module containing this class
        @param name name of this class
        @param superClasses list of names of the classes this class is
            derived from
        @param file name of the file containing this class
        @param lineno line number of the class definition
        """
        # both bases are initialized explicitly, the generic class entry
        # first and the visibility mixin second
        ClbrBaseClasses.Class.__init__(
            self, module, name, superClasses, file, lineno)
        VisibilityMixin.__init__(self)
194
195
class Module(ClbrBaseClasses.Module, VisibilityMixin):
    """
    Class representing a module found in a Ruby source text.
    """
    def __init__(self, module, name, file, lineno):
        """
        Constructor

        @param module name of the module containing this module
        @param name name of this module
        @param file name of the file containing this module
        @param lineno line number of the module definition
        """
        # both bases are initialized explicitly, the generic module entry
        # first and the visibility mixin second
        ClbrBaseClasses.Module.__init__(
            self, module, name, file, lineno)
        VisibilityMixin.__init__(self)
211
212
class Function(ClbrBaseClasses.Function, VisibilityMixin):
    """
    Class representing a function or method found in a Ruby source text.
    """
    def __init__(self, module, name, file, lineno, signature='',
                 separator=','):
        """
        Constructor

        @param module name of the module containing this function
        @param name name of this function
        @param file name of the file containing this function
        @param lineno line number of the function definition
        @param signature parameter list of the function
        @param separator string separating the parameters
        """
        # both bases are initialized explicitly, the generic function entry
        # first and the visibility mixin second
        ClbrBaseClasses.Function.__init__(
            self, module, name, file, lineno, signature, separator)
        VisibilityMixin.__init__(self)
232
233
class Attribute(ClbrBaseClasses.Attribute, VisibilityMixin):
    """
    Class representing an attribute of a Ruby class or module.
    """
    def __init__(self, module, name, file, lineno):
        """
        Constructor

        @param module name of the module containing this attribute
        @param name name of this attribute
        @param file name of the file containing this attribute
        @param lineno line number of the attribute definition
        """
        ClbrBaseClasses.Attribute.__init__(
            self, module, name, file, lineno)
        VisibilityMixin.__init__(self)
        # the mixin constructor calls setPublic(); attributes are switched
        # to private right afterwards
        self.setPrivate()
250
251
def readmodule_ex(module, path=None):
    """
    Read a Ruby file and return a dictionary of classes, functions and modules.

    The result is stored in the module level cache under the given module
    name; a later call with the same name returns the cached dictionary
    without looking at the path again.

    @param module name of the Ruby file (string)
    @param path path the file should be searched in (list of strings)
    @return the resulting dictionary
    """
    if module in _modules:
        # we've seen this file before...
        return _modules[module]

    # search the path for the file (a copy of the path is handed over)
    searchPath = path[:] if path is not None else []
    handle, file, (_suffix, _mode, sourceType) = ClassBrowsers.find_module(
        module, searchPath)
    if handle:
        # only the location and the type are needed, not the open file
        handle.close()

    if sourceType not in SUPPORTED_TYPES:
        # not Ruby source, can't do anything with this module
        _modules[module] = {}
        return {}

    try:
        src = Utilities.readEncodedFile(file)[0]
    except (UnicodeError, OSError):
        # can't do anything with this module
        _modules[module] = {}
        return {}

    result = scan(src, file, module)
    _modules[module] = result
    return result
286
287
def scan(src, file, module):
    """
    Function to scan the given Ruby source text.

    The text is searched repeatedly with the module level scanner
    expression (_getnext). Depending on the kind of construct found, Class,
    Module, Function and Attribute objects are created and linked to the
    entity enclosing them. Nesting is tracked by counting block openers and
    'end' keywords, not by looking at the leading whitespace.

    @param src source text to be scanned
    @type str
    @param file file name associated with the source text
    @type str
    @param module module name associated with the source text
    @type str
    @return dictionary containing the extracted data; the top level
        classes, modules and functions are stored under their names and
        the first coding line found (if any) under the key '@@Coding@@'
    @rtype dict
    """
    # convert eol markers the Python style
    src = src.replace("\r\n", "\n").replace("\r", "\n")

    dictionary = {}
    # number of re-definitions seen per top level function name; used to
    # build unique keys for functions defined more than once
    dict_counts = {}

    classstack = []  # stack of (class, indent) pairs
    acstack = []    # stack of (access control, indent) pairs
    # logical nesting depth; incremented for each method, class, module or
    # other block opener and decremented for each 'end'
    indent = 0

    # the line number is updated lazily by counting the newlines between
    # the position of the previous update and the start of the current match
    lineno, last_lineno_pos = 1, 0
    cur_obj = None
    lastGlobalEntry = None
    i = 0
    while True:
        m = _getnext(src, i)
        if not m:
            break
        # continue the next search right after the current match
        start, i = m.span()

        if m.start("Method") >= 0:
            # found a method definition or function
            thisindent = indent
            indent += 1
            meth_name = (
                m.group("MethodName") or
                m.group("MethodName2") or
                m.group("MethodName3")
            )
            # remove line continuations and comments from the signature
            meth_sig = m.group("MethodSignature")
            meth_sig = meth_sig and meth_sig.replace('\\\n', '') or ''
            meth_sig = _commentsub('', meth_sig)
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # strip the receiver of methods defined as 'self.name' or
            # 'self::name'
            if meth_name.startswith('self.'):
                meth_name = meth_name[5:]
            elif meth_name.startswith('self::'):
                meth_name = meth_name[6:]
            # close all classes/modules indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                if classstack[-1][0] is not None:
                    # record the end line
                    classstack[-1][0].setEndLine(lineno - 1)
                del classstack[-1]
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            if classstack:
                # it's a class/module method
                cur_class = classstack[-1][0]
                if isinstance(cur_class, (Class, Module)):
                    # it's a method
                    f = Function(None, meth_name,
                                 file, lineno, meth_sig)
                    cur_class._addmethod(meth_name, f)
                else:
                    # the enclosing entry is a function; no new object is
                    # created, the enclosing function is used instead
                    f = cur_class
                # set access control
                if acstack:
                    accesscontrol = acstack[-1][0]
                    if accesscontrol == "private":
                        f.setPrivate()
                    elif accesscontrol == "protected":
                        f.setProtected()
                    elif accesscontrol == "public":
                        f.setPublic()
                # else it's a nested def
            else:
                # it's a function
                f = Function(module, meth_name,
                             file, lineno, meth_sig)
                # the first definition is stored under its plain name,
                # later ones under 'name_1', 'name_2', ...
                if meth_name in dict_counts:
                    dict_counts[meth_name] += 1
                    meth_name = "{0}_{1:d}".format(
                        meth_name, dict_counts[meth_name])
                else:
                    dict_counts[meth_name] = 0
                dictionary[meth_name] = f
            if not classstack:
                # a new top level entry ends the previous one
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = f
            if cur_obj and isinstance(cur_obj, Function):
                # a new definition ends the function seen before
                cur_obj.setEndLine(lineno - 1)
            cur_obj = f
            classstack.append((f, thisindent))  # Marker for nested fns

        elif (
            m.start("String") >= 0 or
            m.start("Comment") >= 0 or
            m.start("ClassIgnored") >= 0 or
            m.start("BeginEnd") >= 0
        ):
            # nothing to be recorded for these; they are matched only to
            # be skipped
            pass

        elif m.start("Class") >= 0:
            # we found a class definition
            thisindent = indent
            indent += 1
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # close all classes/modules indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                if classstack[-1][0] is not None:
                    # record the end line
                    classstack[-1][0].setEndLine(lineno - 1)
                del classstack[-1]
            # ClassName2 is set for the 'class << name' form
            class_name = m.group("ClassName") or m.group("ClassName2")
            inherit = m.group("ClassSupers")
            if inherit:
                # the class inherits from other classes
                # (drop the leading '<' and any trailing comment)
                inherit = inherit[1:].strip()
                inherit = [_commentsub('', inherit)]
            # remember this class
            cur_class = Class(module, class_name, inherit,
                              file, lineno)
            if not classstack:
                # top level class; an already known class of that name is
                # reused instead of being replaced
                if class_name in dictionary:
                    cur_class = dictionary[class_name]
                else:
                    dictionary[class_name] = cur_class
            else:
                # nested class; reuse a known nested class of that name or
                # the enclosing entry itself if it is named (or, for
                # 'class << self', referenced as 'self')
                cls = classstack[-1][0]
                if class_name in cls.classes:
                    cur_class = cls.classes[class_name]
                elif class_name in (cls.name, "self"):
                    cur_class = cls
                else:
                    cls._addclass(class_name, cur_class)
            if not classstack:
                # a new top level entry ends the previous one
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = cur_class
            cur_obj = cur_class
            classstack.append((cur_class, thisindent))
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            # a list (not a tuple) is pushed because the access control
            # type is changed in place by the access control handling
            acstack.append(["public", thisindent])
            # default access control is 'public'

        elif m.start("Module") >= 0:
            # we found a module definition
            thisindent = indent
            indent += 1
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # close all classes/modules indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                if classstack[-1][0] is not None:
                    # record the end line
                    classstack[-1][0].setEndLine(lineno - 1)
                del classstack[-1]
            module_name = m.group("ModuleName")
            # remember this module
            cur_class = Module(module, module_name, file, lineno)
            if not classstack:
                # top level module; an already known entry of that name is
                # reused instead of being replaced
                if module_name in dictionary:
                    cur_class = dictionary[module_name]
                else:
                    dictionary[module_name] = cur_class
            else:
                # nested module; reuse a known nested entry of that name
                # or the enclosing entry itself if it has the same name
                cls = classstack[-1][0]
                if module_name in cls.classes:
                    cur_class = cls.classes[module_name]
                elif cls.name == module_name:
                    cur_class = cls
                else:
                    cls._addclass(module_name, cur_class)
            if not classstack:
                # a new top level entry ends the previous one
                if lastGlobalEntry:
                    lastGlobalEntry.setEndLine(lineno - 1)
                lastGlobalEntry = cur_class
            cur_obj = cur_class
            classstack.append((cur_class, thisindent))
            while acstack and acstack[-1][1] >= thisindent:
                del acstack[-1]
            # a list (not a tuple) is pushed because the access control
            # type is changed in place by the access control handling
            acstack.append(["public", thisindent])
            # default access control is 'public'

        elif m.start("AccessControl") >= 0:
            aclist = m.group("AccessControlList")
            if aclist is None:
                # access control statement without a list of symbols:
                # change the type of the innermost access control entry
                # located at a lower nesting depth
                index = -1
                while index >= -len(acstack):
                    if acstack[index][1] < indent:
                        # for the '*_class_method' variants only the part
                        # before the first underscore is used
                        actype = (
                            m.group("AccessControlType") or
                            m.group("AccessControlType2").split('_')[0]
                        )
                        acstack[index][0] = actype.lower()
                        break
                    else:
                        index -= 1
            else:
                # access control statement with a list of symbols: change
                # the visibility of the named methods of the innermost
                # enclosing class or module
                index = -1
                while index >= -len(classstack):
                    if (
                        classstack[index][0] is not None and
                        not isinstance(classstack[index][0], Function) and
                        classstack[index][1] < indent
                    ):
                        parent = classstack[index][0]
                        actype = (
                            m.group("AccessControlType") or
                            m.group("AccessControlType2").split('_')[0]
                        )
                        actype = actype.lower()
                        for name in aclist.split(","):
                            name = name.strip()[1:]   # get rid of leading ':'
                            acmeth = parent._getmethod(name)
                            if acmeth is None:
                                # method not known to the parent
                                continue
                            if actype == "private":
                                acmeth.setPrivate()
                            elif actype == "protected":
                                acmeth.setProtected()
                            elif actype == "public":
                                acmeth.setPublic()
                        break
                    else:
                        index -= 1

        elif m.start("Attribute") >= 0:
            # assignment to an instance (@) or class (@@) variable; it is
            # added to the innermost enclosing class or module
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            index = -1
            while index >= -len(classstack):
                if (
                    classstack[index][0] is not None and
                    not isinstance(classstack[index][0], Function) and
                    classstack[index][1] < indent
                ):
                    attr = Attribute(
                        module, m.group("AttributeName"), file, lineno)
                    classstack[index][0]._addattribute(attr)
                    break
                else:
                    # this stack entry cannot own the attribute; continue
                    # with the next outer one
                    # NOTE(review): the last global entry is closed and
                    # forgotten for each skipped entry - confirm intent
                    index -= 1
                    if lastGlobalEntry:
                        lastGlobalEntry.setEndLine(lineno - 1)
                    lastGlobalEntry = None

        elif m.start("Attr") >= 0:
            # attr, attr_accessor, attr_reader or attr_writer statement;
            # it is applied to the innermost enclosing class or module
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            index = -1
            while index >= -len(classstack):
                if (
                    classstack[index][0] is not None and
                    not isinstance(classstack[index][0], Function) and
                    classstack[index][1] < indent
                ):
                    parent = classstack[index][0]
                    if m.group("AttrType") is None:
                        # plain 'attr': the first list element is the
                        # name, an optional second one is 'true' or 'false'
                        nv = m.group("AttrList").split(",")
                        if not nv:
                            break
                        name = nv[0].strip()[1:]    # get rid of leading ':'
                        # reuse a known attribute of that name or create
                        # a new instance attribute
                        attr = (
                            parent._getattribute("@" + name) or
                            parent._getattribute("@@" + name) or
                            Attribute(module, "@" + name, file, lineno)
                        )
                        if len(nv) == 1 or nv[1].strip() == "false":
                            attr.setProtected()
                        elif nv[1].strip() == "true":
                            attr.setPublic()
                        parent._addattribute(attr)
                    else:
                        access = m.group("AttrType")
                        for name in m.group("AttrList").split(","):
                            name = name.strip()[1:]   # get rid of leading ':'
                            # reuse a known attribute of that name or
                            # create a new instance attribute
                            attr = (
                                parent._getattribute("@" + name) or
                                parent._getattribute("@@" + name) or
                                Attribute(module, "@" + name, file, lineno)
                            )
                            if access == "_accessor":
                                attr.setPublic()
                            elif access in ("_reader", "_writer"):
                                # each reader or writer raises the
                                # visibility by one step
                                if attr.isPrivate():
                                    attr.setProtected()
                                elif attr.isProtected():
                                    attr.setPublic()
                            parent._addattribute(attr)
                    break
                else:
                    index -= 1

        elif m.start("Begin") >= 0:
            # a begin of a block we are not interested in
            indent += 1

        elif m.start("End") >= 0:
            # an end of a block
            indent -= 1
            if indent < 0:
                # no negative indent allowed
                if classstack:
                    # it's a class/module method
                    indent = classstack[-1][1]
                else:
                    indent = 0

        elif m.start("CodingLine") >= 0:
            # a coding statement
            coding = m.group("Coding")
            lineno += src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # only the first coding line is recorded
            if "@@Coding@@" not in dictionary:
                dictionary["@@Coding@@"] = ClbrBaseClasses.Coding(
                    module, file, lineno, coding)

    return dictionary
614