#
# Copyright (c) 2014
#	Yoshitaro Makise
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function
import io
import os
import subprocess
import sys
import re
import string
import optparse
import pygments.lexers
from pygments.token import Token

EXUBERANT_CTAGS = "@EXUBERANT_CTAGS@"

# In most cases, a lexer can be looked up using the lowercase form of the
# formal language name. This dictionary defines the exceptions.
LANGUAGE_ALIASES = {
    'fantom':     'fan',
    'haxe':       'haXe',
    'sourcepawn': 'sp',
    'typescript': 'ts',
    'xbase':      'XBase'
}

# All punctuation characters except '-', '_' and '.', which are valid
# identifier characters in some languages
if sys.version_info < (3,):
    PUNCTUATION_CHARACTERS = string.punctuation.translate(None, '-_.')
else:
    PUNCTUATION_CHARACTERS = string.punctuation.translate(str.maketrans('', '', '-_.'))

CLOSEFDS = sys.platform != 'win32'

TERMINATOR = '###terminator###\n'

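# Holds user-tunable parser behavior (currently only whether to strip
# punctuation characters from the ends of extracted tags).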
class ParserOptions:
    def __init__(self):
        self.strip_punctuation = False

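# Parser which extracts symbols from source files using Pygments lexers.
# Every Token.Name token is recorded as a reference (isdef=False); this
# parser reports no definitions.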
class PygmentsParser:
    class ContentParser:
        def __init__(self, path, text, lexer, options):
            self.path = path
            self.text = text
            self.lexer = lexer
            self.options = options
            self.lines_index = None

        def parse(self):
            self.lines_index = self.build_lines_index(self.text)
            tokens = self.lexer.get_tokens_unprocessed(self.text)
            return self.parse_tokens(tokens)

        # builds an index where entry i is the offset just past the end of
        # line i (including its newline), with len(text) appended as a sentinel
        def build_lines_index(self, text):
            lines_index = []
            cur = 0
            while True:
                i = text.find('\n', cur)
                if i == -1:
                    break
                cur = i + 1
                lines_index.append(cur)
            lines_index.append(len(text))    # sentinel
            return lines_index

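        # Walks the unprocessed token stream, converting each character
        # offset to a 1-based line number via lines_index, and collects
        # every Token.Name token as a reference entry.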
        def parse_tokens(self, tokens):
            result = {}
            cur_line = 0
            for index, tokentype, tag in tokens:
                if tokentype in Token.Name:
                    # we can assume indexes are delivered in ascending order
                    while self.lines_index[cur_line] <= index:
                        cur_line += 1
                    tag = re.sub(r'\s+', '', tag)    # remove newlines and spaces
                    if self.options.strip_punctuation:
                        tag = tag.strip(PUNCTUATION_CHARACTERS)
                    if tag:
                        result[(False, tag, cur_line + 1)] = ''
            return result

    def __init__(self, langmap, options):
        self.langmap = langmap
        self.options = options

    def parse(self, path):
        lexer = self.get_lexer_by_langmap(path)
        if lexer:
            text = self.read_file(path)
            if text:
                parser = self.ContentParser(path, text, lexer, self.options)
                return parser.parse()
        return {}

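    # Looks up a Pygments lexer from the file's extension using the langmap
    # built from --langmap, applying LANGUAGE_ALIASES where the lowercase
    # language name does not match a Pygments lexer alias.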
    def get_lexer_by_langmap(self, path):
        ext = os.path.splitext(path)[1]
        if sys.platform == 'win32':
            lang = self.langmap.get(ext.lower(), None)
        else:
            lang = self.langmap.get(ext, None)
        if lang:
            name = lang.lower()
            if name in LANGUAGE_ALIASES:
                name = LANGUAGE_ALIASES[name]
            lexer = pygments.lexers.get_lexer_by_name(name)
            return lexer
        return None

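    # Reads the whole file; latin1 is used on Python 3 so that any byte
    # sequence can be decoded without errors.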
    def read_file(self, path):
        try:
            if sys.version_info < (3,):
                with open(path, 'r') as f:
                    text = f.read()
                    return text
            else:
                with open(path, 'r', encoding='latin1') as f:
                    text = f.read()
                    return text
        except Exception as e:
            print(e, file=sys.stderr)
            return None

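# Parser which obtains definitions from Exuberant Ctags. A single ctags
# process is started in --filter mode; file paths are written to its stdin
# and its cross-reference output (tag, line number, file, source image) is
# read back until the filter terminator line appears.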
class CtagsParser:
    def __init__(self, ctags_command, options):
        self.process = subprocess.Popen([ctags_command, '-xu', '--filter', '--filter-terminator=' + TERMINATOR, '--format=1'], bufsize=-1,
                                        stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=CLOSEFDS,
                                        universal_newlines=True)
        if sys.version_info < (3,):
            self.child_stdout = self.process.stdout
        else:
            self.child_stdout = io.TextIOWrapper(self.process.stdout.buffer, encoding='latin1')
            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='latin1')
        self.child_stdin = self.process.stdin
        self.options = options

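    # Sends one path to the ctags process and collects every tag it reports
    # for that file; each tag is recorded as a definition (isdef=True).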
    def parse(self, path):
        print(path, file=self.child_stdin)
        self.child_stdin.flush()
        result = {}
        while True:
            line = self.child_stdout.readline()
            if not line or line.startswith(TERMINATOR):
                break
            match = re.search(r'(\S+)\s+(\d+)\s+' + re.escape(path) + r'\s+(.*)$', line)
            if match:
                (tag, lnum, image) = match.groups()
                if self.options.strip_punctuation:
                    tag = tag.strip(PUNCTUATION_CHARACTERS)
                if tag:
                    result[(True, tag, int(lnum))] = image
        return result

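# Combines both parsers: definitions come from ctags, references from
# Pygments. A tag that ctags already reports as a definition on a given
# line is dropped from the Pygments references so it is not emitted twice.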
class MergingParser:
    def __init__(self, def_parser, ref_parser):
        self.def_parser = def_parser
        self.ref_parser = ref_parser

    def parse(self, path):
        def_result = self.def_parser.parse(path)
        ref_result = self.ref_parser.parse(path)
        result = def_result.copy()
        result.update(ref_result)
        for (isdef, tag, lnum) in def_result:
            ref_entry = (False, tag, lnum)
            if ref_entry in ref_result:
                del result[ref_entry]
        return result

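# Parses a --langmap value of the form 'Language:.ext1.ext2,Language2:.ext3'
# into a dictionary mapping file extensions to language names. Mappings whose
# language name starts with a lowercase letter belong to the built-in parser
# and are skipped here.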
def parse_langmap(string):
    langmap = {}
    mappings = string.split(',')
    for mapping in mappings:
        lang, exts = mapping.split(':')
        if not lang[0].islower():  # lowercase language names are for the built-in parser
            for ext in exts.split('.'):
                if ext:
                    if sys.platform == 'win32':
                        langmap['.' + ext.lower()] = lang
                    else:
                        langmap['.' + ext] = lang
    return langmap

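# Main request loop: reads file paths from stdin, parses each one, and prints
# one record per tag in the form 'D|R tag line path image', followed by the
# terminator line, then flushes stdout.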
def handle_requests(langmap, options):
    # Update the ctags path from the configuration file
    global EXUBERANT_CTAGS
    path = load_ctags_path()
    if path != '':
        EXUBERANT_CTAGS = path
    if EXUBERANT_CTAGS != '' and EXUBERANT_CTAGS != 'no':
        pygments_parser = PygmentsParser(langmap, options)
        try:
            ctags_parser = CtagsParser(EXUBERANT_CTAGS, options)
            parser = MergingParser(ctags_parser, pygments_parser)
        except Exception:
            # fall back to Pygments alone if ctags cannot be started
            parser = pygments_parser
    else:
        parser = PygmentsParser(langmap, options)
    while True:
        path = sys.stdin.readline()
        if not path:
            break
        path = path.rstrip()
        tags = parser.parse(path)
        for (isdef, tag, lnum), image in tags.items():
            if isdef:
                typ = 'D'
            else:
                typ = 'R'
            print(typ, tag, lnum, path, image)
        print(TERMINATOR, end='')
        sys.stdout.flush()

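# Reads extra options from the GTAGSPYGMENTSOPTS environment variable, a
# comma-separated list; currently only 'strippunctuation' is recognized.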
def get_parser_options_from_env(parser_options):
    env = os.getenv('GTAGSPYGMENTSOPTS')
    if env:
        for s in env.split(','):
            s = s.strip()
            if s == 'strippunctuation':
                parser_options.strip_punctuation = True

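# Asks gtags for the configured ctags command ('gtags --config=ctagscom')
# and returns its path, or an empty string if it cannot be obtained.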
def load_ctags_path():
    path = ''
    p = subprocess.Popen("gtags --config=ctagscom", shell=True,
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if p.wait() == 0:
        if sys.platform == 'win32' and sys.version_info >= (3,):
            path = io.TextIOWrapper(p.stdout, encoding='latin1').readline().rstrip()
        else:
            path = p.stdout.readline().rstrip()
    return path

def main():
    opt_parser = optparse.OptionParser()
    opt_parser.add_option('--langmap', dest='langmap')
    (options, args) = opt_parser.parse_args()
    if not options.langmap:
        opt_parser.error('--langmap option not given')
    langmap = parse_langmap(options.langmap)
    parser_options = ParserOptions()
    get_parser_options_from_env(parser_options)
    handle_requests(langmap, parser_options)

if __name__ == '__main__':
    main()