1# 2# Copyright (c) 2014 3# Yoshitaro Makise 4# 5# This program is free software: you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation, either version 3 of the License, or 8# (at your option) any later version. 9# 10# This program is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13# GNU General Public License for more details. 14# 15# You should have received a copy of the GNU General Public License 16# along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18from __future__ import print_function 19import io 20import os 21import subprocess 22import sys 23import re 24import string 25import optparse 26import pygments.lexers 27from pygments.token import Token 28 29EXUBERANT_CTAGS = "@EXUBERANT_CTAGS@" 30 31# In most cases, lexers can be looked up with lowercase form of formal 32# language names. This dictionary defines exceptions. 
LANGUAGE_ALIASES = {
    'fantom': 'fan',
    'haxe': 'haXe',
    'sourcepawn': 'sp',
    'typescript': 'ts',
    'xbase': 'XBase'
}

# All punctuation characters except '-', '_' and '.', which are valid
# identifier characters in some languages.
if sys.version_info < (3,):
    PUNCTUATION_CHARACTERS = string.punctuation.translate(None, '-_.')
else:
    PUNCTUATION_CHARACTERS = string.punctuation.translate(str.maketrans('', '', '-_.'))

# close_fds=True cannot be combined with redirected stdio on Windows.
# (fixed: dropped stray trailing semicolon)
CLOSEFDS = sys.platform != 'win32'

# Marker line emitted after each file's tags so the reader of our output
# (and of ctags --filter output) knows a request is complete.
TERMINATOR = '###terminator###\n'

class ParserOptions:
    """User-tunable options shared by all parser implementations."""

    def __init__(self):
        # When True, punctuation is stripped from both ends of every tag.
        self.strip_punctuation = False

class PygmentsParser:
    """Extracts symbols from source files using Pygments lexers.

    Pygments cannot reliably distinguish definitions from uses, so every
    tag is reported as a reference (isdef == False in the result keys).
    """

    class ContentParser:
        """Tokenizes the text of one file with a given lexer."""

        def __init__(self, path, text, lexer, options):
            self.path = path
            self.text = text
            self.lexer = lexer
            self.options = options
            self.lines_index = None

        def parse(self):
            """Return a dict keyed by (False, tag, line_number), values ''."""
            self.lines_index = self.build_lines_index(self.text)
            tokens = self.lexer.get_tokens_unprocessed(self.text)
            return self.parse_tokens(tokens)

        def build_lines_index(self, text):
            """Build a list of offsets of the beginning of each line.

            A final sentinel entry (len(text)) guarantees every token
            offset compares below some entry in parse_tokens.
            """
            lines_index = []
            cur = 0
            while True:
                i = text.find('\n', cur)
                if i == -1:
                    break
                cur = i + 1
                lines_index.append(cur)
            lines_index.append(len(text))  # sentinel
            return lines_index

        def parse_tokens(self, tokens):
            """Collect Name tokens, translating byte offsets to line numbers."""
            result = {}
            cur_line = 0
            for index, tokentype, tag in tokens:
                if tokentype in Token.Name:
                    # we can assume index is delivered in ascending order
                    while self.lines_index[cur_line] <= index:
                        cur_line += 1
                    # fixed: raw string avoids the invalid '\s' escape warning
                    tag = re.sub(r'\s+', '', tag)  # remove newline and spaces
                    if self.options.strip_punctuation:
                        tag = tag.strip(PUNCTUATION_CHARACTERS)
                    if tag:
                        result[(False, tag, cur_line + 1)] = ''
            return result

    def __init__(self, langmap, options):
        self.langmap = langmap
        self.options = options

    def parse(self, path):
        """Parse one file; returns {} when no lexer or no content is available."""
        lexer = self.get_lexer_by_langmap(path)
        if lexer:
            text = self.read_file(path)
            if text:
                parser = self.ContentParser(path, text, lexer, self.options)
                return parser.parse()
        return {}

    def get_lexer_by_langmap(self, path):
        """Look up a Pygments lexer from the file extension, or None.

        NOTE(review): pygments raises ClassNotFound for an unknown lexer
        name, so an invalid --langmap entry aborts the request loop;
        kept as-is to preserve behavior.
        """
        ext = os.path.splitext(path)[1]
        if sys.platform == 'win32':  # case-insensitive filesystem
            lang = self.langmap.get(ext.lower(), None)
        else:
            lang = self.langmap.get(ext, None)
        if lang:
            name = lang.lower()
            if name in LANGUAGE_ALIASES:
                name = LANGUAGE_ALIASES[name]
            lexer = pygments.lexers.get_lexer_by_name(name)
            return lexer
        return None

    def read_file(self, path):
        """Read a file as text; returns None (after logging) on failure.

        latin1 is used on Python 3 because it maps every byte value, so
        arbitrary (possibly mis-encoded) sources never raise on decode.
        """
        try:
            if sys.version_info < (3,):
                with open(path, 'r') as f:
                    return f.read()
            else:
                with open(path, 'r', encoding='latin1') as f:
                    return f.read()
        except Exception as e:
            print(e, file=sys.stderr)
            return None

class CtagsParser:
    """Extracts definition tags by driving Exuberant Ctags in --filter mode."""

    def __init__(self, ctags_command, options):
        self.process = subprocess.Popen([ctags_command, '-xu', '--filter', '--filter-terminator=' + TERMINATOR, '--format=1'], bufsize=-1,
                                        stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=CLOSEFDS,
                                        universal_newlines=True)
        if sys.version_info < (3,):
            self.child_stdout = self.process.stdout
        else:
            # Re-wrap the pipes as latin1 so ctags output never fails to decode.
            self.child_stdout = io.TextIOWrapper(self.process.stdout.buffer, encoding='latin1')
            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='latin1')
        self.child_stdin = self.process.stdin
        self.options = options

    def parse(self, path):
        """Send one path to ctags and read tag lines until the terminator."""
        print(path, file=self.child_stdin)
        self.child_stdin.flush()
        result = {}
        while True:
            line = self.child_stdout.readline()
            if not line or line.startswith(TERMINATOR):
                break
            # fixed: raw strings avoid invalid '\s' escape warnings
            match = re.search(r'(\S+)\s+(\d+)\s+' + re.escape(path) + r'\s+(.*)$', line)
            if match:
                (tag, lnum, image) = match.groups()
                if self.options.strip_punctuation:
                    tag = tag.strip(PUNCTUATION_CHARACTERS)
                if tag:
                    result[(True, tag, int(lnum))] = image
        return result

class MergingParser:
    """Combines a definition parser (ctags) with a reference parser (pygments).

    A tag reported as a definition suppresses the identical reference
    entry for the same tag and line.
    """

    def __init__(self, def_parser, ref_parser):
        self.def_parser = def_parser
        self.ref_parser = ref_parser
        # fixed: removed redundant trailing 'pass'

    def parse(self, path):
        def_result = self.def_parser.parse(path)
        ref_result = self.ref_parser.parse(path)
        result = def_result.copy()
        result.update(ref_result)
        # Drop references that duplicate a definition on the same line.
        for (isdef, tag, lnum) in def_result:
            ref_entry = (False, tag, lnum)
            if ref_entry in ref_result:
                del result[ref_entry]
        return result

def parse_langmap(string):
    """Parse a --langmap value like 'Lang:ext1.ext2,...' into {'.ext': 'Lang'}.

    Entries whose language name starts with a lowercase letter are
    reserved for the builtin parser and skipped.

    NOTE(review): the parameter shadows the imported 'string' module;
    kept for interface compatibility (the module is not used in here).
    """
    langmap = {}
    mappings = string.split(',')
    for mapping in mappings:
        lang, exts = mapping.split(':')
        if not lang[0].islower():  # lowercase is for the builtin parser
            for ext in exts.split('.'):
                if ext:
                    if sys.platform == 'win32':  # case-insensitive filesystem
                        langmap['.' + ext.lower()] = lang
                    else:
                        langmap['.' + ext] = lang
    return langmap

def handle_requests(langmap, options):
    """Read file paths from stdin and print their tags until EOF."""
    # Update ctags's path from the configuration file
    global EXUBERANT_CTAGS
    path = load_ctags_path()
    if path != '':
        EXUBERANT_CTAGS = path
    if EXUBERANT_CTAGS != '' and EXUBERANT_CTAGS != 'no':
        pygments_parser = PygmentsParser(langmap, options)
        try:
            ctags_parser = CtagsParser(EXUBERANT_CTAGS, options)
            parser = MergingParser(ctags_parser, pygments_parser)
        except Exception:  # fixed: dropped unused 'as e'; fall back to pygments alone
            parser = pygments_parser
    else:
        parser = PygmentsParser(langmap, options)
    while True:
        path = sys.stdin.readline()
        if not path:
            break
        path = path.rstrip()
        tags = parser.parse(path)
        for (isdef, tag, lnum), image in tags.items():
            typ = 'D' if isdef else 'R'
            print(typ, tag, lnum, path, image)
        print(TERMINATOR, end='')  # TERMINATOR already ends with '\n'
        sys.stdout.flush()

def get_parser_options_from_env(parser_options):
    """Apply comma-separated option flags from $GTAGSPYGMENTSOPTS."""
    env = os.getenv('GTAGSPYGMENTSOPTS')
    if env:
        for s in env.split(','):
            s = s.strip()
            if s == 'strippunctuation':
                parser_options.strip_punctuation = True

def load_ctags_path():
    """Ask gtags for the configured ctags command; '' when unavailable.

    Fixed: on Python 3 the subprocess pipe yields bytes on EVERY platform
    (the original decoded only on win32), which left 'path' as bytes so
    the callers' "path != ''" check was always true (b'' != ''). The pipe
    is now wrapped as latin1 text on Python 3 unconditionally.
    """
    path = ''
    p = subprocess.Popen("gtags --config=ctagscom", shell=True,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if p.wait() == 0:
        if sys.version_info >= (3,):
            path = io.TextIOWrapper(p.stdout, encoding='latin1').readline().rstrip()
        else:
            path = p.stdout.readline().rstrip()
    return path

def main():
    """Entry point: parse --langmap, read options, then serve requests."""
    opt_parser = optparse.OptionParser()
    opt_parser.add_option('--langmap', dest='langmap')
    (options, args) = opt_parser.parse_args()
    if not options.langmap:
        opt_parser.error('--langmap option not given')
    langmap = parse_langmap(options.langmap)
    parser_options = ParserOptions()
    get_parser_options_from_env(parser_options)
    handle_requests(langmap, parser_options)

if __name__ == '__main__':
    main()