#############################################################################
# Copyright (c) 2015-2016 Balabit
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# As an additional exemption you are allowed to compile & link against the
# OpenSSL libraries as published by the OpenSSL project. See the file
# COPYING for details.
#
#############################################################################

from __future__ import print_function, absolute_import
import ply.lex as lex
from .lexertoken import LexerToken
from .lexer import Lexer


class LexBasedLexer(Lexer):
    """Lexer implemented on top of a ply.lex lexer built from this object.

    Subclasses supply the ``tokens``, ``states`` and ``t_*`` rule definitions
    that ``lex.lex(object=self)`` collects.  Rules that assemble a token out
    of several matches keep the text gathered so far in
    ``self._lexer.current_token`` and its start offset in
    ``self._lexer.current_token_pos``.
    """

    def __init__(self):
        self._lexer = lex.lex(object=self)
        # Text and start offset of the token currently being assembled.
        self._lexer.current_token = ''
        self._lexer.current_token_pos = -1

    def token(self):
        """Return the next token, or a partial token at an unfinished end.

        When the underlying lexer has no more tokens but is not in the
        'INITIAL' state, a ``LexerToken`` of type 'LITERAL' with
        ``partial=True`` is returned, carrying the text accumulated so far
        and its start position.  Otherwise the underlying lexer's result is
        returned unchanged (``None`` once the input is exhausted).
        """
        token = self._lexer.token()
        if token is None and self._lexer.lexstate != 'INITIAL':
            return LexerToken('LITERAL',
                              value=self._lexer.current_token,
                              partial=True,
                              lexpos=self._lexer.current_token_pos)
        return token

    def input(self, text):
        """Start lexing ``text``, discarding any state left from earlier input."""
        # Unwind every pushed state so the new text is lexed from the
        # bottom of the state stack.
        while self._lexer.lexstatestack:
            self._lexer.pop_state()
        return self._lexer.input(text)

    def get_position(self):
        """Return the underlying lexer's current offset into the input."""
        return self._lexer.lexpos


class TemplateLexerError(Exception):
    """Raised when the template lexer meets input that no rule matches."""


class TemplateLexer(LexBasedLexer):
    """Lexer splitting a template string into LITERAL, MACRO and TEMPLATE_FUNC.

    Recognized forms are ``$NAME``, ``${...}`` and ``$(...)`` (with nested
    parentheses and quoted strings); everything else is LITERAL text.

    NOTE: ply uses each rule function's docstring as its regular expression
    and tries function rules in definition order, so rule docstrings must
    stay exactly as they are and rules must not be reordered.  Explanations
    are therefore written as comments, not docstrings.
    """
    # pylint: disable=no-self-use,invalid-name
    tokens = (
        'LITERAL',
        'MACRO', "TEMPLATE_FUNC"
    )

    states = (
        ('dollar', 'exclusive'),        # right after a '$'
        ('dollarbrace', 'exclusive'),   # inside '${'
        ('dollarparen', 'exclusive'),   # inside '$('
        ('string', 'exclusive'),        # inside "..." within '$('
        ('qstring', 'exclusive'),       # inside '...' within '$('
    )

    # Tokens

    # Any run of characters up to the next '$' is plain literal text.
    def t_LITERAL(self, t):
        r'[^\$]+'
        t.type = "LITERAL"
        return t

    # A '$' starts a macro or template function; remember where it began.
    def t_DOLLAR(self, t):
        r'\$'
        t.lexer.push_state('dollar')
        t.lexer.current_token = '$'
        t.lexer.current_token_pos = t.lexpos

    # '$NAME': emit the macro including its '$', positioned at the '$'.
    def t_dollar_MACRO(self, t):
        r'[a-zA-Z0-9_]+'
        t.lexer.pop_state()
        t.value = '$' + t.value
        t.lexpos = t.lexer.current_token_pos
        return t

    # '${': start collecting a braced macro.
    def t_dollar_BRACE_OPEN(self, t):
        r'{'
        t.lexer.push_state('dollarbrace')
        t.lexer.current_token = '$' + t.value

    # Everything up to the closing brace belongs to the macro text.
    def t_dollarbrace_MACRO(self, t):
        r'[^}]+'
        t.lexer.current_token += t.value

    # '}': the braced macro is complete, emit it as a MACRO token.
    def t_dollarbrace_BRACE_CLOSE(self, t):
        r'}'
        t.lexer.current_token += t.value
        # go back to INITIAL
        t.lexer.pop_state()
        t.lexer.pop_state()
        t.type = 'MACRO'
        t.value = t.lexer.current_token
        t.lexpos = t.lexer.current_token_pos
        return t

    # '$(': start collecting a template function and track paren nesting.
    def t_dollar_PAREN_OPEN(self, t):
        r'\('
        t.lexer.push_state('dollarparen')
        t.lexer.paren_count = 1
        t.lexer.current_token = '$('
        # The '(' directly follows the '$', so the token starts one earlier.
        t.lexer.current_token_pos = t.lexpos - 1

    # Nested '(' inside a template function.
    def t_dollarparen_PAREN_OPEN(self, t):
        r'\('
        t.lexer.paren_count += 1
        t.lexer.current_token += '('

    # Unquoted, non-parenthesis text inside a template function.
    def t_dollarparen_TEMPLATE_FUNC(self, t):
        r'''[^()'"]+'''
        t.lexer.current_token += t.value

    # ')': emit the TEMPLATE_FUNC token once the outermost paren is closed.
    def t_dollarparen_PAREN_CLOSE(self, t):
        r'\)'
        t.lexer.current_token += ')'
        t.lexer.paren_count -= 1
        if t.lexer.paren_count == 0:
            t.type = 'TEMPLATE_FUNC'
            t.value = t.lexer.current_token
            t.lexpos = t.lexer.current_token_pos
            # Leave both 'dollarparen' and 'dollar'.
            t.lexer.pop_state()
            t.lexer.pop_state()
            return t

        return None

    # Opening double quote inside a template function.
    def t_dollarparen_QUOTE(self, t):
        r'"'
        t.lexer.current_token += t.value
        t.lexer.push_state('string')

    # Opening apostrophe inside a template function.
    def t_dollarparen_APOSTROPHE(self, t):
        r"'"
        t.lexer.current_token += t.value
        t.lexer.push_state('qstring')

    # One ordinary character of a double-quoted string.
    def t_string_CHARS(self, t):
        r'[^"\\]'
        t.lexer.current_token += t.value

    # A backslash escape: the backslash plus the character it escapes.
    # NOTE(review): a backslash with nothing after it (end of input) matches
    # no rule in this state and ends up in t_string_error instead of
    # producing a partial token -- confirm this is intended.
    def t_string_backslash_plus_character(self, t):
        r'\\.'
        t.lexer.current_token += t.value

    def t_string_QUOTE(self, t):
        r'"'

        # closing quote character
        t.lexer.current_token += t.value
        t.lexer.pop_state()

    # One character of an apostrophe-quoted string (no escapes here).
    def t_qstring_CHARS(self, t):
        r"[^']"
        t.lexer.current_token += t.value

    def t_qstring_APOSTROPHE(self, t):
        r"'"

        # closing apostrophe
        t.lexer.current_token += t.value
        t.lexer.pop_state()

    # Shared error handler: raises TemplateLexerError naming the offending
    # character and the lexer state it was met in.
    def t_error(self, t):
        # On a lexing error ply puts the whole unconsumed remainder of the
        # input into t.value; only its first character is the one that no
        # rule could match, so report just that.
        raise TemplateLexerError("Illegal character {} in state {}".format(t.value[:1], self._lexer.lexstate))

    # Every exclusive state needs its own error rule; all share t_error.
    def t_dollar_error(self, t):
        return self.t_error(t)

    def t_dollarbrace_error(self, t):
        return self.t_error(t)

    def t_dollarparen_error(self, t):
        return self.t_error(t)

    def t_string_error(self, t):
        return self.t_error(t)

    def t_qstring_error(self, t):
        return self.t_error(t)