# -*- coding: utf-8 -*-

"""
The MIT License (MIT)

Copyright (c) 2015-present Rapptz

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
"""

from .errors import UnexpectedQuoteError, InvalidEndOfQuotedStringError, ExpectedClosingQuoteError

# map from opening quotes to closing quotes
_quotes = {
    '"': '"',
    "‘": "’",
    "‚": "‛",
    "“": "”",
    "„": "‟",
    "⹂": "⹂",
    "「": "」",
    "『": "』",
    "〝": "〞",
    "﹁": "﹂",
    "﹃": "﹄",
    # The next two entries are written as escapes on purpose: their literal
    # characters are width variants that get folded to '"' and to the
    # full-width corner brackets by character-width normalisation, which
    # turns them into a syntax error and a duplicate key respectively.
    "\uff02": "\uff02",  # FULLWIDTH QUOTATION MARK
    "\uff62": "\uff63",  # HALFWIDTH LEFT / RIGHT CORNER BRACKET
    "«": "»",
    "‹": "›",
    "《": "》",
    "〈": "〉",
}
# every character that counts as a quote, whether it opens or closes
_all_quotes = set(_quotes) | set(_quotes.values())


class StringView:
    """A cursor over a string buffer with single-step undo.

    ``index`` is the current position in ``buffer``, ``end`` is the buffer
    length captured at construction and ``previous`` is the position the
    cursor had before the last cursor-moving call, which is what
    :meth:`undo` restores.
    """

    def __init__(self, buffer):
        self.index = 0
        self.buffer = buffer
        self.end = len(buffer)
        self.previous = 0

    @property
    def current(self):
        """The character under the cursor, or ``None`` at end of buffer."""
        return None if self.eof else self.buffer[self.index]

    @property
    def eof(self):
        """``True`` once the cursor is at or past ``end``."""
        return self.index >= self.end

    def undo(self):
        """Move the cursor back to where it was before the last move.

        Only one level is kept: ``previous`` is overwritten by every
        cursor-moving call.
        """
        self.index = self.previous

    def skip_ws(self):
        """Advance past consecutive whitespace starting at the cursor.

        Returns ``True`` if the cursor moved, ``False`` otherwise.
        """
        pos = 0
        # self.index is not modified inside the loop, so the eof test only
        # guards entry; the loop ends on the first non-space character or
        # when indexing runs off the buffer (IndexError).
        while not self.eof:
            try:
                current = self.buffer[self.index + pos]
                if not current.isspace():
                    break
                pos += 1
            except IndexError:
                break

        self.previous = self.index
        self.index += pos
        return self.previous != self.index

    def skip_string(self, string):
        """Consume ``string`` if the buffer continues with it at the cursor.

        Returns ``True`` and advances past it on a match; otherwise returns
        ``False`` and leaves both ``index`` and ``previous`` untouched.
        """
        strlen = len(string)
        if self.buffer[self.index:self.index + strlen] == string:
            self.previous = self.index
            self.index += strlen
            return True
        return False

    def read_rest(self):
        """Return everything from the cursor to the end and move to ``end``."""
        result = self.buffer[self.index:]
        self.previous = self.index
        self.index = self.end
        return result

    def read(self, n):
        """Return up to ``n`` characters from the cursor and advance by ``n``.

        The slice is truncated at the end of the buffer, but the cursor is
        still advanced by the full ``n``.
        """
        result = self.buffer[self.index:self.index + n]
        self.previous = self.index
        self.index += n
        return result

    def get(self):
        """Advance the cursor by one and return the character it lands on.

        Note that this returns the character *after* the current one, not
        the current one. Returns ``None`` when the new position is past the
        end of the buffer; the cursor is advanced in either case.
        """
        try:
            result = self.buffer[self.index + 1]
        except IndexError:
            result = None

        self.previous = self.index
        self.index += 1
        return result

    def get_word(self):
        """Return the run of non-whitespace characters starting at the cursor.

        The cursor is left on the whitespace character that ended the word,
        or at the end of the buffer. Returns ``''`` if the cursor is already
        on whitespace or at end of buffer.
        """
        pos = 0
        # Same loop shape as skip_ws: the eof test only guards entry, the
        # loop ends on the first whitespace character or on IndexError.
        while not self.eof:
            try:
                current = self.buffer[self.index + pos]
                if current.isspace():
                    break
                pos += 1
            except IndexError:
                break
        self.previous = self.index
        result = self.buffer[self.index:self.index + pos]
        self.index += pos
        return result

    def get_quoted_word(self):
        """Read one word at the cursor, honouring quotes and backslash escapes.

        If the character under the cursor is an opening quote from
        ``_quotes``, everything up to the matching closing quote is returned
        without the quotes; the closing quote must be followed by whitespace
        or the end of the buffer. Otherwise characters are read up to the
        next whitespace. A backslash followed by a quote character yields
        that quote literally; a backslash before anything else is kept as-is.

        Returns ``None`` if the cursor is at end of buffer.

        Raises
        ------
        ExpectedClosingQuoteError
            The buffer ended inside a quoted word.
        InvalidEndOfQuotedStringError
            A closing quote was followed by a non-whitespace character.
        UnexpectedQuoteError
            An unescaped quote character appeared inside an unquoted word.
        """
        current = self.current
        if current is None:
            return None

        close_quote = _quotes.get(current)
        is_quoted = bool(close_quote)
        if is_quoted:
            # the opening quote itself is not part of the result
            result = []
            _escaped_quotes = (current, close_quote)
        else:
            result = [current]
            _escaped_quotes = _all_quotes

        while not self.eof:
            current = self.get()
            if not current:
                if is_quoted:
                    # unexpected EOF
                    raise ExpectedClosingQuoteError(close_quote)
                return ''.join(result)

            # currently we accept strings in the format of "hello world"
            # to embed a quote inside the string you must escape it: "a \"world\""
            if current == '\\':
                next_char = self.get()
                if not next_char:
                    # string ends with \ and no character after it
                    if is_quoted:
                        # if we're quoted then we're expecting a closing quote
                        raise ExpectedClosingQuoteError(close_quote)
                    # if we aren't then we just let it through
                    return ''.join(result)

                if next_char in _escaped_quotes:
                    # escaped quote
                    result.append(next_char)
                else:
                    # different escape character, ignore it: step back onto
                    # the backslash so the following character is processed
                    # normally on the next iteration, and keep the backslash
                    self.undo()
                    result.append(current)
                continue

            if not is_quoted and current in _all_quotes:
                # we aren't quoted
                raise UnexpectedQuoteError(current)

            # closing quote
            if is_quoted and current == close_quote:
                # this also consumes the character following the quote
                next_char = self.get()
                valid_eof = not next_char or next_char.isspace()
                if not valid_eof:
                    raise InvalidEndOfQuotedStringError(next_char)

                # we're quoted so it's okay
                return ''.join(result)

            if current.isspace() and not is_quoted:
                # end of word found
                return ''.join(result)

            result.append(current)

    def __repr__(self):
        return '<StringView pos: {0.index} prev: {0.previous} end: {0.end} eof: {0.eof}>'.format(self)