1# -*- coding: utf-8 -*-
2
3"""
4The MIT License (MIT)
5
6Copyright (c) 2015-present Rapptz
7
8Permission is hereby granted, free of charge, to any person obtaining a
9copy of this software and associated documentation files (the "Software"),
10to deal in the Software without restriction, including without limitation
11the rights to use, copy, modify, merge, publish, distribute, sublicense,
12and/or sell copies of the Software, and to permit persons to whom the
13Software is furnished to do so, subject to the following conditions:
14
15The above copyright notice and this permission notice shall be included in
16all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24DEALINGS IN THE SOFTWARE.
25"""
26
27from .errors import UnexpectedQuoteError, InvalidEndOfQuotedStringError, ExpectedClosingQuoteError
28
# Map from each supported opening quote character to the closing quote
# character that terminates it.
#
# The fullwidth quotation mark and the halfwidth corner brackets are written
# as \u escapes on purpose: a compatibility-normalized copy of those
# characters collapses into the ASCII quote (which breaks the literal) and
# into the regular corner brackets (which duplicates an existing key).
_quotes = {
    '"': '"',
    "‘": "’",
    "‚": "‛",
    "“": "”",
    "„": "‟",
    "⹂": "⹂",
    "「": "」",
    "『": "』",
    "〝": "〞",
    "﹁": "﹂",
    "﹃": "﹄",
    "\uff02": "\uff02",  # FULLWIDTH QUOTATION MARK
    "\uff62": "\uff63",  # HALFWIDTH LEFT / RIGHT CORNER BRACKET
    "«": "»",
    "‹": "›",
    "《": "》",
    "〈": "〉",
}

# Every character that acts as an opening or a closing quote.
_all_quotes = set(_quotes) | set(_quotes.values())
50
class StringView:
    """Cursor over a string buffer that is consumed piece by piece.

    Attributes
    -----------
    buffer
        The string being read.
    index
        Position of the next character to look at.
    end
        Length of ``buffer`` at construction time.
    previous
        Value ``index`` had before the most recent consuming call; this is
        what :meth:`undo` restores.
    """

    def __init__(self, buffer):
        self.buffer = buffer
        self.end = len(buffer)
        self.index = 0
        self.previous = 0

    @property
    def current(self):
        """The character at ``index``, or ``None`` once the view is at EOF."""
        if self.eof:
            return None
        return self.buffer[self.index]

    @property
    def eof(self):
        """Whether ``index`` has reached or passed ``end``."""
        return self.end <= self.index

    def undo(self):
        """Move ``index`` back to where it was before the last consuming call."""
        self.index = self.previous

    def skip_ws(self):
        """Advance past consecutive whitespace starting at ``index``.

        Returns ``True`` when at least one character was skipped.
        """
        start = self.index
        stop = start
        if not self.eof:
            limit = len(self.buffer)
            while stop < limit and self.buffer[stop].isspace():
                stop += 1

        self.previous = start
        self.index = stop
        return start != stop

    def skip_string(self, string):
        """Consume ``string`` if the buffer continues with it.

        Returns ``True`` and advances on a match; otherwise returns ``False``
        and leaves the view untouched.
        """
        stop = self.index + len(string)
        if self.buffer[self.index:stop] != string:
            return False

        self.previous = self.index
        self.index = stop
        return True

    def read_rest(self):
        """Return everything from ``index`` onwards and jump to ``end``."""
        start = self.index
        remainder = self.buffer[start:]
        self.previous = start
        self.index = self.end
        return remainder

    def read(self, n):
        """Return up to ``n`` characters from ``index`` and advance by ``n``."""
        start = self.index
        stop = start + n
        chunk = self.buffer[start:stop]
        self.previous, self.index = start, stop
        return chunk

    def get(self):
        """Advance one position and return the character now under the cursor.

        Returns ``None`` when the new position lies outside the buffer; the
        cursor is advanced either way.
        """
        upcoming = self.index + 1
        try:
            char = self.buffer[upcoming]
        except IndexError:
            char = None

        self.previous = self.index
        self.index = upcoming
        return char

    def get_word(self):
        """Consume and return the run of non-whitespace characters at ``index``.

        Returns an empty string when the view is at EOF or sits on whitespace.
        """
        start = self.index
        stop = start
        if not self.eof:
            limit = len(self.buffer)
            while stop < limit and not self.buffer[stop].isspace():
                stop += 1

        self.previous = start
        self.index = stop
        return self.buffer[start:stop]

    def get_quoted_word(self):
        """Consume the next word, honouring quotes and backslash escapes.

        If the character at ``index`` is a known opening quote, everything up
        to the matching closing quote is returned without the quotes, and the
        closing quote must be followed by whitespace or the end of input.
        Otherwise characters are collected up to the next whitespace.

        Returns ``None`` when the view is already at EOF.

        Raises
        -------
        ExpectedClosingQuoteError
            The input ended before the closing quote was found.
        InvalidEndOfQuotedStringError
            A non-whitespace character directly follows the closing quote.
        UnexpectedQuoteError
            A quote character appears unescaped inside an unquoted word.
        """
        first = self.current
        if first is None:
            return None

        closer = _quotes.get(first)
        quoted = True if closer else False
        # A quoted word drops its opening quote and only lets that quote pair
        # be escaped; an unquoted word keeps its first character and lets any
        # known quote be escaped.
        pieces = [] if quoted else [first]
        escapable = (first, closer) if quoted else _all_quotes

        while not self.eof:
            char = self.get()
            if not char:
                # input exhausted: fine for a bare word, an error inside quotes
                if not quoted:
                    return ''.join(pieces)
                raise ExpectedClosingQuoteError(closer)

            # a quote is embedded by escaping it, e.g. "a \"world\""
            if char == '\\':
                escaped = self.get()
                if not escaped:
                    # trailing backslash with nothing after it
                    if not quoted:
                        return ''.join(pieces)
                    raise ExpectedClosingQuoteError(closer)

                if escaped in escapable:
                    # keep the quote, drop the backslash
                    pieces.append(escaped)
                else:
                    # not a quote escape: keep the backslash and step back so
                    # the following character is read again normally
                    self.undo()
                    pieces.append(char)
                continue

            if quoted:
                if char == closer:
                    following = self.get()
                    if following and not following.isspace():
                        raise InvalidEndOfQuotedStringError(following)
                    return ''.join(pieces)
            else:
                if char in _all_quotes:
                    raise UnexpectedQuoteError(char)
                if char.isspace():
                    # whitespace terminates a bare word
                    return ''.join(pieces)

            pieces.append(char)


    def __repr__(self):
        template = '<StringView pos: {0.index} prev: {0.previous} end: {0.end} eof: {0.eof}>'
        return template.format(self)
195