1"""Implementation of UTTP (Untyped Tree Transfer Protocol) in Python."""
2
3# $Id: uttp.py 589795 2019-07-16 18:31:28Z satskyse $
4
5# Tell pylint not to obsess about old-style class definitions
6# pylint: disable=C1001
7
8
9import sys
10
# True when the module is running under a Python 3 interpreter.
PY3 = sys.version_info[0] == 3
12
13
class Reader:
    """Parse input buffers and sequentially return a stream of UTTP tokens.

    Data is supplied piecewise with set_new_buf(); next_event() is then
    called repeatedly until it returns END_OF_BUFFER.  The parser state is
    kept between buffers, so a chunk length or chunk body may span several
    of them.  After each event the matching accessor (get_chunk(),
    get_control_symbol() or get_number()) returns the parsed value.
    """

    class FormatError(Exception):
        """Exception raised for unexpected characters in the input stream."""
        pass

    # Parsing events returned by next_event() and read_raw_data().
    # CHUNK_PART: chunk data whose header ended with '+', or chunk data
    #             that was cut short by the end of the current buffer.
    # CHUNK: the last (or only) piece of a chunk whose header ended
    #        with ' '.
    # CONTROL_SYMBOL: a single non-digit character.
    # NUMBER: a run of digits terminated by '=' or '-' (negated).
    # END_OF_BUFFER: the current buffer has been fully consumed.
    CHUNK_PART = 0
    CHUNK = 1
    CONTROL_SYMBOL = 2
    NUMBER = 3
    END_OF_BUFFER = 4

    # Internal states of the parser.
    __CONTROL_CHARS = 0
    __CHUNK_LENGTH = 1
    __CHUNK = 2

    def __init__(self, offset=0):
        """Initialize the state machine of this object.

        'offset' is the initial value of the stream offset that is
        reported by get_offset().
        """
        self.__state = Reader.__CONTROL_CHARS
        self.__offset = offset
        self.__buf = ''
        self.__buf_offset = 0
        self.__token = ''
        self.__length_acc = 0
        self.__chunk_continued = False

    def reset_offset(self, offset=0):
        """Set the current offset in the input stream to the new value."""
        self.__offset = offset

    def set_new_buf(self, buf):
        """Start processing of the next chunk of data.

        The parser state is left untouched, so parsing resumes exactly
        where the previous buffer ended.
        """
        self.__buf = buf
        self.__buf_offset = 0

    def next_event(self):
        """Parse the input buffer until a parsing event occurs.

        Return one of CHUNK_PART, CHUNK, CONTROL_SYMBOL, NUMBER or
        END_OF_BUFFER.  Raise Reader.FormatError if a chunk length is
        followed by a character other than '+', ' ', '=' or '-'.
        """
        if self.__buf_offset == len(self.__buf):
            return Reader.END_OF_BUFFER
        if self.__state == Reader.__CONTROL_CHARS:
            # At least one character will be consumed in this block.
            self.__offset += 1

            # Slicing is used to make it compatible between python 2 and
            # python 3. In case of python 2 the buf is a string while in case
            # of python 3 it is a bytes array
            next_char = self.__buf[self.__buf_offset : self.__buf_offset + 1]
            self.__buf_offset += 1

            # All non-digit characters are considered control symbols.
            if not next_char.isdigit():
                self.__token = next_char
                return Reader.CONTROL_SYMBOL

            # The current character is a digit, which is the first
            # character of the next chunk length. Proceed with reading
            # the chunk length.
            self.__state = Reader.__CHUNK_LENGTH
            self.__length_acc = int(next_char)
            if self.__buf_offset == len(self.__buf):
                return Reader.END_OF_BUFFER
            return self.__continue_reading_chunk_length()
        if self.__state == Reader.__CHUNK_LENGTH:
            return self.__continue_reading_chunk_length()
        return self.__continue_reading_chunk()

    def read_raw_data(self, data_size):
        """Read a block of fixed size data. Return a "parsing event".

        The block of 'data_size' characters is read the same way as a
        chunk body, so the result is CHUNK, CHUNK_PART or END_OF_BUFFER
        and the data is available through get_chunk().  Raise
        Reader.FormatError if the reader is in the middle of a chunk
        length or a chunk body.

        NOTE: the continuation flag of the most recently parsed chunk
        header is not reset here; if that header ended with '+', the
        completed block is reported as CHUNK_PART rather than CHUNK.
        """
        if self.__state != Reader.__CONTROL_CHARS:
            raise Reader.FormatError('invalid reader state')

        self.__length_acc = data_size
        self.__state = Reader.__CHUNK

        if self.__buf_offset == len(self.__buf):
            return Reader.END_OF_BUFFER

        return self.__continue_reading_chunk()

    def get_chunk(self):
        """Return the chunk (part) if next_event() was CHUNK(_PART)."""
        return self.__token

    def get_control_symbol(self):
        """Return the control symbol if next_event() was CONTROL_SYMBOL."""
        return self.__token

    def get_number(self):
        """Return the number if next_event() was NUMBER."""
        return self.__length_acc

    def get_offset(self):
        """Return the offset of the current character in the input stream."""
        return self.__offset

    def __continue_reading_chunk_length(self):
        """The current state is __CHUNK_LENGTH, proceed with parsing.

        Accumulate the remaining digits of the length and interpret the
        character that terminates them: '+' and ' ' start a chunk body,
        '=' and '-' complete a number, anything else is a format error.
        """
        # Slicing is used to make it compatible between python 2 and
        # python 3. In case of python 2 the buf is a string while in case
        # of python 3 it is a bytes array
        next_char = self.__buf[self.__buf_offset : self.__buf_offset + 1]
        while next_char.isdigit():
            self.__length_acc = self.__length_acc * 10 + int(next_char)
            self.__offset += 1
            self.__buf_offset += 1
            if self.__buf_offset == len(self.__buf):
                # The length may continue in the next buffer; the state
                # stays __CHUNK_LENGTH so that parsing resumes here.
                return Reader.END_OF_BUFFER
            next_char = self.__buf[self.__buf_offset : self.__buf_offset + 1]

        # Consume the character that terminated the digits.
        self.__offset += 1
        self.__buf_offset += 1

        # For python 3 the terminator is a bytes object.  latin-1 maps
        # every byte value to a character, so an unexpected non-ASCII byte
        # reaches the FormatError branch below instead of escaping as a
        # UnicodeDecodeError.
        if isinstance(next_char, bytes):
            next_char = next_char.decode('latin-1')

        if next_char == '+':
            self.__chunk_continued = True
        elif next_char == ' ':
            self.__chunk_continued = False
        else:
            # Not a chunk header: whatever follows is parsed as control
            # characters again.
            self.__state = Reader.__CONTROL_CHARS
            if next_char == '=':
                return Reader.NUMBER
            elif next_char == '-':
                self.__length_acc = -self.__length_acc
                return Reader.NUMBER
            else:
                self.__token = next_char
                raise Reader.FormatError('invalid character (' +
                                         repr(next_char) + ') '
                                         'after chunk length ' +
                                         str(self.__length_acc))

        self.__state = Reader.__CHUNK
        if self.__buf_offset == len(self.__buf):
            return Reader.END_OF_BUFFER

        return self.__continue_reading_chunk()

    def __continue_reading_chunk(self):
        """The current state is __CHUNK, proceed with reading the chunk.

        __length_acc holds the number of characters of the chunk body
        that are still to be read.
        """
        chunk_end = self.__buf_offset + self.__length_acc
        if chunk_end <= len(self.__buf):
            self.__token = self.__buf[self.__buf_offset:chunk_end]
            self.__offset += self.__length_acc
            self.__buf_offset = chunk_end
            # The last part of the chunk has been read --
            # get back to reading control symbols.
            self.__state = Reader.__CONTROL_CHARS
            if self.__chunk_continued:
                return Reader.CHUNK_PART
            return Reader.CHUNK

        # The buffer ends before the chunk does: hand out what is
        # available and remember how much is still missing.
        self.__token = self.__buf[self.__buf_offset:]
        self.__offset += len(self.__token)
        self.__length_acc -= len(self.__token)
        self.__buf_offset = len(self.__buf)
        return Reader.CHUNK_PART
174
class Writer:
    """Serialize series of chunks of data for sending over binary streams.

    Output accumulates in an internal buffer.  Each send_*() method
    returns None while the new data still fits within 'min_buf_size';
    otherwise it returns the accumulated buffer, which the caller is
    expected to write to the output stream, and the new data becomes the
    start of the next buffer.  flush_buf() returns whatever is pending.
    """

    def __init__(self, min_buf_size):
        """Create a writer with an empty internal buffer.

        'min_buf_size' is the buffer length up to which data keeps being
        accumulated before a buffer is handed back to the caller.
        """
        # b'' is a bytes object under Python 3 and a plain str under
        # Python 2, so no version check is needed here.
        self.__buf = b''
        self.__min_buf_size = min_buf_size

    @staticmethod
    def __to_bytes(data):
        """Return 'data' encoded to bytes when running under Python 3.

        Objects that already are bytes (including instances of bytes
        subclasses) are returned unchanged.  Under Python 2 the data is
        always returned as is.
        """
        if PY3 and not isinstance(data, bytes):
            data = data.encode()
        return data

    def __append(self, data):
        """Append 'data' to the buffer if it fits within the size limit.

        Otherwise make 'data' the new buffer and return the old one.
        """
        if len(self.__buf) + len(data) <= self.__min_buf_size:
            self.__buf += data
            return None

        buf = self.__buf
        self.__buf = data
        return buf

    def send_control_symbol(self, symbol):
        """Pack a control symbol into the internal buffer. Control
        symbol can be any single byte character except digits.
        Return a buffer to send to the output stream in case of overflow."""
        symbol = self.__to_bytes(symbol)

        # Unlike the other send_*() methods, only the current buffer
        # length is compared against the limit here.
        if len(self.__buf) < self.__min_buf_size:
            self.__buf += symbol
            return None

        buf = self.__buf
        self.__buf = symbol
        return buf

    def send_chunk(self, chunk, to_be_continued=False):
        """Copy a chunk of data to the internal buffer. Return a buffer
        to send to the output stream in case of overflow.

        The chunk is preceded by its length and by '+' if 'to_be_continued'
        is true or by ' ' otherwise.  This header is always appended to the
        current buffer, so on overflow the returned buffer ends with the
        header and the chunk data starts the next buffer."""
        chunk = self.__to_bytes(chunk)

        marker = '+' if to_be_continued else ' '
        self.__buf += self.__to_bytes(str(len(chunk)) + marker)

        return self.__append(chunk)

    def send_raw_data(self, data):
        """Send a block of fixed size data. Return a buffer
        to send to the output stream in case of overflow."""
        return self.__append(self.__to_bytes(data))

    def send_number(self, number):
        """Pack a number into the internal buffer. Return a buffer
        to send to the output stream in case of overflow.

        A non-negative number is written as its digits followed by '=';
        a negative one as the digits of its absolute value followed
        by '-'."""
        if number >= 0:
            text = str(number) + '='
        else:
            text = str(-number) + '-'

        return self.__append(self.__to_bytes(text))

    def flush_buf(self):
        """Return the contents of the internal buffer and reset the buffer."""
        buf = self.__buf
        self.__buf = b''
        return buf
274
275