"""Implementation of UTTP (Untyped Tree Transfer Protocol) in Python."""

# $Id: uttp.py 589795 2019-07-16 18:31:28Z satskyse $

# Tell pylint not to obsess about old-style class definitions
# pylint: disable=C1001


import sys

# True when running on Python 3 or later, where the protocol data is handled
# as ``bytes`` and text must be encoded explicitly.
PY3 = sys.version_info[0] >= 3


def _to_wire(data):
    """Return *data* in the form that is stored in the Writer's buffer.

    On Python 2 the data is returned unchanged (``str`` is already a byte
    string).  On Python 3 ``bytes`` is returned unchanged, ``bytearray`` and
    ``memoryview`` objects are copied into ``bytes`` (so that the writer
    never aliases a mutable object owned by the caller), and anything else
    is converted by calling its ``encode()`` method.
    """
    if not PY3 or isinstance(data, bytes):
        return data
    if isinstance(data, (bytearray, memoryview)):
        return bytes(data)
    return data.encode()


class Reader:
    """Parse input buffer and sequentially return a stream of UTTP tokens.

    Typical use: feed a buffer with set_new_buf(), then call next_event()
    repeatedly until it returns END_OF_BUFFER; after each event fetch the
    corresponding value with get_chunk(), get_control_symbol() or
    get_number().  The parsing state is kept between buffers, so a token
    may be split across several buffers.
    """

    class FormatError(Exception):
        """Exception raised for unexpected characters in the input stream."""
        pass

    # Parsing events returned by next_event() and read_raw_data().
    CHUNK_PART = 0
    CHUNK = 1
    CONTROL_SYMBOL = 2
    NUMBER = 3
    END_OF_BUFFER = 4

    # Internal states of the parser.
    __CONTROL_CHARS = 0
    __CHUNK_LENGTH = 1
    __CHUNK = 2

    def __init__(self, offset=0):
        """Initialize the state machine of this object.

        offset is the initial value of the input stream offset that is
        reported by get_offset().
        """
        self.__state = Reader.__CONTROL_CHARS
        self.__offset = offset
        self.__buf = ''
        self.__buf_offset = 0
        self.__token = ''
        self.__length_acc = 0
        self.__chunk_continued = False

    def reset_offset(self, offset=0):
        """Set the current offset in the input stream to the new value."""
        self.__offset = offset

    def set_new_buf(self, buf):
        """Start processing of the next chunk of data."""
        self.__buf = buf
        self.__buf_offset = 0

    def next_event(self):
        """Parse the input buffer until a parsing event occurs.

        Return one of CHUNK_PART, CHUNK, CONTROL_SYMBOL, NUMBER or
        END_OF_BUFFER.  Raise Reader.FormatError if a chunk length is
        followed by an unexpected character.
        """
        if self.__buf_offset == len(self.__buf):
            return Reader.END_OF_BUFFER

        if self.__state == Reader.__CHUNK_LENGTH:
            return self.__continue_reading_chunk_length()
        if self.__state != Reader.__CONTROL_CHARS:
            return self.__continue_reading_chunk()

        # At least one character will be consumed in this state.
        self.__offset += 1

        # Slicing (rather than indexing) keeps the result a one-character
        # string on python 2 and a one-byte bytes object on python 3.
        next_char = self.__buf[self.__buf_offset:self.__buf_offset + 1]
        self.__buf_offset += 1

        # All non-digit characters are considered control symbols.
        if not next_char.isdigit():
            self.__token = next_char
            return Reader.CONTROL_SYMBOL

        # The current character is a digit, which is the first
        # character of the next chunk length. Proceed with reading
        # the chunk length.
        self.__state = Reader.__CHUNK_LENGTH
        self.__length_acc = int(next_char)
        if self.__buf_offset == len(self.__buf):
            return Reader.END_OF_BUFFER
        return self.__continue_reading_chunk_length()

    def read_raw_data(self, data_size):
        """Read a block of fixed size data. Return a "parsing event".

        The block is reported like a chunk: CHUNK_PART is returned while
        only a part of the block is available in the current buffer
        (continue with next_event() after set_new_buf()), CHUNK is returned
        with the last part, and END_OF_BUFFER is returned if the buffer is
        already exhausted.  Raise Reader.FormatError if the reader is in
        the middle of a chunk or a chunk length.
        """
        if self.__state != Reader.__CONTROL_CHARS:
            raise Reader.FormatError('invalid reader state')

        self.__length_acc = data_size
        self.__state = Reader.__CHUNK
        # A raw block carries no continuation marker of its own; forget the
        # marker of the previously parsed chunk, otherwise the end of the
        # block could be reported as CHUNK_PART instead of CHUNK.
        self.__chunk_continued = False

        if self.__buf_offset == len(self.__buf):
            return Reader.END_OF_BUFFER

        return self.__continue_reading_chunk()

    def get_chunk(self):
        """Return the chunk (part) if next_event() was CHUNK(_PART)."""
        return self.__token

    def get_control_symbol(self):
        """Return the control symbol if next_event() was CONTROL_SYMBOL."""
        return self.__token

    def get_number(self):
        """Return the number if next_event() was NUMBER."""
        return self.__length_acc

    def get_offset(self):
        """Return the offset of the current character in the input stream."""
        return self.__offset

    def __continue_reading_chunk_length(self):
        """The current state is __CHUNK_LENGTH, proceed with parsing.

        The caller guarantees that the buffer is not exhausted.
        """
        # Slicing is used to make it compatible between python 2 and
        # python 3. In case of python 2 the buf is a string while in case
        # of python 3 it is a bytes array
        next_char = self.__buf[self.__buf_offset:self.__buf_offset + 1]
        while next_char.isdigit():
            self.__length_acc = self.__length_acc * 10 + int(next_char)
            self.__offset += 1
            self.__buf_offset += 1
            if self.__buf_offset == len(self.__buf):
                # The length may continue in the next buffer.
                return Reader.END_OF_BUFFER
            next_char = self.__buf[self.__buf_offset:self.__buf_offset + 1]

        # Consume the character that terminates the length.
        self.__offset += 1
        self.__buf_offset += 1

        # Byte strings are converted to text for the comparisons below.
        # latin-1 maps every byte value to a character, so an unexpected
        # non-ASCII byte is reported as FormatError and not as
        # UnicodeDecodeError.
        if isinstance(next_char, (bytes, bytearray)):
            next_char = next_char.decode('latin-1')

        if next_char == '+':
            # The chunk will be continued by another chunk.
            self.__chunk_continued = True
        elif next_char == ' ':
            self.__chunk_continued = False
        else:
            # Not a chunk: either a number or a format error. In both
            # cases get back to reading control symbols.
            self.__state = Reader.__CONTROL_CHARS
            if next_char == '=':
                return Reader.NUMBER
            if next_char == '-':
                self.__length_acc = -self.__length_acc
                return Reader.NUMBER
            self.__token = next_char
            raise Reader.FormatError('invalid character (' +
                                     repr(next_char) + ') '
                                     'after chunk length ' +
                                     str(self.__length_acc))

        self.__state = Reader.__CHUNK
        if self.__buf_offset == len(self.__buf):
            return Reader.END_OF_BUFFER

        return self.__continue_reading_chunk()

    def __continue_reading_chunk(self):
        """The current state is __CHUNK, proceed with reading the chunk."""
        chunk_end = self.__buf_offset + self.__length_acc
        if chunk_end > len(self.__buf):
            # Only a part of the chunk is in this buffer: hand out what is
            # available and remember how much is still to be read.
            self.__token = self.__buf[self.__buf_offset:]
            self.__offset += len(self.__token)
            self.__length_acc -= len(self.__token)
            self.__buf_offset = len(self.__buf)
            return Reader.CHUNK_PART

        self.__token = self.__buf[self.__buf_offset:chunk_end]
        self.__offset += self.__length_acc
        self.__buf_offset = chunk_end
        # The last part of the chunk has been read --
        # get back to reading control symbols.
        self.__state = Reader.__CONTROL_CHARS
        if self.__chunk_continued:
            return Reader.CHUNK_PART
        return Reader.CHUNK


class Writer:
    """Serialize series of chunks of data for sending over binary streams.

    Data is accumulated in an internal buffer. Every send_*() method
    returns None while the data fits into the buffer; otherwise it returns
    the accumulated buffer (which the caller must send to the output
    stream) and starts a new buffer with the data that did not fit.
    """

    def __init__(self, min_buf_size):
        """Create a writer that accumulates about min_buf_size bytes."""
        # b'' is a plain (byte) string on python 2 and bytes on python 3.
        self.__buf = b''
        self.__min_buf_size = min_buf_size

    def __append_or_swap(self, data):
        """Append data to the buffer if it fits, otherwise swap buffers.

        Return None if data has been appended. Otherwise make data the
        new internal buffer and return the previous buffer.
        """
        if len(self.__buf) + len(data) <= self.__min_buf_size:
            self.__buf += data
            return None

        buf = self.__buf
        self.__buf = data
        return buf

    def send_control_symbol(self, symbol):
        """Pack a control symbol into the internal buffer. Control
        symbol can be any single byte character except digits.
        Return a buffer to send to the output stream in case of overflow."""
        symbol = _to_wire(symbol)

        # Unlike the other methods, only the current buffer size is
        # checked here, not the size of the symbol.
        if len(self.__buf) < self.__min_buf_size:
            self.__buf += symbol
            return None

        buf = self.__buf
        self.__buf = symbol
        return buf

    def send_chunk(self, chunk, to_be_continued=False):
        """Copy a chunk of data to the internal buffer. Return a buffer
        to send to the output stream in case of overflow.

        The chunk is preceded by its length and by '+' if to_be_continued
        is true (more parts of the same chunk will follow) or by ' '
        otherwise.
        """
        chunk = _to_wire(chunk)

        header = str(len(chunk)) + ('+' if to_be_continued else ' ')
        if PY3:
            header = header.encode()
        # The header is always appended to the current buffer; only the
        # chunk body may be moved to a new buffer.
        self.__buf += header

        return self.__append_or_swap(chunk)

    def send_raw_data(self, data):
        """Send a block of fixed size data. Return a buffer
        to send to the output stream in case of overflow."""
        return self.__append_or_swap(_to_wire(data))

    def send_number(self, number):
        """Pack a number into the internal buffer. Return a buffer
        to send to the output stream in case of overflow.

        The number is written as its absolute value followed by '=' for
        non-negative numbers or by '-' for negative ones.
        """
        if number >= 0:
            number = str(number) + '='
        else:
            number = str(-number) + '-'

        if PY3:
            number = number.encode()

        return self.__append_or_swap(number)

    def flush_buf(self):
        """Return the contents of the internal buffer and reset the buffer."""
        buf = self.__buf
        self.__buf = b''
        return buf