import time

from .exceptions import EOF, TIMEOUT


class Expecter(object):
    '''Run one expect operation for a spawn object.

    Data is fed to 'searcher' through new_data().  Whatever the outcome
    (match, EOF, timeout or unexpected error) the spawn attributes
    'before', 'after', 'match' and 'match_index' are updated to describe it.
    '''

    def __init__(self, spawn, searcher, searchwindowsize=-1):
        '''Store the spawn object, the searcher and the search window size.

        A 'searchwindowsize' of -1 (the default) means "take the value from
        spawn.searchwindowsize"; any other value is stored as given.  A
        false value makes new_data() search the whole buffer.
        '''
        self.spawn = spawn
        self.searcher = searcher
        if searchwindowsize == -1:
            searchwindowsize = spawn.searchwindowsize
        self.searchwindowsize = searchwindowsize

    def new_data(self, data):
        '''Append 'data' to the spawn buffers and search it for a match.

        Returns the index reported by the searcher when something matched;
        in that case spawn.before / after / match / match_index are set and
        spawn._buffer is replaced by the data following the match.
        Returns None when nothing matched.
        '''
        spawn = self.spawn
        searcher = self.searcher

        pos = spawn._buffer.tell()
        spawn._buffer.write(data)
        spawn._before.write(data)

        # Decide which chunk of data to search: with a window size this is
        # the *new* data plus the preceding <windowsize> items.
        if self.searchwindowsize:
            spawn._buffer.seek(max(0, pos - self.searchwindowsize))
            window = spawn._buffer.read(self.searchwindowsize + len(data))
        else:
            # Otherwise search the whole buffer (slow for large datasets).
            window = spawn.buffer

        index = searcher.search(window, len(data))
        if index >= 0:
            spawn._buffer = spawn.buffer_type()
            spawn._buffer.write(window[searcher.end:])

            # Everything accumulated so far, minus the tail of the window
            # that starts at the match.  The stop index is computed
            # explicitly: the former "[0:-tail]" slice collapsed to an empty
            # result when tail == 0 (zero-width match at the very end of the
            # window), because -0 == 0.  Clamping at 0 keeps the old result
            # when the tail is longer than the accumulated data.
            accumulated = spawn._before.getvalue()
            tail = len(window) - searcher.start
            spawn.before = accumulated[:max(0, len(accumulated) - tail)]

            spawn._before = spawn.buffer_type()
            spawn.after = window[searcher.start:searcher.end]
            spawn.match = searcher.match
            spawn.match_index = index
            # Found a match
            return index

        if self.searchwindowsize:
            # No match: keep only the window that was just searched.
            spawn._buffer = spawn.buffer_type()
            spawn._buffer.write(window)
        return None

    def _failure_message(self, err):
        '''Build the exception text used by eof() and timeout().

        Must be called after the spawn attributes have been updated, since
        the text starts from str(spawn).
        '''
        msg = str(self.spawn)
        msg += '\nsearcher: %s' % self.searcher
        if err is not None:
            msg = str(err) + '\n' + msg
        return msg

    def eof(self, err=None):
        '''Record that EOF was reached.

        Moves the buffered data into spawn.before, empties both internal
        buffers and sets spawn.after to EOF.  Returns searcher.eof_index
        when it is >= 0; otherwise raises EOF with a message built from the
        spawn object, the searcher and, if given, 'err'.
        '''
        spawn = self.spawn

        spawn.before = spawn.buffer
        spawn._buffer = spawn.buffer_type()
        spawn._before = spawn.buffer_type()
        spawn.after = EOF
        index = self.searcher.eof_index
        if index >= 0:
            spawn.match = EOF
            spawn.match_index = index
            return index

        spawn.match = None
        spawn.match_index = None
        raise EOF(self._failure_message(err))

    def timeout(self, err=None):
        '''Record that the operation timed out.

        Copies the buffered data into spawn.before (the buffers themselves
        are left untouched) and sets spawn.after to TIMEOUT.  Returns
        searcher.timeout_index when it is >= 0; otherwise raises TIMEOUT
        with a message built from the spawn object, the searcher and, if
        given, 'err'.
        '''
        spawn = self.spawn

        spawn.before = spawn.buffer
        spawn.after = TIMEOUT
        index = self.searcher.timeout_index
        if index >= 0:
            spawn.match = TIMEOUT
            spawn.match_index = index
            return index

        spawn.match = None
        spawn.match_index = None
        raise TIMEOUT(self._failure_message(err))

    def errored(self):
        '''Record that the operation ended with an unexpected exception.

        spawn.before receives the buffered data; 'after', 'match' and
        'match_index' are cleared.
        '''
        spawn = self.spawn
        spawn.before = spawn.buffer
        spawn.after = None
        spawn.match = None
        spawn.match_index = None

    def expect_loop(self, timeout=-1):
        '''Blocking expect.

        Searches the data already buffered on the spawn object, then keeps
        reading with spawn.read_nonblocking() until a pattern matches, the
        time budget is used up, or an exception occurs.

        'timeout' is a number of seconds, or None for no deadline.  With a
        negative value only the already-buffered data is searched before
        the timeout path is taken.

        Returns the index of the matched pattern (or of EOF / TIMEOUT when
        the searcher lists them).  Raises EOF or TIMEOUT when the searcher
        does not list them; any other exception is re-raised after
        errored() has updated the spawn attributes.
        '''
        spawn = self.spawn

        if timeout is not None:
            end_time = time.time() + timeout

        try:
            # Re-feed whatever is currently buffered through new_data() so
            # that both internal buffers start from the same content.
            incoming = spawn.buffer
            spawn._buffer = spawn.buffer_type()
            spawn._before = spawn.buffer_type()
            while True:
                idx = self.new_data(incoming)
                # Keep reading until exception or return.
                if idx is not None:
                    return idx
                # No match at this point
                if (timeout is not None) and (timeout < 0):
                    return self.timeout()
                # Still have time left, so read more data
                incoming = spawn.read_nonblocking(spawn.maxread, timeout)
                if spawn.delayafterread is not None:
                    time.sleep(spawn.delayafterread)
                if timeout is not None:
                    timeout = end_time - time.time()
        except EOF as e:
            return self.eof(e)
        except TIMEOUT as e:
            return self.timeout(e)
        except:
            # Deliberately catches everything: the spawn state is updated
            # and the exception is re-raised unchanged.
            self.errored()
            raise


class searcher_string(object):
    '''Plain string search helper for the spawn.expect_any() method.

    This helper class is for speed.  For more powerful regex patterns see
    the helper class searcher_re.

    Attributes:

        eof_index     - index of EOF, or -1
        timeout_index - index of TIMEOUT, or -1

    After a successful match by the search() method the following
    attributes are available:

        start - index into the buffer, first byte of match
        end   - index into the buffer, first byte after match
        match - the matching string itself
    '''

    def __init__(self, strings):
        '''Create a searcher for 'strings'.

        'strings' is an iterable whose items are strings or the EOF or
        TIMEOUT types.  The position of each item is remembered; EOF and
        TIMEOUT are recorded in eof_index / timeout_index and are not
        searched for.
        '''
        self.eof_index = -1
        self.timeout_index = -1
        self._strings = []
        for n, s in enumerate(strings):
            if s is EOF:
                self.eof_index = n
                continue
            if s is TIMEOUT:
                self.timeout_index = n
                continue
            self._strings.append((n, s))

    def __str__(self):
        '''Return a human-readable description of the searcher: a header
        line followed by one line per entry, ordered by index.'''
        # NOTE(review): the leading whitespace inside these format strings
        # is reproduced as seen; the listing this file was recovered from
        # collapses runs of spaces -- confirm against upstream.
        entries = [(n, ' %d: %r' % (n, s)) for n, s in self._strings]
        # The header uses index -1 so that it sorts before every entry.
        entries.append((-1, 'searcher_string:'))
        if self.eof_index >= 0:
            entries.append((self.eof_index, ' %d: EOF' % self.eof_index))
        if self.timeout_index >= 0:
            entries.append((self.timeout_index,
                            ' %d: TIMEOUT' % self.timeout_index))
        return '\n'.join(text for _, text in sorted(entries))

    def search(self, buffer, freshlen, searchwindowsize=None):
        '''Search 'buffer' for the first occurrence of one of the strings.

        'freshlen' must indicate the number of bytes at the end of 'buffer'
        which have not been searched before.  It helps to avoid searching
        the same, possibly big, buffer over and over again.

        When 'searchwindowsize' is given, only that many trailing bytes of
        'buffer' are searched instead.

        If there is a match this returns the index of that string and sets
        'start', 'end' and 'match'.  Otherwise this returns -1.
        '''
        first_match = None

        # 'freshlen' helps a lot here.  Further optimizations could
        # possibly include:
        #
        # using something like the Boyer-Moore Fast String Searching
        # Algorithm; pre-compiling the search through a list of
        # strings into something that can scan the input once to
        # search for all N strings; realize that if we search for
        # ['bar', 'baz'] and the input is '...foo' we need not bother
        # rescanning until we've read three more bytes.
        #
        # Sadly, I don't know enough about this interesting topic. /grahn

        for index, s in self._strings:
            if searchwindowsize is None:
                # the match, if any, can only be in the fresh data,
                # or at the very end of the old data
                offset = -(freshlen + len(s))
            else:
                # better obey searchwindowsize
                offset = -searchwindowsize
            n = buffer.find(s, offset)
            if n >= 0 and (first_match is None or n < first_match):
                first_match = n
                best_index, best_match = index, s
        if first_match is None:
            return -1
        self.match = best_match
        self.start = first_match
        self.end = self.start + len(self.match)
        return best_index


class searcher_re(object):
    '''Regular expression search helper for the spawn.expect_any() method.

    This helper class is for powerful pattern matching.  For speed, see
    the helper class searcher_string.

    Attributes:

        eof_index     - index of EOF, or -1
        timeout_index - index of TIMEOUT, or -1

    After a successful match by the search() method the following
    attributes are available:

        start - index into the buffer, first byte of match
        end   - index into the buffer, first byte after match
        match - the match object returned by the successful search
    '''

    def __init__(self, patterns):
        '''Create a searcher for 'patterns'.

        'patterns' is an iterable whose items are compiled regular
        expressions or the EOF or TIMEOUT types.  The position of each item
        is remembered; EOF and TIMEOUT are recorded in eof_index /
        timeout_index and are not searched for.
        '''
        self.eof_index = -1
        self.timeout_index = -1
        self._searches = []
        # enumerate() keeps this in line with searcher_string and does not
        # require 'patterns' to support len().
        for n, s in enumerate(patterns):
            if s is EOF:
                self.eof_index = n
                continue
            if s is TIMEOUT:
                self.timeout_index = n
                continue
            self._searches.append((n, s))

    def __str__(self):
        '''Return a human-readable description of the searcher: a header
        line followed by one line per entry, ordered by index.'''
        # NOTE(review): the leading whitespace inside these format strings
        # is reproduced as seen; the listing this file was recovered from
        # collapses runs of spaces -- confirm against upstream.
        entries = [(n, ' %d: re.compile(%r)' % (n, s.pattern))
                   for n, s in self._searches]
        # The header uses index -1 so that it sorts before every entry.
        entries.append((-1, 'searcher_re:'))
        if self.eof_index >= 0:
            entries.append((self.eof_index, ' %d: EOF' % self.eof_index))
        if self.timeout_index >= 0:
            entries.append((self.timeout_index,
                            ' %d: TIMEOUT' % self.timeout_index))
        return '\n'.join(text for _, text in sorted(entries))

    def search(self, buffer, freshlen, searchwindowsize=None):
        '''Search 'buffer' for the first occurrence of one of the patterns.

        'freshlen' is accepted for interface compatibility with
        searcher_string.search() but is not used: the length of a regex
        match cannot be predicted.

        When 'searchwindowsize' is given, the search starts that many bytes
        before the end of 'buffer'; otherwise it starts at the beginning.

        If there is a match this returns the index of that pattern and sets
        'start', 'end' and 'match'.  Otherwise this returns -1.
        '''
        first_match = None
        if searchwindowsize is None:
            searchstart = 0
        else:
            searchstart = max(0, len(buffer) - searchwindowsize)
        for index, s in self._searches:
            match = s.search(buffer, searchstart)
            if match is None:
                continue
            n = match.start()
            if first_match is None or n < first_match:
                first_match = n
                the_match = match
                best_index = index
        if first_match is None:
            return -1
        self.start = first_match
        self.match = the_match
        self.end = self.match.end()
        return best_index