1*061da546Spatrickimport time
2*061da546Spatrick
3*061da546Spatrickfrom .exceptions import EOF, TIMEOUT
4*061da546Spatrick
class Expecter(object):
    """Run one expect operation for a spawn object.

    Data read from ``spawn`` is handed to ``searcher``; the outcome is
    recorded on the spawn as ``before``, ``after``, ``match`` and
    ``match_index``.
    """

    def __init__(self, spawn, searcher, searchwindowsize=-1):
        """Bind a spawn object to a searcher.

        spawn            - object whose buffers are read and whose result
                           attributes are written
        searcher         - object providing search(), eof_index and
                           timeout_index
        searchwindowsize - how much already-seen data is re-searched along
                           with new data; -1 means use
                           spawn.searchwindowsize, a false value means
                           search the whole buffer every time
        """
        self.spawn = spawn
        self.searcher = searcher
        if searchwindowsize == -1:
            searchwindowsize = spawn.searchwindowsize
        self.searchwindowsize = searchwindowsize

    def new_data(self, data):
        """Append 'data' to the spawn's buffers and look for a match.

        Returns the index reported by the searcher when something matched
        (after filling in before/after/match/match_index on the spawn),
        otherwise returns None.
        """
        spawn = self.spawn
        searcher = self.searcher

        pos = spawn._buffer.tell()
        spawn._buffer.write(data)
        spawn._before.write(data)

        # determine which chunk of data to search; if a windowsize is
        # specified, this is the *new* data + the preceding <windowsize> bytes
        if self.searchwindowsize:
            spawn._buffer.seek(max(0, pos - self.searchwindowsize))
            window = spawn._buffer.read(self.searchwindowsize + len(data))
        else:
            # otherwise, search the whole buffer (really slow for large datasets)
            window = spawn.buffer
        index = searcher.search(window, len(data))
        if index >= 0:
            # Whatever follows the match stays buffered for the next expect.
            spawn._buffer = spawn.buffer_type()
            spawn._buffer.write(window[searcher.end:])
            # 'before' is everything seen so far minus the part of the
            # window that starts at the match.  A zero-width match at the
            # very end of the window leaves nothing to cut off; slicing
            # with [:-0] would wrongly yield an empty result in that case.
            seen = spawn._before.getvalue()
            tail = len(window) - searcher.start
            spawn.before = seen[:-tail] if tail > 0 else seen
            spawn._before = spawn.buffer_type()
            spawn.after = window[searcher.start:searcher.end]
            spawn.match = searcher.match
            spawn.match_index = index
            # Found a match
            return index
        elif self.searchwindowsize:
            # No match: keep only the window so the buffer stays small.
            spawn._buffer = spawn.buffer_type()
            spawn._buffer.write(window)

    def _failure_message(self, err):
        """Build the text carried by a raised EOF or TIMEOUT."""
        msg = str(self.spawn)
        msg += '\nsearcher: %s' % self.searcher
        if err is not None:
            msg = str(err) + '\n' + msg
        return msg

    def eof(self, err=None):
        """Record an end-of-file condition on the spawn.

        Returns the searcher's eof_index when EOF was one of the expected
        patterns; otherwise raises EOF.  'err', if given, is prepended to
        the exception message.
        """
        spawn = self.spawn

        # Read from _before rather than the buffer: with a search window
        # the buffer has been trimmed to the window and would lose data.
        spawn.before = spawn._before.getvalue()
        spawn._buffer = spawn.buffer_type()
        spawn._before = spawn.buffer_type()
        spawn.after = EOF
        index = self.searcher.eof_index
        if index >= 0:
            spawn.match = EOF
            spawn.match_index = index
            return index
        spawn.match = None
        spawn.match_index = None
        raise EOF(self._failure_message(err))

    def timeout(self, err=None):
        """Record a timeout on the spawn.

        Returns the searcher's timeout_index when TIMEOUT was one of the
        expected patterns; otherwise raises TIMEOUT.  'err', if given, is
        prepended to the exception message.  The buffers are left in place.
        """
        spawn = self.spawn

        # See eof(): _before holds all data, the buffer may be trimmed.
        spawn.before = spawn._before.getvalue()
        spawn.after = TIMEOUT
        index = self.searcher.timeout_index
        if index >= 0:
            spawn.match = TIMEOUT
            spawn.match_index = index
            return index
        spawn.match = None
        spawn.match_index = None
        raise TIMEOUT(self._failure_message(err))

    def errored(self):
        """Record on the spawn that the expect ended with another error."""
        spawn = self.spawn
        # See eof(): _before holds all data, the buffer may be trimmed.
        spawn.before = spawn._before.getvalue()
        spawn.after = None
        spawn.match = None
        spawn.match_index = None

    def expect_loop(self, timeout=-1):
        """Blocking expect.

        Searches the data already buffered on the spawn, then keeps reading
        with spawn.read_nonblocking() until something matches, EOF is hit or
        the time budget is used up.  'timeout' is in seconds; None means no
        time limit.  Returns the index of the matched pattern, or raises
        EOF / TIMEOUT when those were not among the expected patterns.
        """
        spawn = self.spawn

        if timeout is not None:
            end_time = time.time() + timeout

        try:
            # Start from a clean slate and replay what was already buffered
            # through new_data() so it is searched like freshly read data.
            incoming = spawn.buffer
            spawn._buffer = spawn.buffer_type()
            spawn._before = spawn.buffer_type()
            while True:
                idx = self.new_data(incoming)
                # Keep reading until exception or return.
                if idx is not None:
                    return idx
                # No match at this point
                if (timeout is not None) and (timeout < 0):
                    return self.timeout()
                # Still have time left, so read more data
                incoming = spawn.read_nonblocking(spawn.maxread, timeout)
                if spawn.delayafterread is not None:
                    time.sleep(spawn.delayafterread)
                if timeout is not None:
                    timeout = end_time - time.time()
        except EOF as e:
            return self.eof(e)
        except TIMEOUT as e:
            return self.timeout(e)
        except:
            # Deliberately bare: the spawn state must also be recorded for
            # KeyboardInterrupt and friends; the exception is re-raised.
            self.errored()
            raise
123*061da546Spatrick
124*061da546Spatrick
class searcher_string(object):
    '''Plain-string search helper.  It trades power for speed; use
    searcher_re when regular expressions are needed.

    Attributes:

        eof_index     - position of EOF in the pattern list, or -1
        timeout_index - position of TIMEOUT in the pattern list, or -1

    Set by a successful search():

        start - index into the buffer, first byte of match
        end   - index into the buffer, first byte after match
        match - the string that matched

    '''

    def __init__(self, strings):
        '''Build a searcher from 'strings', a sequence whose items are
        either strings to look for or the EOF / TIMEOUT types. Each item's
        position in the sequence is the index search() reports for it.'''

        self.eof_index = -1
        self.timeout_index = -1
        self._strings = []
        for position, candidate in enumerate(strings):
            if candidate is EOF:
                self.eof_index = position
            elif candidate is TIMEOUT:
                self.timeout_index = position
            else:
                self._strings.append((position, candidate))

    def __str__(self):
        '''Return a multi-line, human-readable listing of the patterns,
        ordered by their index.'''

        # The header carries index -1 so that sorting puts it first.
        labelled = [(-1, 'searcher_string:')]
        for position, text in self._strings:
            labelled.append((position, '    %d: %r' % (position, text)))
        if self.eof_index >= 0:
            labelled.append((self.eof_index, '    %d: EOF' % self.eof_index))
        if self.timeout_index >= 0:
            labelled.append((self.timeout_index,
                             '    %d: TIMEOUT' % self.timeout_index))
        labelled.sort()
        return '\n'.join(line for _, line in labelled)

    def search(self, buffer, freshlen, searchwindowsize=None):
        '''Look in 'buffer' for the earliest occurrence of any of the search
        strings.

        'freshlen' is the number of bytes at the end of 'buffer' that have
        not been searched before; when 'searchwindowsize' is None only
        those bytes, plus enough preceding bytes to hold a match straddling
        the boundary, are scanned. When 'searchwindowsize' is given, the
        scan instead starts that many bytes before the end of 'buffer'.

        On a match, sets 'start', 'end' and 'match' and returns the index of
        the matching string. Returns -1 when nothing matched. If several
        strings match at the same position the one listed first wins.'''

        earliest = None  # (position in buffer, pattern index, pattern)

        for index, needle in self._strings:
            if searchwindowsize is not None:
                # an explicit window overrides the freshlen shortcut
                scan_from = -searchwindowsize
            else:
                # a hit can only lie in the fresh data, or begin in the
                # last few bytes of the old data
                scan_from = -(freshlen + len(needle))
            found = buffer.find(needle, scan_from)
            if found < 0:
                continue
            if earliest is None or found < earliest[0]:
                earliest = (found, index, needle)

        if earliest is None:
            return -1
        self.start, best_index, self.match = earliest
        self.end = self.start + len(self.match)
        return best_index
218*061da546Spatrick
219*061da546Spatrick
class searcher_re(object):
    '''Regular-expression search helper. This class is for powerful
    pattern matching; for speed, see the helper class searcher_string.

    Attributes:

        eof_index     - index of EOF, or -1
        timeout_index - index of TIMEOUT, or -1

    After a successful match by the search() method the following attributes
    are available:

        start - index into the buffer, first byte of match
        end   - index into the buffer, first byte after match
        match - the match object returned by the pattern's search()

    '''

    def __init__(self, patterns):
        '''This creates an instance that searches for 'patterns', where
        'patterns' may be a list or any other iterable of compiled regular
        expressions, or the EOF or TIMEOUT types. Each item's position in
        the iterable is the index search() reports for it.'''

        self.eof_index = -1
        self.timeout_index = -1
        self._searches = []
        # enumerate() rather than range(len(...)): same numbering, but it
        # also accepts iterables that have no len().
        for n, s in enumerate(patterns):
            if s is EOF:
                self.eof_index = n
                continue
            if s is TIMEOUT:
                self.timeout_index = n
                continue
            self._searches.append((n, s))

    def __str__(self):
        '''This returns a human-readable string that represents the state of
        the object.'''

        ss = [(n, '    %d: re.compile(%r)' % (n, s.pattern))
              for n, s in self._searches]
        # The header carries index -1 so that sorting puts it first.
        ss.append((-1, 'searcher_re:'))
        if self.eof_index >= 0:
            ss.append((self.eof_index, '    %d: EOF' % self.eof_index))
        if self.timeout_index >= 0:
            ss.append((self.timeout_index, '    %d: TIMEOUT' %
                self.timeout_index))
        ss.sort()
        return '\n'.join(text for _, text in ss)

    def search(self, buffer, freshlen, searchwindowsize=None):
        '''This searches 'buffer' for the first occurrence of one of the regular
        expressions. 'freshlen' is accepted for interface compatibility with
        searcher_string.search() but is not used here.

        When 'searchwindowsize' is given, each pattern is searched starting
        that many bytes before the end of 'buffer'; otherwise the whole
        buffer is searched.

        If there is a match this returns the index of that pattern, and sets
        'start', 'end' and 'match'. Otherwise, returns -1. If several
        patterns match at the same position the one listed first wins.'''

        first_match = None
        # 'freshlen' doesn't help here -- we cannot predict the
        # length of a match, and the re module provides no help.
        if searchwindowsize is None:
            searchstart = 0
        else:
            searchstart = max(0, len(buffer) - searchwindowsize)
        for index, s in self._searches:
            match = s.search(buffer, searchstart)
            if match is None:
                continue
            n = match.start()
            if first_match is None or n < first_match:
                first_match = n
                the_match = match
                best_index = index
        if first_match is None:
            return -1
        self.start = first_match
        self.match = the_match
        self.end = self.match.end()
        return best_index
307