1#!/usr/bin/python -t
2
3#   This library is free software; you can redistribute it and/or
4#   modify it under the terms of the GNU Lesser General Public
5#   License as published by the Free Software Foundation; either
6#   version 2.1 of the License, or (at your option) any later version.
7#
8#   This library is distributed in the hope that it will be useful,
9#   but WITHOUT ANY WARRANTY; without even the implied warranty of
10#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11#   Lesser General Public License for more details.
12#
13#   You should have received a copy of the GNU Lesser General Public
14#   License along with this library; if not, write to the
15#      Free Software Foundation, Inc.,
16#      59 Temple Place, Suite 330,
17#      Boston, MA  02111-1307  USA
18
19# This file is part of urlgrabber, a high-level cross-protocol url-grabber
20# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
21
22"""mirror.py tests"""
23
24import sys
25import os
26import tempfile, random, os
27
28import urlgrabber.grabber
29from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions
30import urlgrabber.mirror
31from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder
32
33from base_test_code import *
34
class FakeLogger:
    """Minimal logger stand-in that records every message it is given.

    Each logging method appends the formatted message to ``self.logs`` so a
    test can compare the exact sequence of emitted log lines.
    """

    def __init__(self):
        # Formatted messages, in the order they were logged.
        self.logs = []

    def debug(self, msg, *args):
        """Record ``msg``, %-formatted with ``args`` when any are given.

        As in the standard ``logging`` module, formatting is skipped when no
        arguments are supplied, so a message containing a literal ``%`` is
        stored verbatim instead of raising a formatting error.
        """
        if args:
            msg = msg % args
        self.logs.append(msg)

    # Every severity level behaves the same: only the text is recorded.
    warn = warning = info = error = debug
41
class BasicTests(TestCase):
    """Exercise the MirrorGroup fetch methods against the good mirrors."""

    def setUp(self):
        self.g  = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_urlgrab(self):
        """MirrorGroup.urlgrab"""
        url = 'short_reference'
        # mkstemp() creates the file securely, unlike the deprecated and
        # race-prone mktemp(); urlgrab is expected to overwrite it.
        fd, filename = tempfile.mkstemp()
        os.close(fd)
        try:
            self.mg.urlgrab(url, filename)
            with open(filename, 'rb') as fo:
                data = fo.read()
        finally:
            # never leave the scratch file behind, pass or fail
            if os.path.exists(filename):
                os.unlink(filename)

        self.assertEqual(data, short_reference_data)

    def test_urlread(self):
        """MirrorGroup.urlread"""
        url = 'short_reference'
        data = self.mg.urlread(url)

        self.assertEqual(data, short_reference_data)

    def test_urlopen(self):
        """MirrorGroup.urlopen"""
        url = 'short_reference'
        fo = self.mg.urlopen(url)
        try:
            data = fo.read()
        finally:
            # close the handle even if read() raises
            fo.close()

        self.assertEqual(data, short_reference_data)
73
class SubclassTests(TestCase):
    """Check that the MirrorGroup subclasses can fetch a file."""

    def setUp(self):
        self.g  = URLGrabber()
        self.fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]

    def fetchwith(self, mgclass):
        """Grab 'short_reference' through ``mgclass`` and verify its contents.

        ``mgclass`` is called as ``mgclass(grabber, mirrors)`` to build the
        mirror group under test.
        """
        self.mg = mgclass(self.g, self.fullmirrors)

        url = 'short_reference'
        # mkstemp() creates the file securely, unlike the deprecated and
        # race-prone mktemp(); urlgrab is expected to overwrite it.
        fd, filename = tempfile.mkstemp()
        os.close(fd)
        try:
            self.mg.urlgrab(url, filename)
            with open(filename, 'rb') as fo:
                data = fo.read()
        finally:
            # never leave the scratch file behind, pass or fail
            if os.path.exists(filename):
                os.unlink(filename)

        self.assertEqual(data, short_reference_data)

    def test_MGRandomStart(self):
        "MGRandomStart.urlgrab"
        self.fetchwith(MGRandomStart)

    def test_MGRandomOrder(self):
        "MGRandomOrder.urlgrab"
        self.fetchwith(MGRandomOrder)
97
class CallbackTests(TestCase):
    """Check failure-callback handling with bad mirrors listed first."""

    def setUp(self):
        self.g  = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in \
                       (bad_mirrors + good_mirrors)]
        if hasattr(urlgrabber.grabber, '_TH'):
            # test assumes mirrors are not re-ordered
            urlgrabber.grabber._TH.hosts.clear()
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_failure_callback(self):
        "test that MG executes the failure callback correctly"
        tricky_list = []
        def failure_callback(cb_obj, tl):
            tl.append(str(cb_obj.exception))
        # callback given as a (function, args, kwargs) triple
        self.mg.failure_callback = failure_callback, (tricky_list, ), {}
        data = self.mg.urlread('reference')
        # assertEqual reports both values on mismatch, unlike assertTrue(a == b)
        self.assertEqual(data, reference_data)
        # fail with a clear message rather than an IndexError below
        self.assertTrue(tricky_list, 'failure callback was never invoked')
        self.assertEqual(tricky_list[0][:25],
                          '[Errno 14] HTTP Error 404')

    def test_callback_reraise(self):
        "test that the callback can correctly re-raise the exception"
        def failure_callback(cb_obj):
            raise cb_obj.exception
        self.mg.failure_callback = failure_callback
        self.assertRaises(URLGrabError, self.mg.urlread, 'reference')
124
class BadMirrorTests(TestCase):
    """Check behaviour when every mirror in the group is bad."""

    def setUp(self):
        self.g  = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in bad_mirrors]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_simple_grab(self):
        """test that a bad mirror raises URLGrabError"""
        url = 'reference'
        # mkstemp() replaces the deprecated, race-prone mktemp()
        fd, filename = tempfile.mkstemp()
        os.close(fd)
        try:
            self.assertRaises(URLGrabError, self.mg.urlgrab, url, filename)
        finally:
            # the scratch file may or may not survive a failed grab
            if os.path.exists(filename):
                os.unlink(filename)
136
class FailoverTests(TestCase):
    """Check that a grab moves on from a bad mirror to a good one."""

    def setUp(self):
        self.g  = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in \
                       (bad_mirrors + good_mirrors)]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_simple_grab(self):
        """test that the MG fails over past a bad mirror"""
        url = 'reference'
        elist = []
        def cb(e, elist=elist):
            elist.append(e)

        # mkstemp() replaces the deprecated, race-prone mktemp()
        fd, filename = tempfile.mkstemp()
        os.close(fd)
        try:
            self.mg.urlgrab(url, filename, failure_callback=cb)
            with open(filename, 'rb') as fo:
                contents = fo.read()
        finally:
            # never leave the scratch file behind, pass or fail
            if os.path.exists(filename):
                os.unlink(filename)

        # first be sure that the first mirror failed and that the
        # callback was called
        self.assertEqual(len(elist), 1)
        # now be sure that the second mirror succeeded and the correct
        # data was returned
        self.assertEqual(contents, reference_data)
160
class FakeGrabber:
    """Scripted grabber double: replays canned results and records calls.

    ``resultlist`` holds one entry per expected ``urlgrab`` call; an entry
    that is an Exception instance is raised, anything else is returned.
    """

    def __init__(self, resultlist=None):
        self.opts = URLGrabberOptions()
        # (url, filename) pairs, one per urlgrab() call, in call order
        self.calls = []
        # position of the next scripted result to hand out
        self.index = 0
        self.resultlist = resultlist if resultlist else []

    def urlgrab(self, url, filename=None, **kwargs):
        """Log the call, then return or raise the next scripted result."""
        self.calls.append((url, filename))
        outcome = self.resultlist[self.index]
        self.index += 1
        if not isinstance(outcome, Exception):
            return outcome
        raise outcome
174
class ActionTests(TestCase):
    """Check failure-action policies using a scripted grabber and logger."""

    def setUp(self):
        self.snarfed_logs = []
        # swap the mirror module's logger for a recorder; tearDown restores it
        self.db = urlgrabber.mirror.DEBUG
        urlgrabber.mirror.DEBUG = FakeLogger()
        self.mirrors = list('abcdef')
        # the first two attempts raise, the third hands back a filename
        scripted = [URLGrabError(3), URLGrabError(3), 'filename']
        self.g = FakeGrabber(scripted)
        self.mg = MirrorGroup(self.g, self.mirrors)

    def tearDown(self):
        urlgrabber.mirror.DEBUG = self.db

    def _calls_to_first(self, count):
        """Return the (url, filename) pairs expected for the first mirrors."""
        return [(name.encode('utf8') + b'/somefile', None)
                for name in self.mirrors[:count]]

    def _check(self, expected_calls, expected_logs):
        """Compare the recorded grabber calls and log lines with expectations."""
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def test_defaults(self):
        'test default action policy'
        self.mg.urlgrab('somefile')
        expected_logs = [
            'MIRROR: trying somefile -> a/somefile',
            'MIRROR: failed',
            'GR   mirrors: [b c d e f] 0',
            'MAIN mirrors: [a b c d e f] 1',
            'MIRROR: trying somefile -> b/somefile',
            'MIRROR: failed',
            'GR   mirrors: [c d e f] 0',
            'MAIN mirrors: [a b c d e f] 2',
            'MIRROR: trying somefile -> c/somefile',
        ]
        self._check(self._calls_to_first(3), expected_logs)

    def test_instance_action(self):
        'test the effects of passed-in default_action'
        self.mg.default_action = {'remove_master': 1}
        self.mg.urlgrab('somefile')
        expected_logs = [
            'MIRROR: trying somefile -> a/somefile',
            'MIRROR: failed',
            'GR   mirrors: [b c d e f] 0',
            'MAIN mirrors: [b c d e f] 0',
            'MIRROR: trying somefile -> b/somefile',
            'MIRROR: failed',
            'GR   mirrors: [c d e f] 0',
            'MAIN mirrors: [c d e f] 0',
            'MIRROR: trying somefile -> c/somefile',
        ]
        self._check(self._calls_to_first(3), expected_logs)

    def test_method_action(self):
        'test the effects of method-level default_action'
        self.mg.urlgrab('somefile', default_action={'remove_master': 1})
        expected_logs = [
            'MIRROR: trying somefile -> a/somefile',
            'MIRROR: failed',
            'GR   mirrors: [b c d e f] 0',
            'MAIN mirrors: [b c d e f] 0',
            'MIRROR: trying somefile -> b/somefile',
            'MIRROR: failed',
            'GR   mirrors: [c d e f] 0',
            'MAIN mirrors: [c d e f] 0',
            'MIRROR: trying somefile -> c/somefile',
        ]
        self._check(self._calls_to_first(3), expected_logs)

    def callback(self, e):
        """Failure callback that always returns the {'fail': 1} action."""
        return {'fail': 1}

    def test_callback_action(self):
        'test the effects of a callback-returned action'
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'somefile',
                          failure_callback=self.callback)
        expected_logs = [
            'MIRROR: trying somefile -> a/somefile',
            'MIRROR: failed',
            'GR   mirrors: [b c d e f] 0',
            'MAIN mirrors: [a b c d e f] 1',
        ]
        self._check(self._calls_to_first(1), expected_logs)
262
263import threading, socket
264
class HttpReplyCode(TestCase):
    """Tests that run against a throwaway in-process HTTP server.

    Before making a request each test sets ``self.reply`` to a
    ``(status code, reason bytes)`` pair and ``self.content`` to the body
    bytes (or None for no body).  ``self.process`` is called with the raw
    request bytes and may be replaced to inspect them.
    """

    def setUp(self):
        # start the server
        self.exit = False
        self.process = lambda data: None

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(('localhost', 0))  # port 0: let the OS pick a free port
        s.listen(1)
        self.port = s.getsockname()[1]

        def server():
            while True:
                c, a = s.accept()
                if self.exit:
                    c.close()
                    break
                # Accumulate the request until the blank line that ends the
                # headers.  recv() may deliver it in several pieces, and
                # returns b'' once the peer has closed the connection.
                data = b''
                while not data.endswith(b'\r\n\r\n'):
                    chunk = c.recv(4096)
                    if not chunk:
                        break
                    data += chunk
                if not data.endswith(b'\r\n\r\n'):
                    # peer went away before sending a full request
                    c.close()
                    continue
                self.process(data)
                c.sendall(b'HTTP/1.1 %d %s\r\n' % self.reply)
                if self.content is not None:
                    c.sendall(b'Content-Length: %d\r\n\r\n' % len(self.content))
                    c.sendall(self.content)
                c.close()
            s.close()
            self.exit = False

        self.thread = threading.Thread(target=server)
        self.thread.start()

        # create grabber and mirror group objects
        def failure(obj):
            # remember the HTTP status of the failed request, if it has one
            self.code = getattr(obj.exception, 'code', None)
            return {}
        self.g  = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://localhost:%d' % self.port],
                              failure_callback = failure)

    def tearDown(self):
        # shut down the server
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(('localhost', self.port)) # wake it up
        except ConnectionRefusedError:
            # already gone?
            pass
        finally:
            s.close()
        self.thread.join()

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, b'Busy'
        self.content = None

        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEqual(self.code, 503); del self.code

        # multi
        err = []
        self.mg.urlgrab('foo', async_=True, failfunc=err.append)
        urlgrabber.grabber.parallel_wait()
        self.assertEqual([e.exception.errno for e in err], [256])
        self.assertEqual(self.code, 503); del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, b'OK'
        self.content = b'ABCDEF'

        # no range specified
        data = self.mg.urlread('foo')
        self.assertEqual(data, b'ABCDEF')

        data = self.mg.urlread('foo', range = (3, 5))
        self.assertEqual(data, b'DE')

    def test_retry_no_cache(self):
        'test bypassing proxy cache on failure'
        def process(data):
            # serve the fresh version only to requests that bypass the cache
            if b'Pragma:no-cache' in data:
                self.content = b'version2'
            else:
                self.content = b'version1'

        def checkfunc_read(obj):
            # urlread() yields bytes, so the comparison must use bytes too;
            # a str literal would never match and the check would be a no-op
            if obj.data == b'version1':
                raise URLGrabError(-1, 'Outdated version of foo')

        def checkfunc_grab(obj):
            with open('foo', 'rb') as f:
                if f.read() == b'version1':
                    raise URLGrabError(-1, 'Outdated version of foo')

        self.process = process
        self.reply = 200, b'OK'

        opts = self.g.opts
        opts.retry = 3
        opts.retry_no_cache = True

        # single
        opts.checkfunc = checkfunc_read
        try:
            self.mg.urlread('foo')
        except URLGrabError as e:
            self.fail(str(e))

        # multi
        opts.checkfunc = checkfunc_grab
        try:
            self.mg.urlgrab('foo', async_=True)
            try:
                urlgrabber.grabber.parallel_wait()
            except URLGrabError as e:
                self.fail(str(e))
        finally:
            # the grab saves to 'foo' in the working directory; clean it up
            if os.path.exists('foo'):
                os.unlink('foo')
382
def suite():
    """Return a suite holding every test case defined in this module."""
    this_module = sys.modules[__name__]
    return TestLoader().loadTestsFromModule(this_module)
386
if __name__ == '__main__':
    # run the whole module verbosely, with descriptions, reporting to stdout
    TextTestRunner(stream=sys.stdout, descriptions=1, verbosity=2).run(suite())
390