#!/usr/bin/python -t

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
#      Free Software Foundation, Inc.,
#      59 Temple Place, Suite 330,
#      Boston, MA  02111-1307  USA

# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko

"""mirror.py tests"""

import os
import random
import socket
import sys
import tempfile
import threading

import urlgrabber.grabber
from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions
import urlgrabber.mirror
from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder

from base_test_code import *


class FakeLogger:
    """In-memory logger stand-in; tests assert on the collected messages."""
    def __init__(self):
        self.logs = []

    def debug(self, msg, *args):
        self.logs.append(msg % args)

    # every level funnels into the same list so ordering is preserved
    warn = warning = info = error = debug


class BasicTests(TestCase):
    """Exercise the three MirrorGroup fetch entry points against good mirrors."""

    def setUp(self):
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_urlgrab(self):
        """MirrorGroup.urlgrab"""
        # mktemp() is race-prone in general but acceptable for these tests
        filename = tempfile.mktemp()
        url = 'short_reference'
        self.mg.urlgrab(url, filename)

        with open(filename, 'rb') as fo:
            data = fo.read()

        self.assertEqual(data, short_reference_data)

    def test_urlread(self):
        """MirrorGroup.urlread"""
        url = 'short_reference'
        data = self.mg.urlread(url)

        self.assertEqual(data, short_reference_data)

    def test_urlopen(self):
        """MirrorGroup.urlopen"""
        url = 'short_reference'
        fo = self.mg.urlopen(url)
        data = fo.read()
        fo.close()

        self.assertEqual(data, short_reference_data)


class SubclassTests(TestCase):
    """Verify the MirrorGroup subclasses fetch correctly too."""

    def setUp(self):
        self.g = URLGrabber()
        self.fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]

    def fetchwith(self, mgclass):
        # helper: grab with the given MirrorGroup class and check the payload
        self.mg = mgclass(self.g, self.fullmirrors)

        filename = tempfile.mktemp()
        url = 'short_reference'
        self.mg.urlgrab(url, filename)

        with open(filename, 'rb') as fo:
            data = fo.read()

        self.assertEqual(data, short_reference_data)

    def test_MGRandomStart(self):
        "MGRandomStart.urlgrab"
        self.fetchwith(MGRandomStart)

    def test_MGRandomOrder(self):
        "MGRandomOrder.urlgrab"
        self.fetchwith(MGRandomOrder)


class CallbackTests(TestCase):
    """Failure-callback plumbing: invocation and exception re-raise."""

    def setUp(self):
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in \
                       (bad_mirrors + good_mirrors)]
        if hasattr(urlgrabber.grabber, '_TH'):
            # test assumes mirrors are not re-ordered
            urlgrabber.grabber._TH.hosts.clear()
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_failure_callback(self):
        "test that MG executes the failure callback correctly"
        tricky_list = []
        def failure_callback(cb_obj, tl):
            tl.append(str(cb_obj.exception))
        # callback spec is (func, args, kwargs)
        self.mg.failure_callback = failure_callback, (tricky_list, ), {}
        data = self.mg.urlread('reference')
        self.assertEqual(data, reference_data)
        self.assertEqual(tricky_list[0][:25],
                         '[Errno 14] HTTP Error 404')

    def test_callback_reraise(self):
        "test that the callback can correctly re-raise the exception"
        def failure_callback(cb_obj): raise cb_obj.exception
        self.mg.failure_callback = failure_callback
        self.assertRaises(URLGrabError, self.mg.urlread, 'reference')


class BadMirrorTests(TestCase):
    """All-bad mirror list must surface a URLGrabError."""

    def setUp(self):
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in bad_mirrors]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_simple_grab(self):
        """test that a bad mirror raises URLGrabError"""
        filename = tempfile.mktemp()
        url = 'reference'
        self.assertRaises(URLGrabError, self.mg.urlgrab, url, filename)


class FailoverTests(TestCase):
    """A bad mirror followed by a good one must fail over transparently."""

    def setUp(self):
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in \
                       (bad_mirrors + good_mirrors)]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_simple_grab(self):
        """test that a the MG fails over past a bad mirror"""
        filename = tempfile.mktemp()
        url = 'reference'
        elist = []
        def cb(e, elist=elist): elist.append(e)
        self.mg.urlgrab(url, filename, failure_callback=cb)

        with open(filename, 'rb') as fo:
            contents = fo.read()

        # first be sure that the first mirror failed and that the
        # callback was called
        self.assertEqual(len(elist), 1)
        # now be sure that the second mirror succeeded and the correct
        # data was returned
        self.assertEqual(contents, reference_data)


class FakeGrabber:
    """Scripted URLGrabber stand-in: returns/raises canned results in order
    and records every (url, filename) call for later assertions."""

    def __init__(self, resultlist=None):
        self.resultlist = resultlist or []
        self.index = 0
        self.calls = []
        self.opts = URLGrabberOptions()

    def urlgrab(self, url, filename=None, **kwargs):
        self.calls.append( (url, filename) )
        res = self.resultlist[self.index]
        self.index += 1
        if isinstance(res, Exception): raise res
        else: return res


class ActionTests(TestCase):
    """Check the failover 'action' policy (remove_master, fail, ...) via the
    debug log trail produced by urlgrabber.mirror."""

    def setUp(self):
        self.snarfed_logs = []
        self.db = urlgrabber.mirror.DEBUG
        urlgrabber.mirror.DEBUG = FakeLogger()
        self.mirrors = ['a', 'b', 'c', 'd', 'e', 'f']
        # two failures, then success on the third mirror
        self.g = FakeGrabber([URLGrabError(3), URLGrabError(3), 'filename'])
        self.mg = MirrorGroup(self.g, self.mirrors)

    def tearDown(self):
        urlgrabber.mirror.DEBUG = self.db

    def test_defaults(self):
        'test default action policy'
        self.mg.urlgrab('somefile')
        expected_calls = [ (m.encode('utf8') + b'/somefile', None)
                           for m in self.mirrors[:3] ]
        expected_logs = \
            ['MIRROR: trying somefile -> a/somefile',
             'MIRROR: failed',
             'GR   mirrors: [b c d e f] 0',
             'MAIN mirrors: [a b c d e f] 1',
             'MIRROR: trying somefile -> b/somefile',
             'MIRROR: failed',
             'GR   mirrors: [c d e f] 0',
             'MAIN mirrors: [a b c d e f] 2',
             'MIRROR: trying somefile -> c/somefile']

        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def test_instance_action(self):
        'test the effects of passed-in default_action'
        self.mg.default_action = {'remove_master': 1}
        self.mg.urlgrab('somefile')
        expected_calls = [ (m.encode('utf8') + b'/somefile', None)
                           for m in self.mirrors[:3] ]
        expected_logs = \
            ['MIRROR: trying somefile -> a/somefile',
             'MIRROR: failed',
             'GR   mirrors: [b c d e f] 0',
             'MAIN mirrors: [b c d e f] 0',
             'MIRROR: trying somefile -> b/somefile',
             'MIRROR: failed',
             'GR   mirrors: [c d e f] 0',
             'MAIN mirrors: [c d e f] 0',
             'MIRROR: trying somefile -> c/somefile']

        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def test_method_action(self):
        'test the effects of method-level default_action'
        self.mg.urlgrab('somefile', default_action={'remove_master': 1})
        expected_calls = [ (m.encode('utf8') + b'/somefile', None)
                           for m in self.mirrors[:3] ]
        expected_logs = \
            ['MIRROR: trying somefile -> a/somefile',
             'MIRROR: failed',
             'GR   mirrors: [b c d e f] 0',
             'MAIN mirrors: [b c d e f] 0',
             'MIRROR: trying somefile -> b/somefile',
             'MIRROR: failed',
             'GR   mirrors: [c d e f] 0',
             'MAIN mirrors: [c d e f] 0',
             'MIRROR: trying somefile -> c/somefile']

        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def callback(self, e): return {'fail': 1}

    def test_callback_action(self):
        'test the effects of a callback-returned action'
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'somefile',
                          failure_callback=self.callback)
        expected_calls = [ (m.encode('utf8') + b'/somefile', None)
                           for m in self.mirrors[:1] ]
        expected_logs = \
            ['MIRROR: trying somefile -> a/somefile',
             'MIRROR: failed',
             'GR   mirrors: [b c d e f] 0',
             'MAIN mirrors: [a b c d e f] 1']

        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)


class HttpReplyCode(TestCase):
    """Tests that need a real HTTP conversation: a minimal single-threaded
    server is run in a background thread and scripted via self.reply,
    self.content and self.process."""

    def setUp(self):
        # start the server
        self.exit = False
        self.process = lambda data: None

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(('localhost', 0)); s.listen(1)
        self.port = s.getsockname()[1]

        def server():
            while True:
                c, a = s.accept()
                if self.exit: c.close(); break
                data = b''
                while not data.endswith(b'\r\n\r\n'):
                    # BUGFIX: accumulate the request (the original assigned,
                    # discarding earlier chunks), so the end-of-headers
                    # marker is found even when the request arrives in
                    # several recv() chunks or straddles a chunk boundary.
                    chunk = c.recv(4096)
                    if not chunk:
                        break  # peer closed before a full request; don't spin
                    data += chunk
                self.process(data)
                c.sendall(b'HTTP/1.1 %d %s\r\n' % self.reply)
                if self.content is not None:
                    c.sendall(b'Content-Length: %d\r\n\r\n' % len(self.content))
                    c.sendall(self.content)
                c.close()
            s.close()
            self.exit = False

        self.thread = threading.Thread(target=server)
        self.thread.start()

        # create grabber and mirror group objects
        def failure(obj):
            self.code = getattr(obj.exception, 'code', None)
            return {}
        self.g = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://localhost:%d' % self.port],
                              failure_callback = failure)

    def tearDown(self):
        # shut down the server
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(('localhost', self.port)) # wake it up
        except ConnectionRefusedError:
            # already gone?
            pass
        s.close()
        self.thread.join()

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, b'Busy'
        self.content = None

        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEqual(self.code, 503); del self.code

        # multi
        err = []
        self.mg.urlgrab('foo', async_=True, failfunc=err.append)
        urlgrabber.grabber.parallel_wait()
        self.assertEqual([e.exception.errno for e in err], [256])
        self.assertEqual(self.code, 503); del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, b'OK'
        self.content = b'ABCDEF'

        # no range specified
        data = self.mg.urlread('foo')
        self.assertEqual(data, b'ABCDEF')

        data = self.mg.urlread('foo', range = (3, 5))
        self.assertEqual(data, b'DE')

    def test_retry_no_cache(self):
        'test bypassing proxy cache on failure'
        def process(data):
            if b'Pragma:no-cache' in data:
                self.content = b'version2'
            else:
                self.content = b'version1'

        def checkfunc_read(obj):
            # BUGFIX: urlread returns bytes (see test_range above), so the
            # original str comparison 'version1' could never match and the
            # check silently never fired; compare against bytes.
            if obj.data == b'version1':
                raise URLGrabError(-1, 'Outdated version of foo')

        def checkfunc_grab(obj):
            # urlgrab writes to a file; text-mode read decodes to str here
            with open('foo') as f:
                if f.read() == 'version1':
                    raise URLGrabError(-1, 'Outdated version of foo')

        self.process = process
        self.reply = 200, b'OK'

        opts = self.g.opts
        opts.retry = 3
        opts.retry_no_cache = True

        # single
        opts.checkfunc = checkfunc_read
        try:
            self.mg.urlread('foo')
        except URLGrabError as e:
            self.fail(str(e))

        # multi
        opts.checkfunc = checkfunc_grab
        self.mg.urlgrab('foo', async_=True)
        try:
            urlgrabber.grabber.parallel_wait()
        except URLGrabError as e:
            self.fail(str(e))


def suite():
    tl = TestLoader()
    return tl.loadTestsFromModule(sys.modules[__name__])

if __name__ == '__main__':
    runner = TextTestRunner(stream=sys.stdout, descriptions=1, verbosity=2)
    runner.run(suite())