1#!/usr/bin/env python 2from __future__ import division, print_function 3"""Retrieve a remote file via http to a local file. 4 5Note: at exit attempts to abort all outstanding transfers and delete the output files. 6 7To do: try polling while connected instead of using a progress callback. 8The advantages are: 9- fewer calls (except over a slow connection) 10- can implement a "stalled" time limit that only kicks in 11 if the given time elapses with no new bytes read. 12To do this you will have to get the full size from the token: 13self._tkApp.eval("set [set token](totalsize)") 14and should probably get the current size as well 15(rather than using the convenience function) 16because it's not clear the convenience function exists in older tcl. 17note: I doubt this tcl code can be executed with _tkApp.call -- 18at least nothing I tried with call worked. 19 20Loosely based on RO.Wdg.FTPLogWdg. 21 22History: 232005-07-08 ROwen 242005-07-11 ROwen Modified to call stateFunc less often during download. 252005-09-15 ROwen Documented the arguments for callback functions. 262008-04-29 ROwen Fixed reporting of exceptions that contain unicode arguments. 272010-05-26 ROwen Tweaked to use _removeAllCallbacks() instead of nulling _callbacks. 282010-11-12 ROwen Bug fix: timeLim was mishandled. 292011-06-16 ROwen Ditched obsolete "except (SystemExit, KeyboardInterrupt): raise" code 302012-08-01 ROwen Changed isDone()->isDone, getStateStr()->state, getErrMsg()->errMsg, 31 getBytes()->readBytes, totBytes; 32 Deleted getState() (use state, but returns a string, not an int); 33 Added didFail, isAbortable. 34 State constants are now strings, not integers. 352014-04-01 ROwen Bug fix: "unknown state" message used an undefined variable. 362014-09-16 ROwen Modified to use RO.AddCallback.safeCall2. 37 Modified _cleanup to deregister tcl callbacks before cleaning up the connection. 38 Modified to print warnings to stderr instead of stdout. 392014-09-18 ROwen Fixed a bug in the unit test. 402015-09-24 ROwen Replace "== None" with "is None" to modernize the code. 412015-11-03 ROwen Replace "!= None" with "is not None" to modernize the code. 42""" 43__all__ = ['HTTPGet'] 44 45import atexit 46import os 47import sys 48import time 49import Tkinter 50import RO.AddCallback 51import RO.StringUtil 52import RO.TkUtil 53 54_Debug = False 55_DebugExit = False 56 57_ProgressInterval = 0.1 # minimum time between progress callbacks (sec) 58 59class _ExitClass: 60 """Class to keep track of outstanding nework transfers 61 and abort them at exit. 62 """ 63 def __init__(self, timeLim = 2.0, dtime = 0.1): 64 self.transferDict = {} 65 self.timeLim = timeLim 66 self.dtime = max(dtime, 0.01) 67 self.didRegisterExit = False 68 69 def addTransfer(self, httpGetter): 70 """Add one httpGetter. 71 """ 72 if _DebugExit: 73 print("HTTPGet._Exit.addTransfer(%s)" % (httpGetter,)) 74 httpGetter.addDoneCallback(self.removeTransfer) 75 self.transferDict[httpGetter] = None 76 if not self.didRegisterExit: 77 atexit.register(self.abortAll) 78 self.didRegisterExit = True 79 80 def removeTransfer(self, httpGetter): 81 """Remove one httpGetter. 82 Does not verify that the getter is finished. 83 """ 84 if _DebugExit: 85 print("HTTPGet._Exit.removeTransfer(%s)" % (httpGetter,)) 86 self.transferDict.pop(httpGetter) 87 88 def abortAll(self): 89 """Abort all outsanding transfers. 90 Meant to be registered with atexit. 91 """ 92 if _DebugExit: 93 print("HTTPGet._Exit.abortAll()") 94 if not self.transferDict: 95 return 96 97 transferList = self.transferDict.keys() 98 for xfer in transferList: 99 if _DebugExit: 100 print("HTTGet._Exit: aborting %s" % (xfer,)) 101 xfer.abort() 102 103 # wait a few seconds for all to end 104 maxWaits = self.timeLim / self.dtime 105 nWaits = 0 106 while self.transferDict: 107 time.sleep(self.dtime) 108 nWaits += 1 109 if nWaits > maxWaits: 110 if _DebugExit: 111 print("HTTGet._Exit: timed out while waiting for aborts to finish") 112 break 113 else: 114 if _DebugExit: 115 print("HTTGet._Exit: all aborts finished") 116_ExitObj = _ExitClass() 117 118 119class HTTPGet(RO.AddCallback.BaseMixin): 120 """Downloads the specified url to a file. 121 122 Inputs: 123 - fromURL url of file to download 124 - toPath full path of destination file 125 - isBinary file is binary? (if False, EOL translation is probably performed) 126 - overwrite if True, overwrites the destination file if it exists; 127 otherwise raises ValueError 128 - createDir if True, creates any required directories; 129 otherwise raises ValueError 130 - doneFunc function to call when the transfer completes 131 - stateFunc function to call when state changes, including data received 132 (stateFunc will be called when the transfer ends) 133 - startNow if True, the transfer is started immediately 134 otherwise callFunc is called and the transaction remains Queued 135 until start is called 136 - dispStr a string to display while downloading the file; 137 if omitted, fromURL is displayed 138 - timeLim time limit (sec) for the total transfer; if None then no limit 139 140 Callbacks receive one argument: this object. 141 """ 142 # state constants 143 Queued = "Queued" 144 Connecting = "Connecting" 145 Running = "Running" 146 Aborting = "Aborting" 147 Done = "Done" 148 Aborted = "Aborted" 149 Failed = "Failed" 150 151 _AllStates = set(( 152 Queued, 153 Connecting, 154 Running, 155 Aborting, 156 Done, 157 Aborted, 158 Failed, 159 )) 160 _AbortableStates = set((Queued, Connecting, Running)) 161 _DoneStates = set((Done, Aborted, Failed)) 162 _FailedStates = set((Aborted, Failed)) 163 164 StateStrMaxLen = 0 165 for stateStr in _AllStates: 166 StateStrMaxLen = max(StateStrMaxLen, len(stateStr)) 167 del(stateStr) 168 _tkApp = None 169 170 def __init__(self, 171 fromURL, 172 toPath, 173 isBinary = False, 174 overwrite = False, 175 createDir = True, 176 doneFunc = None, 177 stateFunc = None, 178 startNow = True, 179 dispStr = None, 180 timeLim = None, 181 ): 182 if self._tkApp is None: 183 self._tkApp = Tkinter.Frame().tk 184 self.fromURL = fromURL 185 self.toPath = toPath 186 self.isBinary = isBinary 187 self.overwrite = bool(overwrite) 188 self.createDir = createDir 189 if timeLim is not None: 190 self.timeLimMS = max(1, int(round(timeLim * 1000.0))) 191 else: 192 self.timeLimMS = 0 193 194 if dispStr is None: 195 self.dispStr = fromURL 196 else: 197 self.dispStr = dispStr 198 199 self._tclFile = None 200 self._tclHTTPConn = None 201 self._tclCallbacks = () 202 self._tclHTTPDoneCallback = None 203 self._tclHTTPProgressCallback = None 204 self._lastProgReportTime = 0 205 206 self._readBytes = 0 207 self._lastReadBytes = 0 208 self._didPrintBlockSize = None # for debug output only 209 self._totBytes = None 210 self._state = self.Queued 211 self._errMsg = None 212 213 self._createdFile = False 214 215 self._tkApp.eval('package require http') 216 217 RO.AddCallback.BaseMixin.__init__(self, stateFunc, callNow=False) 218 self._doneCallbacks = [] 219 220 global _ExitObj 221 _ExitObj.addTransfer(self) 222 223 if doneFunc: 224 self.addDoneCallback(doneFunc) 225 226 if startNow: 227 self.start() 228 229 def addDoneCallback(self, func): 230 """Add a function that will be called when the transfer completes""" 231 self._doneCallbacks.append(func) 232 233 def removeDoneCallback(self, func): 234 """Remove a done callback. 235 """ 236 self._doneCallbacks.remove(func) 237 238 def start(self): 239 """Start the download. 240 241 If state is not Queued, raises RuntimeError 242 """ 243 if _Debug: 244 print("%s.start()" % (self,)) 245 if self._state != self.Queued: 246 raise RuntimeError("Cannot start; state = %r not Queued" % (self._state,)) 247 248 self._setState(self.Connecting) 249 250 try: 251 # verify output file and verify/create output directory, as appropriate 252 self._toPrep() 253 254 # open output file 255 if _Debug: 256 print("HTTPGet: opening output file %r" % (self.toPath,)) 257 try: 258 self._tclFile = self._tkApp.call('open', self.toPath, "w") 259 self._createdFile = True 260 if self.isBinary: 261 self._tkApp.call('fconfigure', self._tclFile, "-encoding", "binary", "-translation", "binary") 262 except Tkinter.TclError as e: 263 raise RuntimeError("Could not open %r: %s" % (self.toPath, e)) 264 265 # start http transfer 266 doneCallback = RO.TkUtil.TclFunc(self._httpDoneCallback, debug=_Debug) 267 progressCallback = RO.TkUtil.TclFunc(self._httpProgressCallback, debug=_Debug) 268 self._tclCallbacks = (doneCallback, progressCallback) 269 if _Debug: 270 print("HTTPGet: creating http connection") 271 self._tclHTTPConn = self._tkApp.call( 272 '::http::geturl', self.fromURL, 273 '-channel', self._tclFile, 274 '-command', doneCallback, 275 '-progress', progressCallback, 276 '-binary', self.isBinary, 277 '-timeout', self.timeLimMS 278 ) 279 except Exception as e: 280 self._setState(self.Failed, RO.StringUtil.strFromException(e)) 281 return 282 283 self._setState(self.Running) 284 285 def abort(self): 286 """Start aborting: cancel the transaction and delete the output file. 287 Silently fails if the transaction has already completed 288 """ 289 if _Debug: 290 print("%s.abort()" % (self,)) 291 if self.isDone: 292 return 293 elif self._state == self.Queued: 294 self._setState(self.Aborted) 295 return 296 297 if self._tclHTTPConn is None: 298 sys.stderr.write("HTTPGet cannot abort: isDone false but no http connection\n") 299 return 300 301 self._setState(self.Aborting) 302 self._tkApp.call("::http::reset", self._tclHTTPConn) 303 if _Debug: 304 print("http connection reset") 305 306 @property 307 def errMsg(self): 308 """If the transfer failed, an explanation as a string, else None 309 """ 310 return self._errMsg 311 312 @property 313 def state(self): 314 """Returns the current state as a string. 315 """ 316 return self._state 317 318 @property 319 def isAbortable(self): 320 """True if the transaction can be aborted 321 """ 322 return self._state in self._AbortableStates 323 324 @property 325 def isDone(self): 326 """Return True if the transaction is finished (succeeded, aborted or failed), False otherwise. 327 """ 328 return self._state in self._DoneStates 329 330 @property 331 def didFail(self): 332 """Return True if the transaction failed or was aborted 333 """ 334 return self._state in self._FailedStates 335 336 @property 337 def readBytes(self): 338 """Bytes read so far 339 """ 340 return self._readBytes 341 342 @property 343 def totBytes(self): 344 """Total bytes in file, if known, None otherwise. 345 346 The value is certain to be unknown until the transfer starts; 347 after that it depends on whether the server sends the info. 348 """ 349 return self._totBytes 350 351 def _setState(self, newState, errMsg=None): 352 """Set a new state and call callbacks. 353 Do nothing if already done. 354 errMsg is ignored unless newState is Failed. 355 356 Raise RuntimeError if newState unknown. 357 """ 358 if _Debug: 359 print("%s._setState(newState=%s, errmsg=%r)" % (self, newState, errMsg)) 360 # if state is not valid, reject 361 if self.isDone: 362 return 363 364 if newState not in self._AllStates: 365 raise RuntimeError("Unknown state %r" % (newState,)) 366 367 self._state = newState 368 if newState == self.Failed: 369 self._errMsg = errMsg 370 371 isDone = self.isDone 372 if isDone: 373 self._cleanup() 374 375 self._doCallbacks() 376 if isDone: 377 # call done callbacks 378 # use a copy in case a callback deregisters itself 379 for func in self._doneCallbacks[:]: 380 RO.AddCallback.safeCall2(str(self), func, self) 381 382 # remove all callbacks 383 self._removeAllCallbacks() 384 self._doneCallbacks = [] 385 self._tclCallbacks = () 386 387 def _cleanup(self): 388 """Clean up everything except references to callbacks. 389 390 Warning: this is a private method: call only from _setState! 391 392 Close the input and output files and deregister the tcl callbacks. 393 If state in (Aborted, Failed), delete the output file. 394 """ 395 if _Debug: 396 print("%s._cleanup()") 397 for tclFunc in self._tclCallbacks: 398 if _Debug: 399 print("deregister %s" % (tclFunc,)) 400 tclFunc.deregister() 401 if self._tclHTTPConn is not None: 402 self._tkApp.call("::http::cleanup", self._tclHTTPConn) 403 self._tclHTTPConn = None 404 if _Debug: 405 print("http connection cleaned up") 406 if self._tclFile: 407 self._tkApp.call("close", self._tclFile) 408 self._tclFile = None 409 if _Debug: 410 print("output file closed") 411 412 if self._createdFile and self._state in (self.Aborted, self.Failed): 413 try: 414 os.remove(self.toPath) 415 if _Debug: 416 print("deleted output file") 417 except OSError as e: 418 if _Debug: 419 print("failed to delete output file: %s" % (e,)) 420 421 def _httpDoneCallback(self, token=None): 422 """Called when the http transfer is finished. 423 """ 424 if self._tclHTTPConn is None: 425 sys.stderr.write("HTTPGet warning: _httpDoneCallback called but no http connection\n") 426 return 427 428 if _Debug: 429 print("%s.httpDoneCallback()" % (self,)) 430 print("status=%r" % (self._tkApp.call('::http::status', self._tclHTTPConn),)) 431 print("code=%r" % (self._tkApp.call('::http::code', self._tclHTTPConn),)) 432 print("ncode=%r" % (self._tkApp.call('::http::ncode', self._tclHTTPConn),)) 433 print("error=%r" % (self._tkApp.call('::http::error', self._tclHTTPConn),)) 434 435 httpState = self._tkApp.call('::http::status', self._tclHTTPConn) 436 errMsg = None 437 if httpState == "ok": 438 codeNum = int(self._tkApp.call('::http::ncode', self._tclHTTPConn)) 439 if codeNum == 200: 440 newState = self.Done 441 else: 442 if _Debug: 443 print("status ok but code=%s not 200" % (codeNum,)) 444 newState = self.Failed 445 errMsg = self._tkApp.call('::http::code', self._tclHTTPConn) 446 elif httpState == "eof": 447 newState = self.Failed 448 errMsg = "No reply from http server" 449 elif httpState == "timeout": 450 newState = self.Failed 451 errMsg = "Timed out" 452 elif httpState == "reset": 453 newState = self.Aborted 454 else: 455 if httpState != "error": 456 sys.stderr.write("HTTPGet warning: unknown httpState=%s; assuming error\n" % (httpState,)) 457 newState = self.Failed 458 errMsg = self._tkApp.call('::http::error', self._tclHTTPConn) 459 if not errMsg: 460 errMsg = httpState 461 462 self._setState(newState, errMsg) 463 464 def _httpProgressCallback(self, token, totBytes, readBytes): 465 """http callback function. 466 """ 467 if _Debug: 468 print("%s._httpProgressCallback(totBytes=%r, readBytes=%r)" % (self, totBytes, readBytes)) 469 470 self._totBytes = int(totBytes) 471 self._lastReadBytes = self._readBytes 472 self._readBytes = int(readBytes) 473 if _Debug and not self._didPrintBlockSize and self._readBytes: 474 print("%s block size=%s" % (self, self._readBytes - self._lastReadBytes)) 475 self._didPrintBlockSize = True 476 477 newTime = time.time() 478 if (newTime - self._lastProgReportTime) > _ProgressInterval: 479 self._doCallbacks() 480 self._lastProgReportTime = newTime 481 482 def __str__(self): 483 return "%s(%s)" % (self.__class__.__name__, self.fromURL) 484 485 def _toPrep(self): 486 """Create or verify the existence of the output directory 487 and check if output file already exists. 488 489 Raise RuntimeError or IOError if anything is wrong. 490 """ 491 if _Debug: 492 print("%s._toPrep()" % (self,)) 493 # if output file exists and not overwrite, complain 494 if not self.overwrite and os.path.exists(self.toPath): 495 raise RuntimeError("toPath %r already exists" % (self.toPath,)) 496 497 # if directory does not exist, create it or fail, depending on createDir; 498 # else if "directory" exists but is a file, fail 499 toDir = os.path.dirname(self.toPath) 500 if toDir: 501 if not os.path.exists(toDir): 502 # create the directory or fail, depending on createDir 503 if self.createDir: 504 if _Debug: 505 print("%s._toPrep creating directory %r" % (self, toDir)) 506 os.makedirs(toDir) 507 else: 508 raise RuntimeError("directory %r does not exist" % (toDir,)) 509 elif not os.path.isdir(toDir): 510 raise RuntimeError("%r is a file, not a directory" % (toDir,)) 511 512if __name__ == "__main__": 513 root = Tkinter.Tk() 514 515 testURL = "http://www.astro.washington.edu/" 516 outFile = "httpget_test.html" 517 518 _Debug = False 519 _DebugExit = True 520 521 def stateCallback(httpObj): 522 print("state =", httpObj.state, end=' ') 523 print("read %s of %s bytes" % (httpObj.readBytes, httpObj.totBytes)) 524 if httpObj.isDone: 525 if httpObj.errMsg: 526 print("error message =", httpObj.errMsg) 527 root.quit() 528 529 httpObj = HTTPGet( 530 fromURL = testURL, 531 toPath = outFile, 532 isBinary = False, 533 stateFunc = stateCallback, 534 startNow = True, 535 overwrite = True, 536 ) 537 538 root.mainloop() 539