#!/usr/local/bin/python3.8
########################################################################
#
# Project: Metalink Checker
# URL: http://www.nabber.org/projects/
# E-mail: webmaster@nabber.org
#
# Copyright: (C) 2007, Neil McNab
# License: GNU General Public License Version 2
#   (http://www.gnu.org/copyleft/gpl.html)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Author(s): Neil McNab
#
# Description:
#   Command line application that checks or downloads metalink files.
#
# Instructions:
#   1. You need to have Python installed.
#   2. Run on the command line using: python metalink.py
#
# usage: metalink.py [options]
#
# options:
#   -h, --help            show this help message and exit
#   -d, --download        Actually download the file(s) in the metalink
#   -f FILE, --file=FILE  Metalink file to check
#   -t TIMEOUT, --timeout=TIMEOUT
#                         Set timeout in seconds to wait for response
#                         (default=10)
#
# CHANGELOG:
#   Version 1.3 - Fixed bug when no "size" attribute is present
#   Version 1.2 - Added totals output
#   Version 1.1 - Bugfixes for FTP handling, bad URL handling;
#                 rsync doesn't list as a URL Error; reduced timeout value
#   Version 1.0 - Initial release
#
# NOTE(port): the original source was Python 2 (print statements,
# urllib2/urlparse/httplib, sha/md5 modules) but declared a python3.8
# interpreter in its shebang, so it could not run at all.  This version
# is ported to Python 3 with the public interface unchanged.
########################################################################

import hashlib
import http.client
import optparse
import os
import os.path
import random
import re
import socket
import sys
import urllib.error
import urllib.parse
import urllib.request
import xml.dom.minidom

VERSION = "Metalink Checker version 1.3"


def run():
    '''
    Start a console version of this application.
    '''
    # Command line parser options.
    parser = optparse.OptionParser(version=VERSION)
    parser.add_option("--download", "-d", action="store_true", dest="download",
                      help="Actually download the file(s) in the metalink")
    parser.add_option("--file", "-f", dest="filevar", metavar="FILE",
                      help="Metalink file to check")
    parser.add_option("--timeout", "-t", dest="timeout", metavar="TIMEOUT",
                      help="Set timeout in seconds to wait for response (default=10)")

    (options, args) = parser.parse_args()

    if options.filevar is None:
        parser.print_help()
        return

    # Default socket timeout; may be overridden by --timeout.
    socket.setdefaulttimeout(10)
    if options.timeout is not None:
        socket.setdefaulttimeout(int(options.timeout))

    if options.download:
        progress = ProgressBar(55)
        download_metalink(options.filevar, os.getcwd(),
                          handler=progress.download_update)
        progress.download_end()
    else:
        results = check_metalink(options.filevar)
        print_totals(results)


def print_totals(results):
    '''
    Print a per-file summary of check results.
    First parameter, dict mapping file name -> {url: (response_code, size_check)}
    as returned by check_metalink().
    '''
    for key in results.keys():
        print("=" * 79)
        print("Summary for:", key)

        status_count = 0
        size_count = 0
        error_count = 0
        total = len(results[key])
        for subkey in results[key].keys():
            status = results[key][subkey][0]
            # Any response other than "OK" or "?" (unknown) counts as an error.
            status_bool = False
            if status != "OK" and status != "?":
                status_bool = True

            size = results[key][subkey][1]
            size_bool = False
            if size == "FAIL":
                size_bool = True

            if size_bool:
                size_count += 1
            if status_bool:
                status_count += 1
            if size_bool or status_bool:
                error_count += 1

        print("Download errors: %s/%s" % (status_count, total))
        print("Size check failures: %s/%s" % (size_count, total))
        print("Overall failures: %s/%s" % (error_count, total))

################ checks ############################


def check_metalink(src):
    '''
    Decode a metalink file, can be local or remote
    First parameter, file to download, URL or file path to download from
    Returns dict mapping file name -> check results, or False if the
    metalink lists no files.
    '''
    src = complete_url(src)
    datasource = urllib.request.urlopen(src)
    dom2 = xml.dom.minidom.parse(datasource)   # parse an open file
    datasource.close()

    urllist = get_subnodes(dom2, ["metalink", "files", "file"])
    if len(urllist) == 0:
        print("No urls to download file from.")
        return False

    results = {}
    for filenode in urllist:
        # "size" is optional in metalink files (v1.3 bugfix).
        try:
            size = get_xml_tag_strings(filenode, ["size"])[0]
        except IndexError:
            size = None
        name = get_attr_from_item(filenode, "name")
        print("=" * 79)
        print("File: %s Size: %s" % (name, size))
        results[name] = check_file_node(filenode)

    return results


def check_process(headers, filesize):
    '''
    Compare response headers against the expected file size.
    First parameter, response headers (any object; converted to text)
    Second parameter, expected size as a string, or None if unknown
    Returns tuple (response_code, size_check) where each element is
    "OK", "FAIL", "?" or an error/response code string.
    '''
    size = "?"
    sizeheader = get_header(headers, "Content-Length")

    if sizeheader is not None and filesize is not None:
        if sizeheader == filesize:
            size = "OK"
        else:
            size = "FAIL"

    response_code = "OK"
    # check_urlretrieve() encodes failures as a "Response: ..." pseudo-header.
    temp_code = get_header(headers, "Response")
    if temp_code is not None:
        response_code = temp_code

    return (response_code, size)


def get_header(textheaders, name):
    '''
    Extract a single header value from a raw header text blob.
    First parameter, headers (any object; converted to text)
    Second parameter, header name to look for
    Returns the header value string, or None if not present.
    '''
    textheaders = str(textheaders)

    headers = textheaders.split("\n")
    for line in headers:
        line = line.strip()
        if line.startswith(name + ": "):
            result = line.split(name + ": ")
            return result[1]

    return None


def check_file_node(item):
    '''
    Check every mirror URL listed for one metalink <file> node.
    First parameter, file XML node
    Returns dict mapping URL -> (response_code, size_check),
    or False if the node lists no URLs.
    '''
    try:
        size = get_xml_tag_strings(item, ["size"])[0]
    except IndexError:
        size = None
    urllist = get_subnodes(item, ["resources", "url"])
    if len(urllist) == 0:
        print("No urls to download file from.")
        return False

    number = 0
    count = 1
    result = {}
    # Visit each URL exactly once, wrapping around the list.
    while count <= len(urllist):
        filename = urllist[number].firstChild.nodeValue.strip()
        print("-" * 79)
        print("Checking %s..." % filename)
        headers = check_urlretrieve(filename)
        result[filename] = check_process(headers, size)
        print("Response Code: %s\tSize Check: %s"
              % (result[filename][0], result[filename][1]))
        number = (number + 1) % len(urllist)
        count += 1

    return result


def check_urlretrieve(url):
    '''
    Open a URL just far enough to collect its response headers.
    First parameter, URL to check
    Returns the response headers on success, or a "Response: <code>"
    pseudo-header string describing the failure.
    '''
    try:
        temp = urllib.request.urlopen(url)
    except urllib.error.HTTPError as error:
        return "Response: %s" % error.code
    except (urllib.error.URLError, http.client.InvalidURL):
        # rsync is not supported by urllib; don't report it as a bad URL.
        if url.startswith("rsync://"):
            return "Response: ?"
        return "Response: Bad URL"
    except IOError as error:
        # Legacy FTP error decoding (python2 urllib raised IOError with
        # errno == "ftp error"); kept for compatibility -- TODO confirm
        # whether python3 urllib still reaches this branch.
        if error.errno == "ftp error":
            code = error.strerror
            result = re.compile(r"^([0-9]+)").search(str(error.strerror))
            if result is not None:
                code = result.group(1)
            result = re.compile(r"^\(([0-9]+)").search(str(error.strerror))
            if result is not None:
                if result.group(1) == "110":
                    code = "timed out"
            return "Response: %s" % code
        raise
    headers = temp.info()
    temp.close()

    return headers

#########################################

############# download functions #############


def download(src, path, filemd5="", filesha1="", force=False, handler=None):
    '''
    Download a file, decodes metalinks.
    First parameter, file to download, URL or file path to download from
    Second parameter, file path to save to
    Third parameter, optional, expected MD5SUM
    Fourth parameter, optional, expected SHA1SUM
    Fifth parameter, optional, force a new download even if a valid copy already exists
    Sixth parameter, optional, progress handler callback
    Returns list of file paths if download(s) is successful
    Returns False otherwise (checksum fails)
    '''
    if src.endswith(".metalink"):
        return download_metalink(src, path, force, handler)
    else:
        # parse out filename portion here
        filename = os.path.basename(src)
        result = download_file(src, os.path.join(path, filename),
                               filemd5, filesha1, force, handler)
        if result:
            return [result]
        return False


def download_file(remote_file, local_file, filemd5="", filesha1="",
                  force=False, handler=None):
    '''
    Download a file.
    First parameter, file to download, URL or file path to download from
    Second parameter, file path to save to
    Third parameter, optional, expected MD5SUM
    Fourth parameter, optional, expected SHA1SUM
    Fifth parameter, optional, force a new download even if a valid copy already exists
    Sixth parameter, optional, progress handler callback
    Returns file path if download is successful
    Returns False otherwise (checksum fails)
    '''
    # Reuse an existing local copy if it verifies and force is not set.
    if os.path.exists(local_file) and (not force) \
            and verify_checksum(local_file, filemd5, filesha1):
        return local_file

    remote_file = complete_url(remote_file)

    directory = os.path.dirname(local_file)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    try:
        urlretrieve(remote_file, local_file, handler)
    except (urllib.error.URLError, http.client.InvalidURL, IOError):
        # Best effort: a failed mirror just means "try the next one".
        return False

    if verify_checksum(local_file, filemd5, filesha1):
        return local_file

    return False


def download_metalink(src, path, force=False, handler=None):
    '''
    Decode a metalink file, can be local or remote
    First parameter, file to download, URL or file path to download from
    Second parameter, file path to save to
    Third parameter, optional, force a new download even if a valid copy already exists
    Fourth parameter, optional, progress handler callback
    Returns list of file paths if download(s) is successful
    Returns False otherwise (checksum fails)
    '''
    src = complete_url(src)
    datasource = urllib.request.urlopen(src)
    dom2 = xml.dom.minidom.parse(datasource)   # parse an open file
    datasource.close()

    urllist = get_subnodes(dom2, ["metalink", "files", "file"])
    if len(urllist) == 0:
        return False

    results = []
    for filenode in urllist:
        result = download_file_node(filenode, path, force, handler)
        if result:
            results.append(result)

    return results


def download_file_node(item, path, force=False, handler=None):
    '''
    Downloads a specific version of a program
    First parameter, file XML node
    Second parameter, file path to save to
    Third parameter, optional, force a new download even if a valid copy already exists
    Fourth parameter, optional, progress handler callback
    Returns the downloaded file path if successful
    Returns False otherwise (checksum fails)
    '''
    urllist = get_subnodes(item, ["resources", "url"])
    if len(urllist) == 0:
        print("No urls to download file from.")
        return False

    hashlist = get_subnodes(item, ["verification", "hash"])

    hashes = {}
    hashes['md5'] = ""
    hashes['sha1'] = ""
    for hashitem in hashlist:
        hashes[get_attr_from_item(hashitem, "type")] = \
            hashitem.firstChild.nodeValue.strip()

    local_file = get_attr_from_item(item, "name")
    localfile = path_join(path, local_file)
    # choose a random url tag to start with
    number = int(random.random() * len(urllist))

    # Try each mirror in turn until one download verifies.
    error = True
    count = 1
    result = False
    while error and (count <= len(urllist)):
        result = download_file(urllist[number].firstChild.nodeValue.strip(),
                               localfile, hashes['md5'], hashes['sha1'],
                               force, handler)
        error = not result
        number = (number + 1) % len(urllist)
        count += 1

    return result


def complete_url(url):
    '''
    If no transport is specified in typical URL form, we assume it is a local
    file, perhaps only a relative path too.
    First parameter, string to convert to URL format
    Returns, string converted to URL format
    '''
    if get_transport(url) == "":
        absfile = os.path.abspath(url)
        # Windows paths lack a leading slash; file:// URLs need one.
        if absfile[0] != "/":
            absfile = "/" + absfile
        return "file://" + absfile
    return url


def urlretrieve(url, filename, reporthook=None):
    '''
    modernized replacement for urllib.urlretrieve() for use with proxy
    First parameter, URL to download from
    Second parameter, local file path to write to
    Third parameter, optional, callback(block_count, block_size, total_size)
    Returns tuple (filename, headers)
    '''
    block_size = 4096
    counter = 0
    temp = urllib.request.urlopen(url)
    headers = temp.info()

    # Content-Length may be absent; email.message.Message returns None
    # for missing headers (no KeyError), hence TypeError is caught too.
    try:
        size = int(headers['Content-Length'])
    except (KeyError, TypeError):
        size = 0

    with open(filename, 'wb') as data:
        block = True
        while block:
            block = temp.read(block_size)
            data.write(block)
            counter += 1
            if reporthook is not None:
                reporthook(counter, block_size, size)

    temp.close()

    return (filename, headers)


def verify_checksum(local_file, filemd5="", filesha1=""):
    '''
    Verify the checksum of a file
    First parameter, filename
    Second parameter, optional, expected MD5SUM
    Third parameter, optional, expected SHA1SUM
    Returns True if first checksum provided is valid
    Returns True if no checksums are provided
    Returns False otherwise
    '''
    # SHA1 takes precedence over MD5 when both are supplied.
    if filesha1 != "":
        if sha1sum(local_file) == filesha1.lower():
            return True
    elif filemd5 != "":
        if md5sum(local_file) == filemd5.lower():
            return True
    else:
        # No checksum provided, assume OK
        return True

    # checksum failed here
    print("ERROR: checksum failed for %s." % local_file)
    return False


def remote_or_local(name):
    '''
    Returns if the file path is a remote file or a local file
    First parameter, file path
    Returns "REMOTE" or "LOCAL" based on the file path
    '''
    transport = get_transport(name)

    if transport != "":
        return "REMOTE"
    return "LOCAL"


def get_transport(url):
    '''
    Gets transport type.  This is more accurate than the urlparse module which
    just does a split on colon.
    First parameter, url
    Returns the transport type
    '''
    result = url.split("://", 1)
    if len(result) == 1:
        transport = ""
    else:
        transport = result[0]
    return transport


def sha1sum(thisfile):
    '''
    First parameter, filename
    Returns SHA1 sum as a string of hex digits
    '''
    filesha = hashlib.sha1()
    with open(thisfile, "rb") as filehandle:
        # Read in bounded chunks so large files don't exhaust memory.
        data = filehandle.read(65536)
        while data:
            filesha.update(data)
            data = filehandle.read(65536)

    return filesha.hexdigest()


def md5sum(thisfile):
    '''
    First parameter, filename
    Returns MD5 sum as a string of hex digits
    '''
    filemd5 = hashlib.md5()
    with open(thisfile, "rb") as filehandle:
        data = filehandle.read(65536)
        while data:
            filemd5.update(data)
            data = filehandle.read(65536)

    return filemd5.hexdigest()


def path_join(first, second):
    '''
    A function that is called to join two paths, can be URLs or filesystem paths
    Parameters, two paths to be joined
    Returns new URL or filesystem path
    '''
    if first == "":
        return second
    if remote_or_local(second) == "REMOTE":
        return second

    if remote_or_local(first) == "REMOTE":
        if remote_or_local(second) == "LOCAL":
            return urllib.parse.urljoin(first, second)
        return second

    return os.path.normpath(os.path.join(first, second))

############ XML calls ###########################


def get_child_nodes(rootnode, subtag):
    '''
    Extract specific child tag names.
    First parameter, XML node
    Second parameter, name (string) of child node(s) to look for
    Returns a list of child nodes
    '''
    children = []
    for childnode in rootnode.childNodes:
        if childnode.nodeName == subtag:
            children.append(childnode)

    return children


def get_subnodes(rootnode, subtags):
    '''
    First parameter, XML node
    Second parameter, tree in array form for names (string) of child node(s) to look for
    Returns a list of child nodes (searched recursively)
    '''
    children = []
    child_nodes = get_child_nodes(rootnode, subtags[0])
    if len(subtags) == 1:
        return child_nodes

    for child in child_nodes:
        child_nodes = get_subnodes(child, subtags[1:])
        children.extend(child_nodes)

    return children


def get_texttag_values(xmlfile, tag):
    '''
    Get values for selected tags in an XML file
    First parameter, XML file to parse
    Second parameter, tag to search for in XML file
    Returns a list of text values found
    '''
    looking_for = []
    try:
        datasource = open(xmlfile)
    except IOError:
        return looking_for

    dom2 = xml.dom.minidom.parse(datasource)   # parse an open file
    datasource.close()
    return get_xml_tag_strings(dom2, tag)


def get_tags(xmlfile, tag):
    '''
    Get XML nodes for selected tags in an XML file.
    First parameter, XML file to parse
    Second parameter, tag tree (list of strings) to search for
    Returns a list of matching nodes (empty if the file cannot be opened)
    '''
    looking_for = []
    try:
        datasource = open(xmlfile)
    except IOError:
        return looking_for

    dom2 = xml.dom.minidom.parse(datasource)   # parse an open file
    datasource.close()
    return get_subnodes(dom2, tag)


def get_xml_tag_strings(item, tag):
    '''
    Converts an XML node to a list of text for specified tag
    First parameter, XML node object
    Second parameter, tag tree names to search for
    Returns a list of text value for this tag
    '''
    return get_xml_item_strings(get_subnodes(item, tag))


def get_xml_item_strings(items):
    '''
    Converts XML nodes to text
    First parameter, list of XML Node objects
    Returns, list of strings as extracted from text nodes in items
    '''
    stringlist = []
    for myitem in items:
        stringlist.append(myitem.firstChild.nodeValue.strip())
    return stringlist


def get_attr_from_item(item, name):
    '''
    Extract the attribute from the XML node
    First parameter, item XML node
    Second parameter, attribute name to look for
    Returns value of the attribute, or "" if not present
    '''
    local_file = ""

    for i in range(item.attributes.length):
        if item.attributes.item(i).name == name:
            local_file = item.attributes.item(i).value

    return local_file

###################################################


class ProgressBar:
    '''
    Simple console progress bar, used as a download progress callback.
    '''

    def __init__(self, length=68):
        # length: width of the bar in characters
        self.length = length
        self.update(0, 0)
        self.total_size = 0

    def download_update(self, block_count, block_size, total_size):
        '''
        Progress callback compatible with urlretrieve()'s reporthook.
        '''
        self.total_size = total_size

        current_bytes = float(block_count * block_size) / 1024 / 1024
        total_bytes = float(total_size) / 1024 / 1024

        try:
            percent = 100 * current_bytes / total_bytes
        except ZeroDivisionError:
            percent = 0

        # The last block is usually short, so the estimate can exceed 100%.
        if percent > 100:
            percent = 100

        if total_bytes < 0:
            return

        size = int(percent * self.length / 100)
        bar = ("#" * size) + ("-" * (self.length - size))
        output = "[%s] %.0f%% %.2f/%.2f MB" % (bar, percent,
                                               current_bytes, total_bytes)

        self.line_reset()
        sys.stdout.write(output)

    def update(self, count, total):
        '''
        Redraw the bar for count items completed out of total.
        '''
        if count > total:
            count = total

        try:
            percent = 100 * float(count) / total
        except ZeroDivisionError:
            percent = 0

        if total < 0:
            return

        size = int(percent * self.length / 100)
        bar = ("#" * size) + ("-" * (self.length - size))
        output = "[%s] %.0f%%" % (bar, percent)

        self.line_reset()
        sys.stdout.write(output)

    def line_reset(self):
        # Backspace over the previous bar; non-Windows terminals get a
        # newline as well (original behavior preserved).
        sys.stdout.write("\b" * 80)
        if os.name != 'nt':
            sys.stdout.write("\n")

    def end(self):
        self.update(1, 1)
        print("")

    def download_end(self):
        self.download_update(1, self.total_size, self.total_size)
        print("")


if __name__ == "__main__":
    run()