#!/usr/bin/env python2
2########################################################################
3#
4# Project: Metalink Checker
5# URL: http://www.nabber.org/projects/
6# E-mail: webmaster@nabber.org
7#
8# Copyright: (C) 2007, Neil McNab
9# License: GNU General Public License Version 2
10#   (http://www.gnu.org/copyleft/gpl.html)
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25#
26# Filename: $URL: https://metalinks.svn.sourceforge.net/svnroot/metalinks/checker/metalink.py $
27# Last Updated: $Date: 2007-11-13 19:33:36 +0100 (Tue, 13 Nov 2007) $
28# Version: $Rev: 91 $
29# Author(s): Neil McNab
30#
31# Description:
32#   Command line application that checks or downloads metalink files.
33#
34# Instructions:
35#   1. You need to have Python installed.
36#   2. Run on the command line using: python metalink.py
37#
38#   usage: metalink.py [options]
39#
40#   options:
41#     -h, --help            show this help message and exit
42#     -d, --download        Actually download the file(s) in the metalink
43#     -f FILE, --file=FILE  Metalink file to check
44#     -t TIMEOUT, --timeout=TIMEOUT
45#                           Set timeout in seconds to wait for response
46#                           (default=10)
47#
48# CHANGELOG:
49# Version 1.3
50# -----------
51# - Fixed bug when no "size" attribute is present
52#
53# Version 1.2
54# -----------
55# - Added totals output
56#
57# Version 1.1
58# -----------
59# - Bugfixes for FTP handling, bad URL handling
60# - rsync doesn't list as a URL Error
61# - reduced timeout value
62#
63# Version 1.0
64# -----------
65# This is the initial release.
66########################################################################
67
import hashlib
import httplib
import md5
import optparse
import os.path
import random
import re
import sha
import socket
import sys
import urllib2
import urlparse
import xml.dom.minidom
80
# Version banner reported by the optparse --version option.
VERSION="Metalink Checker version 1.3"
82
def run():
    '''
    Entry point for the console interface.
    Parses command line options, then either downloads the files listed in
    the metalink or checks all of its mirror URLs.
    '''
    parser = optparse.OptionParser(version=VERSION)
    parser.add_option("--download", "-d", action="store_true", dest="download", help="Actually download the file(s) in the metalink")
    parser.add_option("--file", "-f", dest="filevar", metavar="FILE", help="Metalink file to check")
    parser.add_option("--timeout", "-t", dest="timeout", metavar="TIMEOUT", help="Set timeout in seconds to wait for response (default=10)")

    options = parser.parse_args()[0]

    # A metalink file is mandatory; without one just show the usage text.
    if options.filevar is None:
        parser.print_help()
        return

    # Default network timeout is 10 seconds unless overridden on the
    # command line.
    timeout = 10
    if options.timeout is not None:
        timeout = int(options.timeout)
    socket.setdefaulttimeout(timeout)

    if options.download:
        progress = ProgressBar(55)
        download_metalink(options.filevar, os.getcwd(), handler=progress.download_update)
        progress.download_end()
    else:
        print_totals(check_metalink(options.filevar))
110
111def print_totals(results):
112    for key in results.keys():
113        print "=" * 79
114        print "Summary for:", key
115
116        status_count = 0
117        size_count = 0
118        error_count = 0
119        total = len(results[key])
120        for subkey in results[key].keys():
121            status = results[key][subkey][0]
122            status_bool = False
123            if status != "OK" and status != "?":
124                status_bool = True
125
126            size = results[key][subkey][1]
127            size_bool = False
128            if size == "FAIL":
129                size_bool = True
130
131            if size_bool:
132                size_count += 1
133            if status_bool:
134                status_count += 1
135            if size_bool or status_bool:
136                error_count += 1
137
138        print "Download errors: %s/%s" % (status_count, total)
139        print "Size check failures: %s/%s" % (size_count, total)
140        print "Overall failures: %s/%s" % (error_count, total)
141
142##def print_summary(results):
143##    for key in results.keys():
144##        print "=" * 79
145##        print "Summary for:", key
146##        print "-" * 79
147##        print "Response Code\tSize Check\tURL"
148##        print "-" * 79
149##        for subkey in results[key].keys():
150##            print "%s\t\t%s\t\t%s" % (results[key][subkey][0], results[key][subkey][1], subkey)
151
152##def confirm_prompt(noprompt):
153##    invalue = "invalid"
154##
155##    if noprompt:
156##        return True
157##
158##    while (invalue != "" and invalue[0] != "n" and invalue[0] != "N" and invalue[0] != "Y" and invalue[0] != "y"):
159##        invalue = raw_input("Do you want to continue? [Y/n] ")
160##
161##    try:
162##        if invalue[0] == "n" or invalue[0] == "N":
163##            return False
164##    except IndexError:
165##        pass
166##
167##    return True
168
169################ checks ############################
170
171def check_metalink(src):
172    '''
173    Decode a metalink file, can be local or remote
174    First parameter, file to download, URL or file path to download from
175    '''
176    src = complete_url(src)
177    datasource = urllib2.urlopen(src)
178    dom2 = xml.dom.minidom.parse(datasource)   # parse an open file
179    datasource.close()
180
181    urllist = get_subnodes(dom2, ["metalink", "files", "file"])
182    if len(urllist) == 0:
183        print "No urls to download file from."
184        return False
185
186    results = {}
187    for filenode in urllist:
188        try:
189            size = get_xml_tag_strings(filenode, ["size"])[0]
190        except:
191            size = None
192        name = get_attr_from_item(filenode, "name")
193        print "=" * 79
194        print "File: %s Size: %s" % (name, size)
195        results[name] = check_file_node(filenode)
196
197    return results
198
def check_process(headers, filesize):
    '''
    Compare response headers against the expected file size.
    First parameter, response headers (text, or object whose str() is text)
    Second parameter, expected size as a string, or None when unknown
    Returns tuple of (response code, size check), where the size check is
    "OK", "FAIL", or "?" when it could not be determined
    '''
    # Size can only be verified when both values are available.
    size = "?"
    sizeheader = get_header(headers, "Content-Length")
    if sizeheader is not None and filesize is not None:
        size = "OK" if sizeheader == filesize else "FAIL"

    # A synthetic "Response" header (see check_urlretrieve) overrides "OK".
    temp_code = get_header(headers, "Response")
    response_code = temp_code if temp_code is not None else "OK"

    return (response_code, size)
215
def get_header(textheaders, name):
    '''
    Extract a single header value from a block of header text.
    First parameter, headers as text (or any object whose str() is header text)
    Second parameter, header name to look for (case sensitive)
    Returns the header value, or None if the header is not present
    '''
    textheaders = str(textheaders)

    headers = textheaders.split("\n")
    for line in headers:
        line = line.strip()
        if line.startswith(name + ": "):
            # Split only on the first separator so values that themselves
            # contain "<name>: " are not truncated.
            result = line.split(name + ": ", 1)
            return result[1]

    return None
227
228def check_file_node(item):
229    '''
230    Downloads a specific version of a program
231    First parameter, file XML node
232    Second parameter, file path to save to
233    Third parameter, optional, force a new download even if a valid copy already exists
234    Fouth parameter, optional, progress handler callback
235    Returns dictionary of file paths with headers
236    '''
237    try:
238        size = get_xml_tag_strings(item, ["size"])[0]
239    except:
240        size = None
241    urllist = get_subnodes(item, ["resources", "url"])
242    if len(urllist) == 0:
243        print "No urls to download file from."
244        return False
245
246    number = 0
247    filename = {}
248
249    #error = True
250    count = 1
251    result = {}
252    while (count <= len(urllist)):
253        filename = urllist[number].firstChild.nodeValue.strip()
254        print "-" *79
255        print "Checking %s..." % filename
256        headers = check_urlretrieve(filename)
257        result[filename] = check_process(headers, size)
258        print "Response Code: %s\tSize Check: %s" % (result[filename][0], result[filename][1])
259        #error = not result
260        number = (number + 1) % len(urllist)
261        count += 1
262
263    return result
264
265def check_urlretrieve(url):
266    '''
267    modernized replacement for urllib.urlretrieve() for use with proxy
268    '''
269    try:
270        temp = urllib2.urlopen(url)
271    except urllib2.HTTPError, error:
272        return "Response: %s" % error.code
273    except (urllib2.URLError, httplib.InvalidURL):
274        if url.startswith("rsync://"):
275            return "Response: ?"
276        return "Response: Bad URL"
277    except IOError, error:
278        if error.errno == "ftp error":
279            code = error.strerror
280            result = re.compile("^([0-9]+)").search(str(error.strerror))
281            if result != None:
282		code = result.group(1)
283            result = re.compile("^\(([0-9]+)").search(str(error.strerror))
284            if result != None:
285		if result.group(1) == "110":
286                    code = "timed out"
287            return "Response: %s" % code
288    headers = temp.info()
289    temp.close()
290
291    return headers
292
293#########################################
294
295############# download functions #############
296
def download(src, path, filemd5="", filesha1="", force = False, handler = None):
    '''
    Download a file, decodes metalinks.
    First parameter, file to download, URL or file path to download from
    Second parameter, directory path to save to
    Third parameter, optional, expected MD5SUM
    Fourth parameter, optional, expected SHA1SUM
    Fifth parameter, optional, force a new download even if a valid copy already exists
    Sixth parameter, optional, progress handler callback
    Returns list of file paths if download(s) is successful
    Returns False otherwise (checksum fails)
    '''
    # Metalink files describe their own set of download targets.
    if src.endswith(".metalink"):
        return download_metalink(src, path, force, handler)

    # Plain file: save it under its own base name inside "path".
    target = os.path.join(path, os.path.basename(src))
    result = download_file(src, target, filemd5, filesha1, force, handler)
    if not result:
        return False
    return [result]
319
def download_file(remote_file, local_file, filemd5="", filesha1="", force = False, handler = None):
    '''
    Download a file.
    First parameter, file to download, URL or file path to download from
    Second parameter, file path to save to
    Third parameter, optional, expected MD5SUM
    Fourth parameter, optional, expected SHA1SUM
    Fifth parameter, optional, force a new download even if a valid copy already exists
    Sixth parameter, optional, progress handler callback
    Returns file path if download is successful
    Returns False otherwise (checksum fails)
    '''
    # Reuse an existing local copy when it passes the checksum test.
    if os.path.exists(local_file) and (not force) and verify_checksum(local_file, filemd5, filesha1):
        return local_file

    remote_file = complete_url(remote_file)

    directory = os.path.dirname(local_file)
    if not os.path.isdir(directory):
        os.makedirs(directory)

    try:
        urlretrieve(remote_file, local_file, handler)
    except (IOError, OSError, ValueError, httplib.HTTPException, socket.error):
        # Narrowed from a bare "except": network and filesystem failures
        # (urllib2 errors subclass IOError) mean "this mirror did not
        # work"; anything else (e.g. KeyboardInterrupt) should propagate.
        return False

    if verify_checksum(local_file, filemd5, filesha1):
        return local_file

    return False
353
def download_metalink(src, path, force = False, handler = None):
    '''
    Decode a metalink file, can be local or remote
    First parameter, file to download, URL or file path to download from
    Second parameter, file path to save to
    Third parameter, optional, force a new download even if a valid copy already exists
    Fourth parameter, optional, progress handler callback
    Returns list of file paths if download(s) is successful
    Returns False otherwise (checksum fails)
    '''
    datasource = urllib2.urlopen(complete_url(src))
    dom2 = xml.dom.minidom.parse(datasource)   # parse an open file
    datasource.close()

    filenodes = get_subnodes(dom2, ["metalink", "files", "file"])
    if not filenodes:
        # Nothing listed in the metalink; treat as failure.
        return False

    # Collect the local path of every file that downloaded successfully.
    results = []
    for filenode in filenodes:
        downloaded = download_file_node(filenode, path, force, handler)
        if downloaded:
            results.append(downloaded)

    return results
381
382def download_file_node(item, path, force = False, handler = None):
383    '''
384    Downloads a specific version of a program
385    First parameter, file XML node
386    Second parameter, file path to save to
387    Third parameter, optional, force a new download even if a valid copy already exists
388    Fouth parameter, optional, progress handler callback
389    Returns list of file paths if download(s) is successful
390    Returns False otherwise (checksum fails)
391    '''
392
393    urllist = get_subnodes(item, ["resources", "url"])
394    if len(urllist) == 0:
395        print "No urls to download file from."
396        return False
397
398    hashlist = get_subnodes(item, ["verification", "hash"])
399
400    hashes = {}
401    hashes['md5'] = ""
402    hashes['sha1'] = ""
403    for hashitem in hashlist:
404        hashes[get_attr_from_item(hashitem, "type")] = hashitem.firstChild.nodeValue.strip()
405##        for i in range(hashitem.attributes.length):
406##            if hashitem.attributes.item(i).name == "type":
407##                hashes[hashitem.attributes.item(i).value] = hashitem.firstChild.nodeValue.strip()
408
409    local_file = get_attr_from_item(item, "name")
410    localfile = path_join(path, local_file)
411    # choose a random url tag to start with
412    number = int(random.random() * len(urllist))
413
414    error = True
415    count = 1
416    while (error and (count <= len(urllist))):
417        result = download_file(urllist[number].firstChild.nodeValue.strip(), localfile, hashes['md5'], hashes['sha1'], force, handler)
418        error = not result
419        number = (number + 1) % len(urllist)
420        count += 1
421
422    return result
423
def complete_url(url):
    '''
    If no transport is specified in typical URL form, we assume it is a local
    file, perhaps only a relative path too.
    First parameter, string to convert to URL format
    Returns, string converted to URL format
    '''
    if get_transport(url) != "":
        # Already a URL with an explicit transport.
        return url

    # Local path: convert it to an absolute file:// URL.
    absfile = os.path.abspath(url)
    if not absfile.startswith("/"):
        # Windows drive paths need a leading slash for file:// URLs.
        absfile = "/" + absfile
    return "file://" + absfile
437
def urlretrieve(url, filename, reporthook = None):
    '''
    modernized replacement for urllib.urlretrieve() for use with proxy
    First parameter, URL to download
    Second parameter, local file name to save to
    Third parameter, optional, callback invoked per block as
    reporthook(block_count, block_size, total_size)
    Returns tuple of (filename, response headers)
    '''
    block_size = 4096
    counter = 0
    temp = urllib2.urlopen(url)
    try:
        headers = temp.info()

        # The total size is only known if the server sent Content-Length.
        try:
            size = int(headers['Content-Length'])
        except KeyError:
            size = 0

        data = open(filename, 'wb')
        try:
            block = True
            while block:
                block = temp.read(block_size)
                data.write(block)
                counter += 1
                if reporthook != None:
                    reporthook(counter, block_size, size)
        finally:
            # Close the output file even if the transfer dies mid-stream.
            data.close()
    finally:
        temp.close()

    return (filename, headers)
469
470def verify_checksum(local_file, filemd5="", filesha1=""):
471    '''
472    Verify the checksum of a file
473    First parameter, filename
474    Second parameter, optional, expected MD5SUM
475    Third parameter, optional, expected SHA1SUM
476    Returns True if first checksum provided is valid
477    Returns True if no checksums are provided
478    Returns False otherwise
479    '''
480    if filesha1 != "":
481        if sha1sum(local_file) == filesha1.lower():
482            return True
483    elif filemd5 != "":
484        if md5sum(local_file) == filemd5.lower():
485            return True
486    else:
487        # No checksum provided, assume OK
488        return True
489
490    # checksum failed here
491    print "ERROR: checksum failed for %s." % local_file
492    return False
493
def remote_or_local(name):
    '''
    Returns if the file path is a remote file or a local file
    First parameter, file path
    Returns "REMOTE" or "LOCAL" based on the file path
    '''
    # Remote means the path carries a non-empty transport before "://".
    if get_transport(name) == "":
        return "LOCAL"
    return "REMOTE"
506
def get_transport(url):
    '''
    Gets transport type.  This is more accurate than the urlparse module which
    just does a split on colon.
    First parameter, url
    Returns the transport type ("" when no "://" separator is present)
    '''
    head, sep, _tail = url.partition("://")
    # Without the separator there is no transport at all.
    if sep == "":
        return ""
    return head
520
def sha1sum(thisfile):
    '''
    First parameter, filename
    Returns SHA1 sum as a string of hex digits
    '''
    # hashlib replaces the sha module, deprecated since Python 2.5.
    filesha = hashlib.sha1()
    filehandle = open(thisfile, "rb")
    try:
        # Read in fixed-size chunks so large files do not fill memory
        # (the old code read the entire file in a single call).
        data = filehandle.read(4096)
        while data:
            filesha.update(data)
            data = filehandle.read(4096)
    finally:
        filehandle.close()
    return filesha.hexdigest()
536
def md5sum(thisfile):
    '''
    First parameter, filename
    Returns MD5 sum as a string of hex digits
    '''
    # hashlib replaces the md5 module, deprecated since Python 2.5.
    filemd5 = hashlib.md5()
    filehandle = open(thisfile, "rb")
    try:
        # Read in fixed-size chunks so large files do not fill memory
        # (the old code read the entire file in a single call).
        data = filehandle.read(4096)
        while data:
            filemd5.update(data)
            data = filehandle.read(4096)
    finally:
        filehandle.close()
    return filemd5.hexdigest()
552
def path_join(first, second):
    '''
    A function that is called to join two paths, can be URLs or filesystem paths
    Parameters, two paths to be joined
    Returns new URL or filesystem path
    '''
    # An empty base, or a second path that is itself remote, wins outright.
    if first == "" or remote_or_local(second) == "REMOTE":
        return second

    if remote_or_local(first) == "REMOTE":
        # Remote base: resolve the local relative part against it.
        return urlparse.urljoin(first, second)

    # Both local: plain filesystem join.
    return os.path.normpath(os.path.join(first, second))
570
571############ XML calls ###########################
572
def get_child_nodes(rootnode, subtag):
    '''
    Extract specific child tag names.
    First parameter, XML node
    Second parameter, name (string) of child node(s) to look for
    Returns a list of child nodes
    '''
    # Only direct children are considered, not deeper descendants.
    return [node for node in rootnode.childNodes if node.nodeName == subtag]
586
def get_subnodes(rootnode, subtags):
    '''
    First parameter, XML node
    Second parameter, tree in array form for names (string) of child node(s) to look for
    Returns a list of child nodes (searched recursively)
    '''
    matches = get_child_nodes(rootnode, subtags[0])
    if len(subtags) == 1:
        return matches

    # Recurse one level deeper along the rest of the tag path.
    found = []
    for node in matches:
        found.extend(get_subnodes(node, subtags[1:]))
    return found
603
def get_texttag_values(xmlfile, tag):
    '''
    Get values for selected tags in an XML file
    First parameter, XML file to parse
    Second parameter, tag to search for in XML file
    Returns a list of text values found (empty if the file cannot be opened)
    '''
    try:
        datasource = open(xmlfile)
    except IOError:
        # A missing or unreadable file simply yields no values.
        return []

    dom2 = xml.dom.minidom.parse(datasource)   # parse an open file
    datasource.close()
    return get_xml_tag_strings(dom2, tag)
620
def get_tags(xmlfile, tag):
    '''
    Get the XML nodes matching a tag path in an XML file.
    First parameter, XML file to parse
    Second parameter, tag path (list of strings) to search for
    Returns a list of matching nodes (empty if the file cannot be opened)
    '''
    try:
        datasource = open(xmlfile)
    except IOError:
        return []

    dom2 = xml.dom.minidom.parse(datasource)   # parse an open file
    datasource.close()
    return get_subnodes(dom2, tag)
631
def get_xml_tag_strings(item, tag):
    '''
    Converts an XML node to a list of text for specified tag
    First parameter, XML node object
    Second parameter, tag tree names to search for
    Returns a list of text value for this tag
    '''
    nodes = get_subnodes(item, tag)
    return get_xml_item_strings(nodes)
640
def get_xml_item_strings(items):
    '''
    Converts XML nodes to text
    First parameter, list of XML Node objects
    Returns, list of strings as extracted from text nodes in items
    '''
    # Whitespace around the text content is not significant.
    return [node.firstChild.nodeValue.strip() for node in items]
651
def get_attr_from_item(item, name):
    '''
    Extract the attribute from the XML node
    First parameter, item XML node
    Second parameter, attribute name to look up
    Returns value of the attribute, or "" when the attribute is absent
    '''
    value = ""
    # NamedNodeMap.items() yields (name, value) pairs for each attribute.
    for attrname, attrvalue in item.attributes.items():
        if attrname == name:
            value = attrvalue
    return value
665
666###################################################
667
668class ProgressBar:
669    def __init__(self, length = 68):
670        self.length = length
671        self.update(0, 0)
672        self.total_size = 0
673
674    def download_update(self, block_count, block_size, total_size):
675        self.total_size = total_size
676
677        current_bytes = float(block_count * block_size) / 1024 / 1024
678        total_bytes = float(total_size) / 1024 / 1024
679
680        try:
681            percent = 100 * current_bytes / total_bytes
682        except ZeroDivisionError:
683            percent = 0
684
685        if percent > 100:
686            percent = 100
687
688        if total_bytes < 0:
689            return
690
691        size = int(percent * self.length / 100)
692        bar = ("#" * size) + ("-" * (self.length - size))
693        output = "[%s] %.0f%% %.2f/%.2f MB" % (bar, percent, current_bytes, total_bytes)
694
695        self.line_reset()
696        sys.stdout.write(output)
697
698    def update(self, count, total):
699        if count > total:
700            count = total
701
702        try:
703            percent = 100 * float(count) / total
704        except ZeroDivisionError:
705            percent = 0
706
707        if total < 0:
708            return
709
710        size = int(percent * self.length / 100)
711        bar = ("#" * size) + ("-" * (self.length - size))
712        output = "[%s] %.0f%%" % (bar, percent)
713
714        self.line_reset()
715        sys.stdout.write(output)
716
717    def line_reset(self):
718        sys.stdout.write("\b" * 80)
719        if os.name != 'nt':
720            sys.stdout.write("\n")
721
722    def end(self):
723        self.update(1, 1)
724        print ""
725
726    def download_end(self):
727        self.download_update(1, self.total_size, self.total_size)
728        print ""
729
# Run the command line interface when executed as a script
# (not when imported as a module).
if __name__ == "__main__":
    run()
732