1# -*- coding: utf-8 -*-
2
3#   This file is part of periscope.
4#
5#    periscope is free software; you can redistribute it and/or modify
6#    it under the terms of the GNU Lesser General Public License as published by
7#    the Free Software Foundation; either version 2 of the License, or
8#    (at your option) any later version.
9#
10#    periscope is distributed in the hope that it will be useful,
11#    but WITHOUT ANY WARRANTY; without even the implied warranty of
12#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13#    GNU Lesser General Public License for more details.
14#
15#    You should have received a copy of the GNU Lesser General Public License
16#    along with periscope; if not, write to the Free Software
17#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19import os, shutil, urllib2, sys, logging, traceback, zipfile
20import struct
21import socket # For timeout purposes
22import re
23
24log = logging.getLogger(__name__)
25
26USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.3)'
27
class SubtitleDB(object):
    ''' Base (kind of abstract) class that represents a SubtitleDB, usually a website.
    Should be rewritten using abc module in Python 2.6/3K.

    Subclasses are expected to override query(); the remaining methods are
    shared helpers for downloading, unzipping, language lookups, file name
    parsing and hashing. '''

    # Extensions (lower case, with the dot) stripped from names by getFileName()
    VIDEO_EXTENSIONS = ('.avi', '.wmv', '.mov', '.mp4', '.mpeg', '.mpg', '.mkv')
    # Extensions (lower case, without the dot) createFile() extracts from an archive
    SUBTITLE_EXTENSIONS = ("srt", "sub", "txt")
    # Size in bytes of each of the two chunks summed by hashFile()
    HASH_CHUNK_SIZE = 65536

    def __init__(self, langs, revertlangs = None):
        ''' Stores the language tables and compiles the file name regexes.

        langs is the mapping read by getLanguage(), revertlangs the inverse
        mapping read by getLG(). Only one of them needs to be given, the other
        one is derived by swapping keys and values. When both are given,
        revertlangs wins. When neither is given, both tables are empty. '''
        # Always define both attributes so the lookup methods never fail with
        # an AttributeError when no mapping was supplied
        self.langs = {}
        self.revertlangs = {}
        if langs:
            self.langs = langs
            self.revertlangs = dict((value, key) for (key, value) in langs.items())
        if revertlangs:
            self.revertlangs = revertlangs
            self.langs = dict((value, key) for (key, value) in revertlangs.items())
        # Raw strings: the patterns are unchanged, but "\." is no longer an
        # invalid escape sequence of the string literal itself
        self.tvshowRegex = re.compile(r'(?P<show>.*)S(?P<season>[0-9]{2})E(?P<episode>[0-9]{2}).(?P<teams>.*)', re.IGNORECASE)
        self.tvshowRegex2 = re.compile(r'(?P<show>.*).(?P<season>[0-9]{1,2})x(?P<episode>[0-9]{1,2}).(?P<teams>.*)', re.IGNORECASE)
        self.movieRegex = re.compile(r'(?P<movie>.*)[\.|\[|\(| ]{1}(?P<year>(?:(?:19|20)[0-9]{2}))(?P<teams>.*)', re.IGNORECASE)

    def searchInThread(self, queue, filename, langs):
        ''' Searches subtitles for the given filename and languages and puts
        the resulting list on the queue.

        Every found item gets a "plugin" (this instance) and a "filename" key
        unless it already has one. Exactly one list is always put on the
        queue; it is empty when the search failed. '''
        subs = []
        try:
            found = self.process(filename, langs)
            # Explicit loop: map() used for its side effects does nothing
            # where map() is lazy
            for item in found:
                item.setdefault("plugin", self)
                item.setdefault("filename", filename)
            log.info("%s writing %s items to queue" % (self.__class__.__name__, len(found)))
            subs = found
        except Exception:
            log.exception("Error occured")
        finally:
            # Each plugin must write, as the caller (periscope.py) waits for a
            # result on the queue
            queue.put(subs, True)

    def process(self, filepath, langs):
        ''' Main method to call on the plugin: pass the file name and the
        wished languages and it will query the subtitles source.

        Returns whatever query() returns, or an empty list when query()
        raised an exception (the error is logged). '''
        fname = self.getFileName(filepath)
        try:
            return self.query(fname, langs)
        except Exception:
            log.exception("Error occured")
            return []

    def createFile(self, subtitle):
        ''' Downloads the zip archive at subtitle["link"] and extracts its
        subtitle files next to the video subtitle["filename"].

        Each member whose extension is in SUBTITLE_EXTENSIONS is written to
        "<video name without extension>.<extension>"; several members sharing
        an extension overwrite each other. The downloaded archive is always
        deleted afterwards.

        Returns the path of the extracted ".srt" file if there is one,
        otherwise the first extracted file, or None when the download is not
        a zip file or holds no subtitle. '''
        suburl = subtitle["link"]
        videofilename = subtitle["filename"]
        srtbasefilename = videofilename.rsplit(".", 1)[0]
        zipfilename = srtbasefilename + ".zip"

        extracted = []
        try:
            self.downloadFile(suburl, zipfilename)
            if not zipfile.is_zipfile(zipfilename):
                log.info("Unexpected file type (not zip)")
                return None

            log.debug("Unzipping file " + zipfilename)
            zf = zipfile.ZipFile(zipfilename, "r")
            try:
                for el in zf.infolist():
                    # fileExtension() returns "" for members without a dot
                    # (e.g. directories) instead of raising an IndexError
                    extension = self.fileExtension(el.filename)
                    if extension in self.SUBTITLE_EXTENSIONS:
                        subpath = srtbasefilename + "." + extension
                        outfile = open(subpath, "wb")
                        try:
                            # el.filename is the key the archive is indexed by
                            outfile.write(zf.read(el.filename))
                        finally:
                            outfile.close()
                        if subpath not in extracted:
                            extracted.append(subpath)
                    else:
                        log.info("File %s does not seem to be valid " % el.filename)
            finally:
                zf.close()
        finally:
            # Deleting the zip file, even when the extraction failed
            if os.path.exists(zipfilename):
                os.remove(zipfilename)

        srtpath = srtbasefilename + ".srt"
        if srtpath in extracted:
            return srtpath
        if extracted:
            return extracted[0]
        return None

    def downloadContent(self, url, timeout = None):
        ''' Downloads the given url and returns its contents.

        Returns None when the request fails with an HTTPError or URLError
        (a warning is logged). Other exceptions propagate to the caller.

        NOTE: a timeout is applied with socket.setdefaulttimeout(), so it
        stays in effect for every socket created afterwards in the process. '''
        try:
            log.debug("Downloading %s" % url)
            req = urllib2.Request(url, headers={'Referer' : url, 'User-Agent' : USER_AGENT})
            if timeout:
                socket.setdefaulttimeout(timeout)
            f = urllib2.urlopen(req)
            try:
                return f.read()
            finally:
                # Release the connection even when read() fails
                f.close()
        except urllib2.HTTPError:
            # sys.exc_info() replaces the "except X, e" binding so the syntax
            # is valid on every Python version
            error = sys.exc_info()[1]
            log.warning("HTTP Error: %s - %s" % (error.code, url))
        except urllib2.URLError:
            error = sys.exc_info()[1]
            log.warning("URL Error: %s - %s" % (error.reason, url))
        return None

    def downloadFile(self, url, filename):
        ''' Downloads the given url to the given filename.

        Raises IOError when the content could not be downloaded; in that
        case the target file is neither created nor truncated. '''
        content = self.downloadContent(url)
        if content is None:
            # downloadContent() already logged the reason
            raise IOError("Could not download %s" % url)
        dump = open(filename, "wb")
        try:
            dump.write(content)
        finally:
            dump.close()
        log.debug("Download finished to file %s. Size : %s"%(filename,os.path.getsize(filename)))

    def getLG(self, language):
        ''' Returns the short (two-character) representation of the long
        language name, or None (with a logged warning) when it is unknown '''
        try:
            return self.revertlangs[language]
        except KeyError:
            log.warning("Ooops, you found a missing language in the config file of %s: %s. Send a bug report to have it added." %(self.__class__.__name__, language))
            return None

    def getLanguage(self, lg):
        ''' Returns the long naming of the language for a two character code,
        or None (with a logged warning) when the code is unknown '''
        try:
            return self.langs[lg]
        except KeyError:
            log.warning("Ooops, you found a missing language in the config file of %s: %s. Send a bug report to have it added." %(self.__class__.__name__, lg))
            return None

    def query(self, token, langs = None):
        ''' Queries the subtitles source; must be overridden by subclasses.

        langs is accepted because process() calls query(fname, langs). The
        base implementation always raises a TypeError. '''
        raise TypeError("%s has not implemented method '%s'" %(self.__class__.__name__, 'query'))

    def fileExtension(self, filename):
        ''' Returns the lower-cased file extension (without the dot), or an
        empty string when the name has no extension '''
        return os.path.splitext(filename)[1][1:].lower()

    def getFileName(self, filepath):
        ''' Returns the name used to query the subtitles source.

        The directory part is dropped when filepath is an existing file, and
        a trailing video extension (see VIDEO_EXTENSIONS, matched regardless
        of case) is removed. '''
        if os.path.isfile(filepath):
            filename = os.path.basename(filepath)
        else:
            filename = filepath
        if filename.lower().endswith(self.VIDEO_EXTENSIONS):
            return filename.rsplit('.', 1)[0]
        return filename

    def guessFileData(self, filename):
        ''' Guesses what the file is from its (lower-cased) name.

        Returns a dict with a 'type' key:
          - 'tvshow':  also 'name', 'season' (int), 'episode' (int), 'teams'
          - 'movie':   also 'name', 'year' (string), 'teams', 'part' (1, 2 or None)
          - 'unknown': also 'name' (the lower-cased file name) and an empty 'teams'
        'teams' is the rest of the name split on dots. '''
        # NOTE: unicode() is Python 2 only and uses the default codec
        filename = unicode(self.getFileName(filename).lower())

        # Both tv show patterns expose the same groups, try them in order
        for regex in (self.tvshowRegex, self.tvshowRegex2):
            matches_tvshow = regex.match(filename)
            if matches_tvshow: # It looks like a tv show
                (tvshow, season, episode, teams) = matches_tvshow.groups()
                tvshow = tvshow.replace(".", " ").strip()
                return {'type' : 'tvshow', 'name' : tvshow, 'season' : int(season), 'episode' : int(episode), 'teams' : teams.split('.')}

        matches_movie = self.movieRegex.match(filename)
        if matches_movie:
            (movie, year, teams) = matches_movie.groups()
            teams = teams.split('.')
            part = None
            # cd2 is checked last, so it wins when both markers are present
            if "cd1" in teams:
                teams.remove('cd1')
                part = 1
            if "cd2" in teams:
                teams.remove('cd2')
                part = 2
            return {'type' : 'movie', 'name' : movie.strip(), 'year' : year, 'teams' : teams, 'part' : part}

        return {'type' : 'unknown', 'name' : filename, 'teams' : [] }

    def hashFile(self, name):
        '''
        Calculates the hash the way Media Player Classic does, as it is the
        hash used by OpenSubtitles: the file size plus the sum of the first
        and the last 64 KiB read as little endian unsigned 64 bit integers,
        truncated to 64 bits.
        By the way, this is not a very robust hash code.

        Returns the hash as 16 lower case hexadecimal digits, or the string
        "SizeError" when the file is smaller than 128 KiB.
        '''
        chunksize = self.HASH_CHUNK_SIZE
        longlongformat = 'Q'  # unsigned long long
        bytesize = struct.calcsize(longlongformat)
        # "<" forces little endian
        chunkformat = "<%d%s" % (chunksize // bytesize, longlongformat)

        f = open(name, "rb")
        try:
            filesize = os.fstat(f.fileno()).st_size
            if filesize < chunksize * 2:
                log.error('File is too small')
                return "SizeError"

            filehash = filesize
            filehash += sum(struct.unpack(chunkformat, f.read(chunksize)))

            f.seek(-chunksize, os.SEEK_END) # size is always >= 2 chunks here
            filehash += sum(struct.unpack(chunkformat, f.read(chunksize)))
        finally:
            # Also closes the file on the "SizeError" early return
            f.close()

        return "%016x" % (filehash & 0xFFFFFFFFFFFFFFFF)
209
210
class InvalidFileException(Exception):
    ''' Exception object to be raised when the file is invalid.

    filename is the offending file and reason explains why it was rejected;
    both are kept as attributes of the same name. '''
    def __init__(self, filename, reason):
        Exception.__init__(self, filename, reason)
        self.filename = filename
        self.reason = reason

    def __str__(self):
        # Read the instance attributes (the bare names are not defined here)
        # and return a string, as __str__ must
        return "%r: %r" % (self.filename, self.reason)
218