# -*- coding: utf-8 -*-

#   This file is part of periscope.
#
#    periscope is free software; you can redistribute it and/or modify
#    it under the terms of the GNU Lesser General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    periscope is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public License
#    along with periscope; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import logging
import os
import re
import shutil
import socket  # For timeout purposes
import struct
import sys
import traceback
import zipfile

try:
    import urllib2
except ImportError:
    # Python 3 split urllib2 up; urllib.request re-exports every name used
    # below (Request, urlopen, HTTPError, URLError).
    import urllib.request as urllib2

try:
    _text_type = unicode
except NameError:
    # Python 3: str is already the unicode text type.
    _text_type = str

log = logging.getLogger(__name__)

USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.3)'

# Video container extensions that getFileName() strips (compared lower-cased).
_VIDEO_EXTENSIONS = ('.avi', '.wmv', '.mov', '.mp4', '.mpeg', '.mpg', '.mkv')

# Extensions of zip members that createFile() extracts as subtitles.
_SUBTITLE_EXTENSIONS = ("srt", "sub", "txt")

# Size in bytes of the head and tail chunks summed by hashFile().
_HASH_CHUNK_SIZE = 65536


class SubtitleDB(object):
    ''' Base (kind of abstract) class that represent a SubtitleDB, usually a website.
    Should be rewritten using abc module in Python 2.6/3K.

    Subclasses are expected to override query(); everything else is shared
    helper code (downloading, unzipping, file name parsing, hashing).'''

    # "<show>S01E02.<teams>" style names. The separator dots are unescaped in
    # the pattern, so they match any single character.
    tvshowRegex = re.compile(r'(?P<show>.*)S(?P<season>[0-9]{2})E(?P<episode>[0-9]{2}).(?P<teams>.*)', re.IGNORECASE)
    # "<show>.1x02.<teams>" style names.
    tvshowRegex2 = re.compile(r'(?P<show>.*).(?P<season>[0-9]{1,2})x(?P<episode>[0-9]{1,2}).(?P<teams>.*)', re.IGNORECASE)
    # "<movie>.<year><teams>" style names, year restricted to 19xx/20xx.
    movieRegex = re.compile(r'(?P<movie>.*)[\.|\[|\(| ]{1}(?P<year>(?:(?:19|20)[0-9]{2}))(?P<teams>.*)', re.IGNORECASE)

    def __init__(self, langs, revertlangs=None):
        ''' Stores the language mappings of the plugin.

        langs maps one naming of a language to the other and revertlangs is
        the inverse mapping; whichever is given is inverted to build the
        missing one. When both are given, revertlangs wins. When neither is
        given both mappings are empty, so lookups log a warning instead of
        failing with AttributeError.'''
        self.langs = {}
        self.revertlangs = {}
        if langs:
            self.langs = langs
            self.revertlangs = dict((value, key) for key, value in langs.items())
        if revertlangs:
            self.revertlangs = revertlangs
            self.langs = dict((value, key) for key, value in revertlangs.items())

    def searchInThread(self, queue, filename, langs):
        ''' search subtitles with the given filename for the given languages

        Every result dict gets a "plugin" and a "filename" entry (unless it
        already has one) and the result list is put on the queue. Exactly one
        item is always written, an empty list on failure, because the caller
        waits for a result on the queue.'''
        subs = []
        try:
            found = self.process(filename, langs)
            # Explicit loop: map() for side effects never runs where it is lazy.
            for item in found:
                item.setdefault("plugin", self)
                item.setdefault("filename", filename)
            log.info("%s writing %s items to queue", self.__class__.__name__, len(found))
            subs = found
        except Exception:
            log.exception("Error occured")
        finally:
            # Each plugin must write, as the caller waits for a result on the queue
            queue.put(subs, True)

    def process(self, filepath, langs):
        ''' main method to call on the plugin, pass the filename and the wished
        languages and it will query the subtitles source

        Returns whatever query() returns, or an empty list if it raised.'''
        fname = self.getFileName(filepath)
        try:
            return self.query(fname, langs)
        except Exception:
            log.exception("Error occured")
            return []

    def createFile(self, subtitle):
        '''pass the URL of the sub and the file it matches, will unzip it
        and return the path to the created file

        subtitle is a dict with a "link" (URL of the zip) and a "filename"
        (path of the video). Zip members ending in srt/sub/txt are written
        next to the video as <video base name>.<extension>.

        Returns the path of the extracted .srt file if there is one, otherwise
        the first extracted file, or None when the download failed, the
        download is not a zip, or the zip holds no subtitle.'''
        suburl = subtitle["link"]
        videofilename = subtitle["filename"]
        srtbasefilename = os.path.splitext(videofilename)[0]
        zipfilename = srtbasefilename + ".zip"
        self.downloadFile(suburl, zipfilename)

        # Check the file rather than a return value, so that subclasses
        # overriding downloadFile() without returning anything keep working.
        if not os.path.isfile(zipfilename):
            log.info("Download of %s failed, no subtitle created", suburl)
            return None

        if not zipfile.is_zipfile(zipfilename):
            log.info("Unexpected file type (not zip)")
            os.remove(zipfilename)
            return None

        log.debug("Unzipping file " + zipfilename)
        extracted = []
        zf = zipfile.ZipFile(zipfilename, "r")
        try:
            for el in zf.infolist():
                # splitext copes with members that have no extension at all
                extension = os.path.splitext(el.orig_filename)[1][1:].lower()
                if extension in _SUBTITLE_EXTENSIONS:
                    target = srtbasefilename + "." + extension
                    with open(target, "wb") as outfile:
                        outfile.write(zf.read(el.orig_filename))
                    if target not in extracted:
                        extracted.append(target)
                else:
                    log.info("File %s does not seem to be valid " % el.orig_filename)
        finally:
            zf.close()
        # Deleting the zip file
        os.remove(zipfilename)

        srtfilename = srtbasefilename + ".srt"
        if srtfilename in extracted:
            return srtfilename
        if extracted:
            return extracted[0]
        return None

    def downloadContent(self, url, timeout=None):
        ''' Downloads the given url and returns its contents.

        timeout, when given, applies to this request only. Returns None when
        the server answers with an HTTP error or the URL cannot be reached;
        both cases are logged as warnings.'''
        try:
            log.debug("Downloading %s" % url)
            req = urllib2.Request(url, headers={'Referer': url, 'User-Agent': USER_AGENT})
            if timeout:
                # Per-request timeout: socket.setdefaulttimeout() is global to
                # the process and would leak into the other plugin threads.
                response = urllib2.urlopen(req, timeout=timeout)
            else:
                response = urllib2.urlopen(req)
            try:
                return response.read()
            finally:
                response.close()
        except urllib2.HTTPError as e:
            log.warning("HTTP Error: %s - %s" % (e.code, url))
        except urllib2.URLError as e:
            log.warning("URL Error: %s - %s" % (e.reason, url))
        return None

    def downloadFile(self, url, filename):
        ''' Downloads the given url to the given filename

        Returns True when the file was written. When the download failed,
        nothing is written and False is returned.'''
        content = self.downloadContent(url)
        if content is None:
            log.warning("Nothing downloaded from %s, %s not written" % (url, filename))
            return False
        with open(filename, "wb") as dump:
            dump.write(content)
        log.debug("Download finished to file %s. Size : %s" % (filename, os.path.getsize(filename)))
        return True

    def getLG(self, language):
        ''' Returns the short (two-character) representation of the long language name

        Returns None (after logging a warning) for an unknown language.'''
        try:
            return self.revertlangs[language]
        except KeyError:
            log.warning("Ooops, you found a missing language in the config file of %s: %s. Send a bug report to have it added." % (self.__class__.__name__, language))
            return None

    def getLanguage(self, lg):
        ''' Returns the long naming of the language on a two character code

        Returns None (after logging a warning) for an unknown code.'''
        try:
            return self.langs[lg]
        except KeyError:
            log.warning("Ooops, you found a missing language in the config file of %s: %s. Send a bug report to have it added." % (self.__class__.__name__, lg))
            return None

    def query(self, token, langs=None):
        ''' Queries the subtitle source; must be overridden by each plugin.

        process() calls it as query(filename_without_extension, langs).
        The base implementation always raises TypeError.'''
        raise TypeError("%s has not implemented method '%s'" % (self.__class__.__name__, 'query'))

    def fileExtension(self, filename):
        ''' Returns the file extension (without the dot)'''
        return os.path.splitext(filename)[1][1:].lower()

    def getFileName(self, filepath):
        ''' Returns the name of the video without its video extension.

        The directory part is dropped only when filepath is an existing file;
        otherwise the string is used as given. A known video extension is
        removed whatever its case.'''
        if os.path.isfile(filepath):
            filename = os.path.basename(filepath)
        else:
            filename = filepath
        if filename.lower().endswith(_VIDEO_EXTENSIONS):
            return filename.rsplit('.', 1)[0]
        return filename

    def guessFileData(self, filename):
        ''' Guesses what a video is from its (lower-cased) file name.

        Returns a dict whose 'type' is one of:
          'tvshow'  - with 'name', 'season', 'episode' (ints) and 'teams'
          'movie'   - with 'name', 'year' (string), 'teams' and 'part'
                      (1 or 2 when a cd1/cd2 tag was found, else None)
          'unknown' - with 'name' and an empty 'teams' list
        'teams' is the remainder of the name split on dots.'''
        filename = _text_type(self.getFileName(filename).lower())

        for regex in (self.tvshowRegex, self.tvshowRegex2):
            matches_tvshow = regex.match(filename)
            if matches_tvshow:  # It looks like a tv show
                (tvshow, season, episode, teams) = matches_tvshow.groups()
                tvshow = tvshow.replace(".", " ").strip()
                return {'type': 'tvshow', 'name': tvshow, 'season': int(season),
                        'episode': int(episode), 'teams': teams.split('.')}

        matches_movie = self.movieRegex.match(filename)
        if matches_movie:
            (movie, year, teams) = matches_movie.groups()
            teams = teams.split('.')
            part = None
            if "cd1" in teams:
                teams.remove('cd1')
                part = 1
            if "cd2" in teams:
                teams.remove('cd2')
                part = 2
            return {'type': 'movie', 'name': movie.strip(), 'year': year, 'teams': teams, 'part': part}

        return {'type': 'unknown', 'name': filename, 'teams': []}

    def hashFile(self, name):
        '''
        Calculates the Hash a la Media Player Classic as it is the hash used by OpenSubtitles.
        By the way, this is not a very robust hash code.

        The hash is the file size plus the sum of the first and the last 64 KiB
        read as little endian unsigned 64 bit integers, truncated to 64 bits
        and returned as 16 hexadecimal digits. Files smaller than 128 KiB
        yield the string "SizeError".
        '''
        longlongformat = 'Q'  # unsigned long long
        bytesize = struct.calcsize(longlongformat)
        chunkformat = "<%d%s" % (_HASH_CHUNK_SIZE // bytesize, longlongformat)

        # "with" closes the file on every path, including the early return
        with open(name, "rb") as f:
            filesize = os.fstat(f.fileno()).st_size
            if filesize < _HASH_CHUNK_SIZE * 2:
                log.error('File is too small')
                return "SizeError"

            head = f.read(_HASH_CHUNK_SIZE)
            f.seek(-_HASH_CHUNK_SIZE, os.SEEK_END)  # size is always >= 131072
            tail = f.read(_HASH_CHUNK_SIZE)

        filehash = filesize
        filehash += sum(struct.unpack(chunkformat, head))
        filehash += sum(struct.unpack(chunkformat, tail))
        filehash &= 0xFFFFFFFFFFFFFFFF
        return "%016x" % filehash


class InvalidFileException(Exception):
    ''' Exception object to be raised when the file is invalid

    filename is the offending file and reason says why it was rejected.'''

    def __init__(self, filename, reason):
        # Pass both values on so that e.args is populated as well.
        super(InvalidFileException, self).__init__(filename, reason)
        self.filename = filename
        self.reason = reason

    def __str__(self):
        return "(%r, %r)" % (self.filename, self.reason)