# -*- coding: UTF-8 -*-

__revision__ = '$Id$'

# Copyright (c) 2005-2009 Vasco Nunes, Piotr Ożarowski
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

# You may use and distribute this software under the terms of the
# GNU General Public License, version 2 or later

import string
import re
import gutils
import movie

# Plugin metadata. `_` is not defined in this file; it is presumably the
# gettext translation function installed as a builtin by the host application.
plugin_name = "Cinematografo"
plugin_description = "Rivista del Cinematografo dal 1928"
plugin_url = "www.cinematografo.it"
plugin_language = _("Italian")
plugin_author = "Vasco Nunes, Piotr Ożarowski"
plugin_author_email = "<vasco.m.nunes@gmail.com>"
plugin_version = "1.4"


class Plugin(movie.Movie):
    """Scraper for a single film page on www.cinematografo.it.

    Every ``get_*`` method reads ``self.page`` and stores its result in the
    attribute of the same name. ``self.page`` is not assigned in this file;
    presumably the ``movie.Movie`` base class fills it with the HTML fetched
    from ``self.url``.

    NOTE(review): ``gutils.trim`` / ``gutils.regextrim`` are defined outside
    this file; the comments below assume they return the text found between
    a start marker and an end marker (a regular expression for
    ``regextrim``), or an empty string when nothing matches -- confirm.
    """

    def __init__(self, id):
        """Remember the film code and build the URL of its details page."""
        self.encode = 'iso-8859-1'
        self.movie_id = id
        self.url = "http://www.cinematografo.it/bancadati/consultazione/schedafilm_2009.jsp?completa=si&codice=%s" % str(self.movie_id)

    def get_image(self):
        # Find the film's poster image
        tmp_poster = gutils.regextrim(self.page, "../images_locandine/%s/" % self.movie_id, ".(JPG|jpg)\"")
        if tmp_poster != "":
            self.image_url = "http://www.cinematografo.it/bancadati/images_locandine/%s/%s.jpg" % (self.movie_id, tmp_poster)
        else:
            self.image_url = ""

    def get_o_title(self):
        # Find the film's original title
        self.o_title = gutils.trim(self.page, ">Titolo Originale</font>", "</tr>")
        self.o_title = self.capwords(self.o_title)
        # if nothing found, use the title
        if self.o_title == '':
            self.o_title = gutils.trim(self.page, "<!--TITOLO-->", "<!--FINE TITOLO-->")
            self.o_title = gutils.trim(self.o_title, "<b>", "</b>")
            self.o_title = self.capwords(self.o_title)

    def get_title(self):
        # Find the film's local title.
        # Probably the original title translation
        self.title = gutils.trim(self.page, "<!--TITOLO-->", "<!--FINE TITOLO-->")
        self.title = gutils.trim(self.title, "<b>", "</b>")
        self.title = self.capwords(self.title)

    def get_director(self):
        # Find the film's director
        self.director = gutils.trim(self.page, ">Regia", "Attori<")
        # NOTE(review): as written this replaces a space with a space (no-op);
        # the original search text may have been an HTML entity -- confirm.
        self.director = self.director.replace(" ", " ")
        self.director = gutils.strip_tags(self.director)
        self.director = self.director.strip()

    def get_plot(self):
        # Find the film's plot
        self.plot = gutils.regextrim(self.page, '"fontYellowB">Trama</font>', "(\n|Critica<|Note<)")

    def get_year(self):
        # Find the film's year
        self.year = gutils.trim(self.page, ">Anno</font>", "</tr>")
        self.year = gutils.digits_only(gutils.clean(self.year))

    def get_runtime(self):
        # Find the film's running time
        self.runtime = gutils.trim(self.page, ">Durata</font>", "</tr>")
        self.runtime = gutils.digits_only(gutils.clean(self.runtime))

    def get_genre(self):
        # Find the film's genre
        self.genre = self.capwords(gutils.trim(self.page, ">Genere</font>", "</tr>"))

    def get_cast(self):
        # Find the actors. Try to make it comma separated.
        self.cast = gutils.regextrim(self.page, ">Attori</font>", '(<font class="fontViolaB">|\n)')
        # Turn the markup separators into line breaks / " as " before the
        # remaining markup is removed by gutils.clean.
        self.cast = self.cast.replace("target='_self'>", "\n>")
        self.cast = self.cast.replace("<a>", _(" as "))
        self.cast = self.cast.replace("</tr><tr>", '\n')
        self.cast = self.cast.replace("...vedi il resto del cast", '')
        self.cast = gutils.clean(self.cast)
        # NOTE(review): as written this replaces a space with a space (no-op);
        # the original search text may have been an HTML entity -- confirm.
        self.cast = self.cast.replace(" ", ' ')
        # Collapse runs of spaces and drop spaces at the start of each line.
        self.cast = re.sub('[ ]+', ' ', self.cast)
        self.cast = re.sub('\n[ ]+', '\n', self.cast)

    def get_classification(self):
        # Find the film's classification (not extracted by this plugin)
        self.classification = ''

    def get_studio(self):
        # Find the studio
        self.studio = self.capwords(gutils.clean(gutils.trim(self.page, ">Distribuzione</font>", "</tr>")))

    def get_o_site(self):
        # Find the film's official site (not extracted by this plugin)
        self.o_site = ''

    def get_site(self):
        # Use the film's details page on cinematografo.it as its site
        self.site = self.url

    def get_trailer(self):
        # Find the film's trailer page or location
        self.trailer = ''
        pos_end = self.page.find('>guarda il trailer<')
        if pos_end > -1:
            # Walk back to the anchor that encloses the trailer link text
            pos_beg = self.page[:pos_end].rfind('<a href')
            if pos_beg > -1:
                self.trailer = gutils.trim(self.page[pos_beg:pos_end], '"', '"')

    def get_country(self):
        # Find the film's country
        self.country = self.capwords(gutils.clean(gutils.trim(self.page, ">Origine</font>", "</tr>"))).replace('Usa', 'USA')

    def get_rating(self):
        # Find the film's rating. From 0 to 10.
        # Convert if needed when assigning.
        self.rating = 0

    def get_notes(self):
        """Build ``self.notes`` from the 'Critica' and 'Note' sections.

        Each section that yields text is added under its own heading.
        ``<br>`` tags in the 'Note' section are swapped for the placeholder
        ``--BR--`` before ``capwords`` runs, because ``string.capwords``
        re-joins words with single spaces and would drop real line breaks.
        """
        self.notes = ''
        critica = gutils.clean(gutils.regextrim(self.page, 'Critica</font>', "(</td>|\n|Note<)").replace('<br>', '\n'))
        if critica:
            self.notes = 'Critica:\n\n' + critica + '\n\n'
        note = gutils.clean(gutils.regextrim(self.page, 'Note</font>', "(</td>|\n|Critica<)").replace('<br>', '--BR--'))
        if note:
            note = self.capwords(note)
            # string.capwords lower-cases the placeholder to '--br--', but
            # self.capwords leaves text that is not all upper case untouched,
            # so the placeholder can survive in either spelling. Restore both,
            # otherwise a literal '--BR--' leaks into the notes.
            note = note.replace('--br--', '\n').replace('--BR--', '\n')
            self.notes = self.notes + 'Note:\n\n' + note

    def get_screenplay(self):
        # Find the screenplay
        self.screenplay = gutils.trim(self.page, 'Sceneggiatura</font></td></tr><tr>', '<td colspan="2"')
        self.screenplay = self.screenplay.replace('<tr>', ', ')
        # beautification: tidy spacing and drop a trailing comma
        self.screenplay = gutils.clean(self.screenplay)
        self.screenplay = self.screenplay.replace(' ,', ',')
        self.screenplay = re.sub('[ ]+', ' ', self.screenplay)
        self.screenplay = re.sub('[,][ ]*$', '', self.screenplay)

    def get_cameraman(self):
        # Find the cameraman
        self.cameraman = gutils.trim(self.page, 'Fotografia</font></td></tr><tr>', '<td colspan="2"')
        self.cameraman = self.cameraman.replace('<tr>', ', ')
        # beautification: tidy spacing and drop a trailing comma
        self.cameraman = gutils.clean(self.cameraman)
        self.cameraman = self.cameraman.replace(' ,', ',')
        self.cameraman = re.sub('[ ]+', ' ', self.cameraman)
        self.cameraman = re.sub('[,][ ]*$', '', self.cameraman)

    def capwords(self, name):
        """Return ``name`` in capitalised words if it is written all in upper case.

        The upper-case test is made on ``gutils.clean(name)``; text that is
        not entirely upper case is returned unchanged.
        """
        tmp = gutils.clean(name)
        if tmp == tmp.upper():
            return string.capwords(name)
        return name


class SearchPlugin(movie.SearchMovie):
    """Title search on www.cinematografo.it.

    ``self.page``, ``self.ids``, ``self.titles`` and ``open_search`` are not
    defined in this file; presumably they come from ``movie.SearchMovie``.
    """

    # A movie search object
    def __init__(self):
        self.encode = 'iso-8859-1'
        self.original_url_search = 'http://www.cinematografo.it/bancadati/consultazione/trovatitoli.jsp?startrighe=0&endrighe=100&tipo=CONTIENEPAROLE&word='
        self.translated_url_search = self.original_url_search

    def search(self, parent_window):
        # Perform the web search
        self.open_search(parent_window)
        self.sub_search()
        return self.page

    def sub_search(self):
        # Isolating just a portion (with the data we want) of the results
        self.page = gutils.trim(self.page, '<td valign="top" width="73%" bgcolor="#4d4d4d">', '</td>')

    def capwords(self, name):
        """Return ``name`` in capitalised words if it is written all in upper case.

        The upper-case test is made on ``gutils.clean(name)``; text that is
        not entirely upper case is returned unchanged.
        """
        tmp = gutils.clean(name)
        if tmp == tmp.upper():
            return string.capwords(name)
        return name

    def get_searches(self):
        """Collect the film code and display title of every search result.

        The page is split on ``<li>``; each piece that contains a
        ``?codice=`` link adds one entry to ``self.ids`` and one to
        ``self.titles`` (with the bracketed year appended when present).
        """
        # Try to find both id and film title for each search result
        elements = self.page.split("<li>")
        # NOTE(review): this stores the last page fragment, not a count;
        # kept as in the original -- confirm what callers expect here.
        self.number_results = elements[-1]

        if elements[0] == '':
            self.number_results = 0
            return

        for element in elements:
            movie_id = gutils.trim(element, "?codice=", "\">")
            if movie_id == '':
                continue
            self.ids.append(movie_id)
            title = self.capwords(gutils.convert_entities(gutils.trim(element, "<b>", "</b>")))
            # A four-digit year in square brackets, e.g. "[1951]"
            year = re.search(r'\[[0-9]{4}\]', element)
            if year:
                self.titles.append(title + ' ' + year.group(0))
            else:
                self.titles.append(title)


#
# Plugin Test
#


class SearchPluginTest(SearchPlugin):
    #
    # Configuration for automated tests:
    # dict { movie_id -> [ expected result count for original url, expected result count for translated url ] }
    #
    test_configuration = {
        'Rocky' : [12, 12],
        'però' : [6, 6],
        'il ritorno' : [98, 98]}


class PluginTest:
    #
    # Configuration for automated tests:
    # dict { movie_id -> dict { attribute -> value } }
    #
    # value: * True/False if attribute only should be tested for any value
    #        * or the expected value
    #
    # The 'cast' value uses backslash line continuations inside the string
    # literals, so the continuation lines must stay at column 0.
    #
    test_configuration = {
        '3996' : {
            'title' : 'Amor non ho, però... però...',
            'o_title' : 'Amor non ho, però... però...',
            'director' : 'Giorgio Bianchi',
            'plot' : True,
            'cast' : 'Renato Rascel' + _(' as ') + 'Teodoro\n\
Gina Lollobrigida' + _(' as ') + 'Gina\n\
Luigi Pavese' + _(' as ') + 'Antonio Scutipizzo\n\
Aroldo Tieri' + _(' as ') + 'Giuliano\n\
Carlo Ninchi' + _(' as ') + 'Maurizio\n\
Kiki Urbani' + _(' as ') + 'Kiki, la ballerina\n\
Adriana Danieli' + _(' as ') + 'Olga\n\
Strelsa Brown' + _(' as ') + 'Mabel\n\
Virgilio Riento' + _(' as ') + 'Il contadino\n\
Gabriele Tinti' + _(' as ') + '(Gastone Tinti) Un componente dell\'orchestra\n\
Guido Barbarisi' + _(' as ') + '\n\
Giuseppe De Martino' + _(' as ') + '\n\
Maria Carla Vittone' + _(' as ') + '\n\
Raimondo Vianello' + _(' as ') + '(Riccardo Vianello) \n\
Marco Tulli' + _(' as ') + '\n\
Giuseppe Ricagno' + _(' as ') + '\n\
Luciano Rebeggiani' + _(' as ') + '\n\
Giovanni Lesa' + _(' as ') + '\n\
Kurt Lary' + _(' as ') + '\n\
Riccardo Ferri' + _(' as ') + '\n\
Pia De Doses' + _(' as ') + '\n\
Guglielmo Barnabò' + _(' as ') + '\n\
Galeazzo Benti as',
            'country' : 'Italia',
            'genre' : 'Commedia',
            'classification' : False,
            'studio' : 'Minervafilm - Mfd Home Video',
            'o_site' : False,
            'site' : 'http://www.cinematografo.it/bancadati/consultazione/schedafilm_2009.jsp?completa=si&codice=3996',
            'trailer' : False,
            'year' : 1951,
            'notes' : True,
            'runtime' : 90,
            'image' : False,
            'rating' : False,
            'screenplay' : 'Giuseppe Marotta, Mario Brancacci, Vittorio Veltroni, Augusto Borselli, Franco Riganti',
            'cameraman' : 'Mario Bava'},
    }