# -*- coding: utf-8 -*-
import io
import logging
import math
import re
import zipfile
from random import randint
from threading import Thread

import rarfile
from guessit import guessit
from requests import Session
from requests.adapters import HTTPAdapter
from requests.exceptions import HTTPError

from subliminal.exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, Error, ProviderError
from subliminal.providers import ParserBeautifulSoup
from subliminal.subtitle import fix_line_ending
from subliminal.video import Episode, Movie

from subliminal_patch.exceptions import ParseResponseError
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.score import framerate_equal
from subliminal_patch.subtitle import Subtitle, guess_matches, sanitize

from subzero.language import Language

from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST

logger = logging.getLogger(__name__)

# Season/episode marker such as "S01E02" (case-insensitive). Group 1 is the
# season number, group 2 the episode number.
_SEASON_EPISODE_RE = re.compile(r'S(\d+)E(\d+)', re.IGNORECASE)


class TitulkySubtitle(Subtitle):
    """Titulky.com subtitle"""
    provider_name = 'titulky'

    hash_verifiable = False
    hearing_impaired_verifiable = False

    def __init__(self,
                 sub_id,
                 imdb_id,
                 language,
                 names,
                 season,
                 episode,
                 year,
                 releases,
                 fps,
                 uploader,
                 approved,
                 page_link,
                 download_link,
                 skip_wrong_fps=False,
                 asked_for_episode=None):
        """Store the subtitle metadata gathered from the search and details pages.

        :param names: list of subtitle names; the first item is the main name.
        :param releases: list of release strings, joined into ``release_info``.
        :param season: season number, or ``None`` to derive it from the main name.
        :param episode: episode number, or ``None`` to derive it from the main name.
        :param skip_wrong_fps: when true, ``get_matches`` returns no matches for
            a subtitle whose FPS differs from the video's FPS.
        """
        super().__init__(language, page_link=page_link)

        self.names = names
        self.year = year
        self.sub_id = sub_id
        self.imdb_id = imdb_id
        self.fps = fps
        self.season = season
        self.episode = episode
        self.releases = releases
        self.release_info = ', '.join(releases)
        self.language = language
        self.approved = approved
        self.page_link = page_link
        self.uploader = uploader
        self.download_link = download_link
        self.skip_wrong_fps = skip_wrong_fps
        self.asked_for_episode = asked_for_episode
        self.matches = None

        # If the search query did not carry season and episode numbers, fall back
        # to the S00E00 marker that the main subtitle name most likely contains.
        main_name = names[0] if names else ''
        season_episode = _SEASON_EPISODE_RE.search(main_name)
        if season_episode:
            if self.season is None:
                self.season = int(season_episode.group(1))
            if self.episode is None:
                self.episode = int(season_episode.group(2))

    @property
    def id(self):
        """Identifier of the subtitle (the ``sub_id`` given to the constructor)."""
        return self.sub_id

    def get_fps(self):
        """Return the frame rate given to the constructor (may be ``None``)."""
        return self.fps

    def get_matches(self, video):
        """Compute the set of match names between this subtitle and ``video``.

        The result is also stored in ``self.matches``. When ``skip_wrong_fps``
        is enabled and both frame rates are known but differ, the set is empty.
        """
        matches = set()
        _type = 'movie' if isinstance(video, Movie) else 'episode'

        sub_names = self._remove_season_episode_string(self.names)

        if _type == 'episode':
            ## EPISODE

            # match imdb_id of a series
            if video.series_imdb_id and video.series_imdb_id == self.imdb_id:
                matches.add('series_imdb_id')

            # match season/episode; compare against None so that number 0 can match too
            if self.season is not None and self.season == video.season:
                matches.add('season')
            if self.episode is not None and self.episode == video.episode:
                matches.add('episode')

            # match series name
            series_names = [video.series] + video.alternative_series
            logger.debug(
                f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
            )
            if _contains_element(_from=series_names,
                                 _in=sub_names,
                                 exactly=True):
                matches.add('series')

            # match episode title
            episode_titles = [video.title]
            logger.debug(
                f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
            )
            if _contains_element(_from=episode_titles,
                                 _in=sub_names,
                                 exactly=True):
                matches.add('episode_title')

        elif _type == 'movie':
            ## MOVIE

            # match imdb_id of a movie
            if video.imdb_id and video.imdb_id == self.imdb_id:
                matches.add('imdb_id')

            # match movie title
            video_titles = [video.title] + video.alternative_titles
            logger.debug(
                f"Titulky.com: Finding exact match between subtitle names {sub_names} and video titles {video_titles}"
            )
            if _contains_element(_from=video_titles,
                                 _in=sub_names,
                                 exactly=True):
                matches.add('title')

        ## MOVIE OR EPISODE

        # match year
        if video.year and video.year == self.year:
            matches.add('year')

        # match other properties based on release infos
        for release in self.releases:
            matches |= guess_matches(video, guessit(release, {"type": _type}))

        # If turned on in settings, then do not match if video FPS is not equal to subtitle FPS
        if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(
                video.fps, self.fps):
            logger.info(f"Titulky.com: Skipping subtitle {self}: wrong FPS")
            matches.clear()

        self.matches = matches

        return matches

    def _remove_season_episode_string(self, names):
        """Return a new list with the S00E00 marker stripped from every name."""
        return [_SEASON_EPISODE_RE.sub('', name).strip() for name in names]


class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
    """Titulky.com provider"""

    languages = {Language(l) for l in ['ces', 'slk']}
    video_types = (Episode, Movie)
    hash_verifiable = False
    hearing_impaired_verifiable = False

    server_url = 'https://premium.titulky.com'
    login_url = server_url
    logout_url = f"{server_url}?action=logout"
    download_url = f"{server_url}/download.php?id="

    # Timeout (seconds) passed to every HTTP request
    timeout = 30
    # Number of search result rows processed concurrently when multithreading is on
    max_threads = 5

    subtitle_class = TitulkySubtitle

    def __init__(self,
                 username=None,
                 password=None,
                 skip_wrong_fps=None,
                 approved_only=None,
                 multithreading=None):
        """Validate and store the provider configuration.

        :raises ConfigurationError: if the username or password is missing, or
            if any of the three flags is not a boolean.
        """
        if not all([username, password]):
            raise ConfigurationError("Username and password must be specified!")

        if not isinstance(skip_wrong_fps, bool):
            raise ConfigurationError(
                f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")

        if not isinstance(approved_only, bool):
            raise ConfigurationError(
                f"Approved_only {approved_only} must be a boolean!")

        if not isinstance(multithreading, bool):
            raise ConfigurationError(
                f"Multithreading {multithreading} must be a boolean!")

        self.username = username
        self.password = password
        self.skip_wrong_fps = skip_wrong_fps
        self.approved_only = approved_only
        self.multithreading = multithreading

        self.session = None

    def initialize(self):
        """Create the HTTP session, set browser-like headers and log in."""
        self.session = Session()
        # Keep the connection pool at 10 connections unless more than 10 threads
        # are configured, in which case leave a little headroom above max_threads.
        pool_maxsize = (self.max_threads + 3) if self.max_threads > 10 else 10
        self.session.mount('https://', HTTPAdapter(pool_maxsize=pool_maxsize))
        self.session.mount('http://', HTTPAdapter(pool_maxsize=pool_maxsize))

        # Set headers
        self.session.headers['User-Agent'] = AGENT_LIST[randint(
            0,
            len(AGENT_LIST) - 1)]
        self.session.headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        self.session.headers['Accept-Language'] = 'sk,cz,en;q=0.5'
        self.session.headers['Accept-Encoding'] = 'gzip, deflate'
        self.session.headers['DNT'] = '1'
        self.session.headers['Connection'] = 'keep-alive'
        self.session.headers['Upgrade-Insecure-Requests'] = '1'
        self.session.headers['Cache-Control'] = 'max-age=0'

        self.login()

    def terminate(self):
        """Log out and close the session.

        The session is closed even when logging out fails; the logout error is
        still propagated to the caller.
        """
        try:
            self.logout()
        finally:
            self.session.close()

    def login(self):
        """Log in with the configured credentials.

        :return: ``True`` on success.
        :raises AuthenticationError: if the response is not a redirect to an
            informational message page.
        """
        logger.info("Titulky.com: Logging in")

        self.session.get(self.server_url, timeout=self.timeout)

        data = {'LoginName': self.username, 'LoginPassword': self.password}
        res = self.session.post(self.server_url,
                                data,
                                allow_redirects=False,
                                timeout=self.timeout)

        # If the response is a redirect and doesn't point to an error message page, then we are logged in
        if res.status_code == 302 and 'msg_type=i' in res.headers.get('Location', ''):
            return True

        raise AuthenticationError("Login failed")

    def logout(self):
        """Log out of the current session.

        :return: ``True`` on success.
        :raises AuthenticationError: if the response is not a redirect to an
            informational message page.
        """
        logger.info("Titulky.com: Logging out")

        res = self.session.get(self.logout_url,
                               allow_redirects=False,
                               timeout=self.timeout)

        # If the response is a redirect and doesn't point to an error message page, then we are logged out
        if res.status_code == 302 and 'msg_type=i' in res.headers.get('Location', ''):
            return True

        raise AuthenticationError("Logout failed.")

    def fetch_page(self, url, ref=None):
        """Fetch ``url`` and return the response body as text.

        :param ref: value of the ``Referer`` header; defaults to the server URL.
        :raises HTTPError: if the status code is not 200.
        :raises ProviderError: if the response body is empty.
        """
        logger.debug(f"Titulky.com: Fetching url: {url}")

        res = self.session.get(
            url,
            timeout=self.timeout,
            headers={'Referer': ref if ref else self.server_url})

        if res.status_code != 200:
            raise HTTPError(f"Fetch failed with status code {res.status_code}")
        if not res.text:
            raise ProviderError("No response returned from the provider")

        return res.text

    def build_search_url(self, params):
        """Build the search URL from ``params`` without modifying the given dict.

        ``action=search`` and ``fsf=1`` are always added, and spaces are
        replaced with ``+``.
        """
        query_params = dict(params)
        query_params['action'] = 'search'
        # Requires subtitle names to match full search keyword
        query_params['fsf'] = 1

        query_string = '&'.join(f'{key}={value}'
                                for key, value in query_params.items())

        return f"{self.server_url}/?{query_string}".replace(' ', '+')

    def parse_details(self, details_url, search_url):
        """Parse the details page of an individual subtitle.

        :return: a dict with the keys ``releases``, ``language``, ``uploader``,
            ``fps``, ``year`` and ``imdb_id``, or ``False`` when the page has no
            details container.
        :raises ParseResponseError: if an expected element is missing.
        """
        html_src = self.fetch_page(details_url, ref=search_url)
        details_page_soup = ParserBeautifulSoup(html_src,
                                                ['lxml', 'html.parser'])

        details_container = details_page_soup.find('div', class_='detail')
        if not details_container:
            # The subtitles could be removed and got redirected to a different page. Better treat this silently.
            logger.info("Titulky.com: Could not find details div container. Skipping.")
            return False

        ### IMDB ID
        imdb_id = None
        imdb_tag = details_container.find('a', attrs={'target': 'imdb'})

        if imdb_tag:
            # Keep the "tt" prefix: query() strips it from the ids it receives and
            # get_matches() compares this value directly with the video's id.
            imdb_match = re.search(r'tt\d+', imdb_tag.get('href') or '')
            if imdb_match:
                imdb_id = imdb_match.group(0)

        if not imdb_id:
            logger.debug("Titulky.com: No IMDB ID supplied on details page.")

        ### RELEASE
        release_tag = details_container.find('div', class_='releas')

        if not release_tag:
            raise ParseResponseError(
                "Could not find release tag. Did the HTML source change?")

        release = release_tag.get_text(strip=True)

        if not release:
            logger.debug("Titulky.com: No release information supplied on details page.")

        ### LANGUAGE
        language = None
        czech_flag = details_container.select('img[src*=\'flag-CZ\']')
        slovak_flag = details_container.select('img[src*=\'flag-SK\']')

        # The language is only decided when exactly one of the two flags is present
        if czech_flag and not slovak_flag:
            language = Language('ces')
        elif slovak_flag and not czech_flag:
            language = Language('slk')

        if not language:
            logger.debug("Titulky.com: No language information supplied on details page.")

        ### UPLOADER
        uploader_tag = details_container.find('div', class_='ulozil')

        if not uploader_tag:
            raise ParseResponseError(
                "Could not find uploader tag. Did the HTML source change?")

        uploader_anchor_tag = uploader_tag.find('a')

        if not uploader_anchor_tag:
            raise ParseResponseError(
                "Could not find uploader anchor tag. Did the HTML source change?"
            )

        # .string is None when the anchor does not hold exactly one text node
        uploader_text = uploader_anchor_tag.string
        uploader = uploader_text.strip() if uploader_text else None

        if not uploader:
            logger.debug("Titulky.com: No uploader name supplied on details page.")

        ### FPS
        fps_icon_tag_selection = details_container.select(
            'img[src*=\'Movieroll\']')

        if not fps_icon_tag_selection or not hasattr(fps_icon_tag_selection[0],
                                                     'parent'):
            raise ParseResponseError(
                "Could not find parent of the fps icon tag. Did the HTML source change?"
            )

        fps_icon_tag = fps_icon_tag_selection[0]
        parent_text = fps_icon_tag.parent.get_text(strip=True)
        fps_match = re.search(r'(\d+,\d+) fps', parent_text)

        # If the match is found, change the decimal separator to a dot and convert to float
        fps = float(fps_match.group(1).replace(',', '.')) if fps_match else None

        if not fps:
            logger.debug("Titulky.com: No fps supplied on details page.")

        ### YEAR
        h1_tag = details_container.find('h1', id='titulky')

        if not h1_tag:
            raise ParseResponseError(
                "Could not find h1 tag. Did the HTML source change?")

        # The h1 tag contains the name of the subtitle and a year
        h1_texts = list(h1_tag.stripped_strings)
        year = int(h1_texts[1]) if len(h1_texts) > 1 else None

        if not year:
            logger.debug("Titulky.com: No year supplied on details page.")

        # Clean up
        details_page_soup.decompose()
        details_page_soup = None

        # Return the subtitle details
        return {
            'releases': [release],
            'language': language,
            'uploader': uploader,
            'fps': fps,
            'year': year,
            'imdb_id': imdb_id
        }

    def process_row(self,
                    row,
                    video_names,
                    search_url,
                    thread_id=None,
                    threads_data=None):
        """Turn one search result row into a subtitle info dict.

        When ``threads_data`` is a list and ``thread_id`` an int, the outcome is
        also stored in ``threads_data[thread_id]`` as a dict with the keys
        ``sub_info`` and ``exception``.

        :return: the combined subtitle info dict (row data plus parsed details),
            or ``None`` when the row is skipped.
        :raises Exception: any error raised while processing is recorded (when
            running in a thread) and re-raised.
        """

        def store(sub_info, exception=None):
            # Record the outcome for the thread that runs this call, if any
            if isinstance(threads_data, list) and isinstance(thread_id, int):
                threads_data[thread_id] = {
                    'sub_info': sub_info,
                    'exception': exception
                }

        try:
            # The first anchor tag is an image preview, the second is the name
            anchor_tag = row.find_all('a')[1]
            # The details link is relative, so we need to remove the dot at the beginning
            details_link = f"{self.server_url}{anchor_tag.get('href')[1:]}"
            id_match = re.findall(r'id=(\d+)', details_link)
            sub_id = id_match[0] if len(id_match) > 0 else None
            download_link = f"{self.download_url}{sub_id}"

            # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
            approved = 'pbl1' in (row.get('class') or [])

            # Subtitle name + its alternative names
            table_columns = row.find_all('td')
            main_sub_name = anchor_tag.get_text(strip=True)

            alt_names_text = table_columns[2].string
            alt_sub_names = [
                alt_sub_name.strip()
                for alt_sub_name in alt_names_text.split('/')
            ] if alt_names_text else []
            sub_names = [main_sub_name] + alt_sub_names

            # Does at least one subtitle name contain one of the video names?
            # Skip subtitles that do not match
            # Video names -> the main title and alternative titles of a movie or an episode and so on...
            # Subtitle names -> the main name and alternative names of a subtitle displayed in search results.
            # Could be handled in TitulkySubtitle class, however we want to keep the number of requests
            # as low as possible and this prevents us from requesting the details page unnecessarily
            if not _contains_element(_from=video_names, _in=sub_names):
                logger.info(
                    f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
                )
                store(None)
                return None

            details = self.parse_details(details_link, search_url)
            if not details:
                # Details parsing was NOT successful, skipping
                store(None)
                return None

            # Combine all subtitle data into one dict
            result = {
                'names': sub_names,
                'id': sub_id,
                'approved': approved,
                'details_link': details_link,
                'download_link': download_link
            }
            result.update(details)

            store(result)

            # Return the combined dict: callers read 'id', 'names', 'approved',
            # 'details_link' and 'download_link', which 'details' alone lacks.
            return result
        except Exception as e:
            store(None, e)
            raise

    def _build_subtitle(self, sub_info, imdb_id, season, episode,
                        asked_for_episode):
        """Instantiate ``subtitle_class`` from a dict returned by ``process_row``."""
        # If we found the subtitle by IMDB ID, no need to get it from details page
        sub_imdb_id = imdb_id or sub_info['imdb_id']

        return self.subtitle_class(sub_info['id'],
                                   sub_imdb_id,
                                   sub_info['language'],
                                   sub_info['names'],
                                   season,
                                   episode,
                                   sub_info['year'],
                                   sub_info['releases'],
                                   sub_info['fps'],
                                   sub_info['uploader'],
                                   sub_info['approved'],
                                   sub_info['details_link'],
                                   sub_info['download_link'],
                                   skip_wrong_fps=self.skip_wrong_fps,
                                   asked_for_episode=asked_for_episode)

    def _process_rows_in_sequence(self, rows, video_names, search_url):
        """Process the rows one by one and return the subtitle info dicts found."""
        logger.info("Titulky.com: processing results in sequence")

        sub_infos = []
        for i, row in enumerate(rows):
            sub_info = self.process_row(row, video_names, search_url)

            if sub_info:
                logger.debug(
                    f"Titulky.com: Sucessfully retrieved subtitle info, row: {i}"
                )
                sub_infos.append(sub_info)
            else:
                # No subtitle info was returned, i.e. the row was skipped
                logger.debug(f"Titulky.com: No subtitle info retrieved, row: {i}")

        return sub_infos

    def _process_rows_in_threads(self, rows, video_names, search_url):
        """Process the rows in batches of ``max_threads`` threads.

        :return: the subtitle info dicts found, in row order.
        :raises ProviderError: if a thread left no data behind.
        :raises Exception: the first exception recorded by a thread.
        """
        logger.info(
            f"Titulky.com: processing results in parelell, {self.max_threads} rows at a time."
        )

        threads_data = [None] * len(rows)

        # Process rows in parallel, self.max_threads at a time.
        batch_count = math.ceil(len(rows) / self.max_threads)
        for batch in range(batch_count):
            starting_index = batch * self.max_threads  # Inclusive
            ending_index = min(starting_index + self.max_threads,
                               len(rows))  # Non-inclusive

            # Create and start a thread for every row in this batch
            batch_threads = []
            for j in range(starting_index, ending_index):
                logger.debug(
                    f"Titulky.com: Creating thread {j} (batch: {batch})")
                thread = Thread(target=self.process_row,
                                args=[rows[j], video_names, search_url],
                                kwargs={
                                    'thread_id': j,
                                    'threads_data': threads_data
                                })
                thread.start()
                batch_threads.append(thread)

            # Wait for all created threads to finish before moving to another batch of rows
            for thread in batch_threads:
                thread.join()

        # Process the resulting data from all threads
        sub_infos = []
        for i, thread_data in enumerate(threads_data):
            # The thread did not store anything, although a dict was expected
            if not thread_data:
                raise ProviderError(f"No data returned from thread ID: {i}")

            # If an exception was raised in a thread, raise it again here
            if thread_data.get('exception'):
                logger.debug(
                    f"Titulky.com: An error occured while processing a row in the thread ID {i}"
                )
                raise thread_data['exception']

            if thread_data.get('sub_info'):
                logger.debug(
                    f"Titulky.com: Sucessfully retrieved subtitle info, thread ID: {i}"
                )
                sub_infos.append(thread_data['sub_info'])
            else:
                # The thread stored data without a subtitle info, i.e. the row was skipped
                logger.debug(
                    f"Titulky.com: No subtitle info retrieved, thread ID: {i}"
                )

        return sub_infos

    # There are multiple ways to find subs from this provider:
    # 1. SEARCH by sub title
    #    - parameter: .................. Fulltext=<SUB NAME>
    # 2. SEARCH by imdb id
    #    - parameter: .................. IMDB=<IMDB ID>
    # 3. SEARCH by season/episode
    #    - parameter: .................. Sezona=<SEASON>
    #    - parameter: .................. Epizoda=<EPISODE>
    # 4. SEARCH by year
    #    - parameter: .................. Rok=<YEAR>
    # 5. SEARCH by video type
    #    - parameter: .................. Serial=<('S' for series | 'F' for movies | '' for all)>
    # 6. SEARCH by language
    #    - parameter: .................. Jazyk=<('CZ' for czech | 'SK' for slovak | '' for all)>
    # 7. SEARCH by status
    #    - parameter: .................. ASchvalene=<('1' for approved only | '-0' for subs awaiting approval | '' for all)>
    #    - redirects should NOT be allowed here
    #
    # 8. BROWSE subtitles by IMDB ID
    #    - Subtitles are here categorised by seasons and episodes
    #    - URL: https://premium.titulky.com/?action=serial&step=<SEASON>&id=<IMDB ID>
    #    - it seems that the url redirects to a page with their own internal ID, redirects should be allowed here
    def query(self,
              language,
              video_names,
              type,
              keyword=None,
              year=None,
              season=None,
              episode=None,
              imdb_id=None):
        """Search the provider and return a list of subtitle instances.

        :param language: ``Language('ces')``, ``Language('slk')`` or ``None`` for
            all languages; any other value yields an empty list.
        :param video_names: names a result row must partially match to be kept.
        :param type: ``'episode'`` to search series, anything else for movies.
            (The name shadows the builtin but is kept for existing callers.)
        :param imdb_id: IMDB ID including the leading ``tt``.
        :raises ParseResponseError: if the results table cannot be found.
        """
        ## Build the search URL
        params = {}

        # Keyword
        if keyword:
            params['Fulltext'] = keyword
        # Video type
        params['Serial'] = 'S' if type == 'episode' else 'F'
        # Season / Episode
        if season:
            params['Sezona'] = season
        if episode:
            params['Epizoda'] = episode
        # IMDB ID
        if imdb_id:
            params['IMDB'] = imdb_id[2:]  # Remove the tt from the imdb id
        # Year
        if year:
            params['Rok'] = year
        # Language
        if language == Language('ces'):
            params['Jazyk'] = 'CZ'
        elif language == Language('slk'):
            params['Jazyk'] = 'SK'
        elif language is None:
            params['Jazyk'] = ''
        else:
            return []
        # Status
        if self.approved_only:
            logger.debug("Titulky.com: Searching only for approved subtitles")
            params['ASchvalene'] = '1'
        else:
            params['ASchvalene'] = ''

        search_url = self.build_search_url(params)

        ## Search results page parsing
        html_src = self.fetch_page(search_url)
        search_page_soup = ParserBeautifulSoup(html_src,
                                               ['lxml', 'html.parser'])

        try:
            # If there is a message containing "Žádný odpovídající záznam", it means that there are no results
            # If that's the case, return an empty list
            error_message = search_page_soup.select('.panel-body > strong')
            if error_message and 'Žádný odpovídající záznam' in error_message[
                    0].get_text(strip=True):
                logger.info("Titulky.com: No results found")
                return []

            # Get the table containing the search results
            table = search_page_soup.find('table', class_='table')
            if not table:
                logger.debug("Titulky.com: Could not find table")
                raise ParseResponseError(
                    "Could not find table. Did the HTML source change?")

            # Get table body containing rows of subtitles
            table_body = table.find('tbody')
            if not table_body:
                logger.debug("Titulky.com: Could not find table body")
                raise ParseResponseError(
                    "Could not find table body. Did the HTML source change?")

            ## Collect the info of all subtitles on the first page
            rows = table_body.find_all('tr')

            if self.multithreading:
                sub_infos = self._process_rows_in_threads(
                    rows, video_names, search_url)
            else:
                sub_infos = self._process_rows_in_sequence(
                    rows, video_names, search_url)

            subtitles = [
                self._build_subtitle(sub_info, imdb_id, season, episode,
                                     type == 'episode')
                for sub_info in sub_infos
            ]
        finally:
            # Clean up, also on early returns and errors
            search_page_soup.decompose()
            search_page_soup = None

        logger.debug(f"Titulky.com: Found subtitles: {subtitles}")

        return subtitles

    def list_subtitles(self, video, languages):
        """List subtitles for ``video`` in each of ``languages``.

        For every language the search strategies below are tried in order, and
        the first one that yields results wins.
        """
        subtitles = []

        # Possible paths:
        # (1) Search by IMDB ID [and season/episode for tv series]
        # (2) Search by keyword: video (title|series) [and season/episode for tv series]
        # (3) Search by keyword: video series + S00E00 (tv series only)

        for language in languages:
            if isinstance(video, Episode):
                video_names = [video.series, video.title
                               ] + video.alternative_series

                # (1)
                logger.info(
                    "Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)"
                )
                if video.series_imdb_id:
                    partial_subs = self.query(language,
                                              video_names,
                                              'episode',
                                              imdb_id=video.series_imdb_id,
                                              season=video.season,
                                              episode=video.episode)
                    if partial_subs:
                        subtitles += partial_subs
                        continue

                # (2)
                logger.info(
                    "Titulky.com: Finding subtitles by keyword, Season and Episode (2)"
                )
                keyword = video.series
                partial_subs = self.query(language,
                                          video_names,
                                          'episode',
                                          keyword=keyword,
                                          season=video.season,
                                          episode=video.episode)
                if partial_subs:
                    subtitles += partial_subs
                    continue

                # (3)
                logger.info("Titulky.com: Finding subtitles by keyword only (3)")
                keyword = f"{video.series} S{video.season:02d}E{video.episode:02d}"
                partial_subs = self.query(language,
                                          video_names,
                                          'episode',
                                          keyword=keyword)
                subtitles += partial_subs
            elif isinstance(video, Movie):
                video_names = [video.title] + video.alternative_titles

                # (1)
                logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
                if video.imdb_id:
                    partial_subs = self.query(language,
                                              video_names,
                                              'movie',
                                              imdb_id=video.imdb_id)
                    if partial_subs:
                        subtitles += partial_subs
                        continue

                # (2)
                logger.info("Titulky.com: Finding subtitles by keyword (2)")
                keyword = video.title
                partial_subs = self.query(language,
                                          video_names,
                                          'movie',
                                          keyword=keyword)
                subtitles += partial_subs

        return subtitles

    def download_subtitle(self, subtitle):
        """Download the subtitle and store its content in ``subtitle.content``.

        The response is unpacked when it is a rar or zip archive; otherwise it
        is used as the subtitle text itself.

        :raises HTTPError: if the download request fails.
        :raises DownloadLimitExceeded: if no subtitle content is obtained.
        """
        res = self.session.get(subtitle.download_link,
                               headers={'Referer': subtitle.page_link},
                               timeout=self.timeout)

        try:
            res.raise_for_status()
        except HTTPError as error:
            raise HTTPError(
                f"An error occured during the download request to {subtitle.download_link}"
            ) from error

        archive_stream = io.BytesIO(res.content)
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Titulky.com: Identified rar archive")
            archive = rarfile.RarFile(archive_stream)
            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Titulky.com: Identified zip archive")
            archive = zipfile.ZipFile(archive_stream)
            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
        else:
            subtitle_content = fix_line_ending(res.content)

        if not subtitle_content:
            logger.debug(
                "Titulky.com: No subtitle content found. The downloading limit has been most likely exceeded."
            )
            raise DownloadLimitExceeded(
                "Subtitles download limit has been exceeded")

        subtitle.content = subtitle_content


def _contains_element(_from=None, _in=None, exactly=False):
    """Check whether any source name matches any target name.

    Names are compared after ``sanitize``. Names that sanitize to nothing
    (``None`` or an empty string) are ignored, so a missing title can neither
    raise nor match every target.

    :param _from: source names.
    :param _in: target names.
    :param exactly: require equality instead of substring containment.
    :return: ``True`` on the first match, ``False`` otherwise.
    """
    # Sanitize every name once instead of once per comparison
    sources = [name for name in map(sanitize, _from or []) if name]
    targets = [name for name in map(sanitize, _in or []) if name]

    if exactly:
        return any(source == target for source in sources
                   for target in targets)

    return any(source in target for source in sources for target in targets)