1# -*- coding: utf-8 -*-
2import io
3import logging
4import math
5import re
6import zipfile
7from random import randint
8from threading import Thread
9
10import rarfile
11from guessit import guessit
12from requests import Session
13from requests.adapters import HTTPAdapter
14from requests.exceptions import HTTPError
15
16from subliminal.exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, Error, ProviderError
17from subliminal.providers import ParserBeautifulSoup
18from subliminal.subtitle import fix_line_ending
19from subliminal.video import Episode, Movie
20
21from subliminal_patch.exceptions import ParseResponseError
22from subliminal_patch.providers import Provider
23from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
24from subliminal_patch.score import framerate_equal
25from subliminal_patch.subtitle import Subtitle, guess_matches, sanitize
26
27from subzero.language import Language
28
29from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
30
31logger = logging.getLogger(__name__)
32
33
class TitulkySubtitle(Subtitle):
    """Titulky.com subtitle.

    Stores the metadata gathered for a single subtitle and computes which
    properties of a video it matches (see :meth:`get_matches`).
    """
    provider_name = 'titulky'

    hash_verifiable = False
    hearing_impaired_verifiable = False

    # S00E00-style marker (case-insensitive) that may appear in subtitle names
    _season_episode_re = re.compile(r'S(\d+)E(\d+)', re.IGNORECASE)

    def __init__(self,
                 sub_id,
                 imdb_id,
                 language,
                 names,
                 season,
                 episode,
                 year,
                 releases,
                 fps,
                 uploader,
                 approved,
                 page_link,
                 download_link,
                 skip_wrong_fps=False,
                 asked_for_episode=None):
        """Store the subtitle metadata.

        :param sub_id: identifier returned by the ``id`` property.
        :param imdb_id: IMDB id compared against the video in ``get_matches``.
        :param language: subtitle language, forwarded to the base class.
        :param names: list of subtitle names; the first one is the main name.
        :param season: season number, or None to parse it from the main name.
        :param episode: episode number, or None to parse it from the main name.
        :param year: year compared against the video year.
        :param releases: list of release strings; joined into ``release_info``.
        :param fps: subtitle frame rate, returned by ``get_fps``.
        :param uploader: stored as-is.
        :param approved: stored as-is.
        :param page_link: forwarded to the base class and stored.
        :param download_link: stored as-is.
        :param skip_wrong_fps: when true, ``get_matches`` yields no matches if
            the video and subtitle frame rates differ.
        :param asked_for_episode: stored as-is.
        """
        super().__init__(language, page_link=page_link)

        self.names = names
        self.year = year
        self.sub_id = sub_id
        self.imdb_id = imdb_id
        self.fps = fps
        self.season = season
        self.episode = episode
        self.releases = releases
        self.release_info = ', '.join(releases)
        self.language = language
        self.approved = approved
        self.page_link = page_link
        self.uploader = uploader
        self.download_link = download_link
        self.skip_wrong_fps = skip_wrong_fps
        self.asked_for_episode = asked_for_episode
        self.matches = None

        # If season/episode were not supplied, fall back to the first S00E00
        # marker found in the main subtitle name.
        marker = self._season_episode_re.search(self.names[0])
        if marker:
            if self.season is None:
                self.season = int(marker.group(1))
            if self.episode is None:
                self.episode = int(marker.group(2))

    @property
    def id(self):
        """Return the subtitle identifier (``sub_id``)."""
        return self.sub_id

    def get_fps(self):
        """Return the subtitle frame rate."""
        return self.fps

    def get_matches(self, video):
        """Compute the set of properties of ``video`` this subtitle matches.

        The result is also stored in ``self.matches``. When ``skip_wrong_fps``
        is enabled and both frame rates are known but not equal, the set is
        emptied.

        :param video: the video to compare against; treated as a movie when it
            is a ``Movie`` instance and as an episode otherwise.
        :return: set of match names.
        """
        matches = set()
        _type = 'movie' if isinstance(video, Movie) else 'episode'

        sub_names = self._remove_season_episode_string(self.names)

        if _type == 'episode':
            ## EPISODE

            # match imdb_id of a series
            if video.series_imdb_id and video.series_imdb_id == self.imdb_id:
                matches.add('series_imdb_id')

            # match season/episode; compare against None explicitly so that
            # season 0 / episode 0 are not treated as "missing"
            if self.season is not None and self.season == video.season:
                matches.add('season')
            if self.episode is not None and self.episode == video.episode:
                matches.add('episode')

            # match series name
            series_names = [video.series] + video.alternative_series
            logger.debug(
                f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
            )
            if _contains_element(_from=series_names,
                                 _in=sub_names,
                                 exactly=True):
                matches.add('series')

            # match episode title
            episode_titles = [video.title]
            logger.debug(
                f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
            )
            if _contains_element(_from=episode_titles,
                                 _in=sub_names,
                                 exactly=True):
                matches.add('episode_title')

        elif _type == 'movie':
            ## MOVIE

            # match imdb_id of a movie
            if video.imdb_id and video.imdb_id == self.imdb_id:
                matches.add('imdb_id')

            # match movie title
            video_titles = [video.title] + video.alternative_titles
            logger.debug(
                f"Titulky.com: Finding exact match between subtitle names {sub_names} and video titles {video_titles}"
            )
            if _contains_element(_from=video_titles,
                                 _in=sub_names,
                                 exactly=True):
                matches.add('title')

        ## MOVIE OR EPISODE

        # match year
        if video.year and video.year == self.year:
            matches.add('year')

        # match other properties based on release infos
        for release in self.releases:
            matches |= guess_matches(video, guessit(release, {"type": _type}))

        # If turned on in settings, then do not match if video FPS is not equal to subtitle FPS
        if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(
                video.fps, self.fps):
            logger.info(f"Titulky.com: Skipping subtitle {self}: wrong FPS")
            matches.clear()

        self.matches = matches

        return matches

    def _remove_season_episode_string(self, names):
        """Return a new list of ``names`` with S00E00 markers removed.

        Each resulting name is stripped of leading/trailing whitespace; the
        input list is left untouched.
        """
        return [
            self._season_episode_re.sub('', name).strip() for name in names
        ]
183
184
185class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
186    """Titulky.com provider"""
187
188    languages = {Language(l) for l in ['ces', 'slk']}
189    video_types = (Episode, Movie)
190    hash_verifiable = False
191    hearing_impaired_verifiable = False
192
193    server_url = 'https://premium.titulky.com'
194    login_url = server_url
195    logout_url = f"{server_url}?action=logout"
196    download_url = f"{server_url}/download.php?id="
197
198    timeout = 30
199    max_threads = 5
200
201    subtitle_class = TitulkySubtitle
202
203    def __init__(self,
204                 username=None,
205                 password=None,
206                 skip_wrong_fps=None,
207                 approved_only=None,
208                 multithreading=None):
209        if not all([username, password]):
210            raise ConfigurationError("Username and password must be specified!")
211
212        if type(skip_wrong_fps) is not bool:
213            raise ConfigurationError(
214                f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")
215
216        if type(approved_only) is not bool:
217            raise ConfigurationError(
218                f"Approved_only {approved_only} must be a boolean!")
219
220        if type(multithreading) is not bool:
221            raise ConfigurationError(
222                f"Multithreading {multithreading} must be a boolean!")
223
224        self.username = username
225        self.password = password
226        self.skip_wrong_fps = skip_wrong_fps
227        self.approved_only = approved_only
228        self.multithreading = multithreading
229
230        self.session = None
231
232    def initialize(self):
233        self.session = Session()
234        # Set max pool size to the max number of threads we will use (i .e. the max number of search result rows)
235        # or set it to the default value if multithreading is disabled.
236        pool_maxsize = self.max_threads + 3 if self.max_threads > 10 else 10
237        self.session.mount('https://', HTTPAdapter(pool_maxsize=pool_maxsize))
238        self.session.mount('http://', HTTPAdapter(pool_maxsize=pool_maxsize))
239
240        # Set headers
241        self.session.headers['User-Agent'] = AGENT_LIST[randint(
242            0,
243            len(AGENT_LIST) - 1)]
244        self.session.headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
245        self.session.headers['Accept-Language'] = 'sk,cz,en;q=0.5'
246        self.session.headers['Accept-Encoding'] = 'gzip, deflate'
247        self.session.headers['DNT'] = '1'
248        self.session.headers['Connection'] = 'keep-alive'
249        self.session.headers['Upgrade-Insecure-Requests'] = '1'
250        self.session.headers['Cache-Control'] = 'max-age=0'
251
252        self.login()
253
254    def terminate(self):
255        self.logout()
256        self.session.close()
257
258    def login(self):
259        logger.info("Titulky.com: Logging in")
260
261        self.session.get(self.server_url)
262
263        data = {'LoginName': self.username, 'LoginPassword': self.password}
264        res = self.session.post(self.server_url,
265                                data,
266                                allow_redirects=False,
267                                timeout=self.timeout)
268
269        # If the response is a redirect and doesnt point to an error message page, then we are logged in
270        if res.status_code == 302 and 'msg_type=i' in res.headers['Location']:
271            return True
272        else:
273            raise AuthenticationError("Login failed")
274
275    def logout(self):
276        logger.info("Titulky.com: Logging out")
277
278        res = self.session.get(self.logout_url,
279                               allow_redirects=False,
280                               timeout=self.timeout)
281
282        # If the response is a redirect and doesnt point to an error message page, then we are logged out
283        if res.status_code == 302 and 'msg_type=i' in res.headers['Location']:
284            return True
285        else:
286            raise AuthenticationError("Logout failed.")
287
288    def fetch_page(self, url, ref=None):
289        logger.debug(f"Titulky.com: Fetching url: {url}")
290
291        res = self.session.get(
292            url,
293            timeout=self.timeout,
294            headers={'Referer': ref if ref else self.server_url})
295
296        if res.status_code != 200:
297            raise HTTPError(f"Fetch failed with status code {res.status_code}")
298        if not res.text:
299            raise ProviderError("No response returned from the provider")
300
301        return res.text
302
303    def build_search_url(self, params):
304        result = f"{self.server_url}/?"
305
306        params['action'] = 'search'
307        # Requires subtitle names to match full search keyword
308        params['fsf'] = 1
309
310        for key, value in params.items():
311            result += f'{key}={value}&'
312
313        # Remove the last &
314        result = result[:-1]
315
316        # Remove spaces
317        result = result.replace(' ', '+')
318
319        return result
320
321    # Parse details of an individual subtitle: imdb_id, release, language, uploader, fps and year
322    def parse_details(self, details_url, search_url):
323        html_src = self.fetch_page(details_url, ref=search_url)
324        details_page_soup = ParserBeautifulSoup(html_src,
325                                                ['lxml', 'html.parser'])
326
327        details_container = details_page_soup.find('div', class_='detail')
328        if not details_container:
329            # The subtitles could be removed and got redirected to a different page. Better treat this silently.
330            logger.info("Titulky.com: Could not find details div container. Skipping.")
331            return False
332
333        ### IMDB ID
334        imdb_id = None
335        imdb_tag = details_container.find('a', attrs={'target': 'imdb'})
336
337        if imdb_tag:
338            imdb_url = imdb_tag.get('href')
339            imdb_id = re.findall(r'tt(\d+)', imdb_url)[0]
340
341        if not imdb_id:
342            logger.debug("Titulky.com: No IMDB ID supplied on details page.")
343
344        ### RELEASE
345        release = None
346        release_tag = details_container.find('div', class_='releas')
347
348        if not release_tag:
349            raise ParseResponseError(
350                "Could not find release tag. Did the HTML source change?")
351
352        release = release_tag.get_text(strip=True)
353
354        if not release:
355            logger.debug("Titulky.com: No release information supplied on details page.")
356
357        ### LANGUAGE
358        language = None
359        czech_flag = details_container.select('img[src*=\'flag-CZ\']')
360        slovak_flag = details_container.select('img[src*=\'flag-SK\']')
361
362        if czech_flag and not slovak_flag:
363            language = Language('ces')
364        elif slovak_flag and not czech_flag:
365            language = Language('slk')
366
367        if not language:
368            logger.debug("Titulky.com: No language information supplied on details page.")
369
370        ### UPLOADER
371        uploader = None
372        uploader_tag = details_container.find('div', class_='ulozil')
373
374        if not uploader_tag:
375            raise ParseResponseError(
376                "Could not find uploader tag. Did the HTML source change?")
377
378        uploader_anchor_tag = uploader_tag.find('a')
379
380        if not uploader_anchor_tag:
381            raise ParseResponseError(
382                "Could not find uploader anchor tag. Did the HTML source change?"
383            )
384
385        uploader = uploader_anchor_tag.string.strip(
386        ) if uploader_anchor_tag else None
387
388        if not uploader:
389            logger.debug("Titulky.com: No uploader name supplied on details page.")
390
391        ### FPS
392        fps = None
393        fps_icon_tag_selection = details_container.select(
394            'img[src*=\'Movieroll\']')
395
396        if not fps_icon_tag_selection and not hasattr(fps_icon_tag_selection[0],
397                                                      'parent'):
398            raise ParseResponseError(
399                "Could not find parent of the fps icon tag. Did the HTML source change?"
400            )
401
402        fps_icon_tag = fps_icon_tag_selection[0]
403        parent_text = fps_icon_tag.parent.get_text(strip=True)
404        match = re.findall(r'(\d+,\d+) fps', parent_text)
405
406        # If the match is found, change the decimal separator to a dot and convert to float
407        fps = float(match[0].replace(',', '.')) if len(match) > 0 else None
408
409        if not fps:
410            logger.debug("Titulky.com: No fps supplied on details page.")
411
412        ### YEAR
413        year = None
414        h1_tag = details_container.find('h1', id='titulky')
415
416        if not h1_tag:
417            raise ParseResponseError(
418                "Could not find h1 tag. Did the HTML source change?")
419
420        # The h1 tag contains the name of the subtitle and a year
421        h1_texts = [text for text in h1_tag.stripped_strings]
422        year = int(h1_texts[1]) if len(h1_texts) > 1 else None
423
424        if not year:
425            logger.debug("Titulky.com: No year supplied on details page.")
426
427        # Clean up
428        details_page_soup.decompose()
429        details_page_soup = None
430
431        # Return the subtitle details
432        return {
433            'releases': [release],
434            'language': language,
435            'uploader': uploader,
436            'fps': fps,
437            'year': year,
438            'imdb_id': imdb_id
439        }
440
441    def process_row(self,
442                    row,
443                    video_names,
444                    search_url,
445                    thread_id=None,
446                    threads_data=None):
447        try:
448            # The first anchor tag is an image preview, the second is the name
449            anchor_tag = row.find_all('a')[1]
450            # The details link is relative, so we need to remove the dot at the beginning
451            details_link = f"{self.server_url}{anchor_tag.get('href')[1:]}"
452            id_match = re.findall(r'id=(\d+)', details_link)
453            sub_id = id_match[0] if len(id_match) > 0 else None
454            download_link = f"{self.download_url}{sub_id}"
455
456            # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
457            approved = True if 'pbl1' in row.get('class') else False
458
459            # Subtitle name + its alternative names
460            table_columns = row.findAll('td')
461            main_sub_name = anchor_tag.get_text(strip=True)
462
463            alt_sub_names = [
464                alt_sub_name.strip()
465                for alt_sub_name in table_columns[2].string.split('/')
466            ] if table_columns[2].string else []
467            sub_names = [main_sub_name] + alt_sub_names
468
469            # Does at least one subtitle name contain one of the video names?
470            # Skip subtitles that do not match
471            # Video names -> the main title and alternative titles of a movie or an episode and so on...
472            # Subtitle names -> the main name and alternative names of a subtitle displayed in search results.
473            # Could be handled in TitulkySubtitle class, however we want to keep the number of requests
474            # as low as possible and this prevents the from requesting the details page unnecessarily
475            if not _contains_element(_from=video_names, _in=sub_names):
476                logger.info(
477                    f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
478                )
479                if type(threads_data) is list and type(thread_id) is int:
480                    threads_data[thread_id] = {
481                        'sub_info': None,
482                        'exception': None
483                    }
484
485                return None
486
487            details = self.parse_details(details_link, search_url)
488            if not details:
489                # Details parsing was NOT successful, skipping
490                if type(threads_data) is list and type(thread_id) is int:
491                    threads_data[thread_id] = {
492                        'sub_info': None,
493                        'exception': None
494                    }
495
496                return None
497
498            # Combine all subtitle data into one dict
499            result = {
500                'names': sub_names,
501                'id': sub_id,
502                'approved': approved,
503                'details_link': details_link,
504                'download_link': download_link
505            }
506
507            result.update(details)
508
509            if type(threads_data) is list and type(thread_id) is int:
510                threads_data[thread_id] = {
511                    'sub_info': result,
512                    'exception': None
513                }
514
515            return details
516        except Exception as e:
517            if type(threads_data) is list and type(thread_id) is int:
518                threads_data[thread_id] = {'sub_info': None, 'exception': e}
519
520            raise e
521
522    # There are multiple ways to find subs from this provider:
523    # 1. SEARCH by sub title
524    #    - parameter: .................. Fulltext=<SUB NAME>
525    # 2. SEARCH by imdb id
526    #    - parameter: .................. IMDB=<IMDB ID>
527    # 3. SEARCH by season/episode
528    #    - parameter: .................. Sezona=<SEASON>
529    #    - parameter: .................. Epizoda=<EPISODE>
530    # 4. SEARCH by year
531    #    - parameter: .................. Rok=<YEAR>
532    # 5. SEARCH by video type
533    #    - parameter: .................. Serial=<('S' for series | 'F' for movies | '' for all)>
534    # 6. SEARCH by language
535    #    - parameter: .................. Jazyk=<('CZ' for czech | 'SK' for slovak | '' for all)>
536    # 7. SEARCH by status
537    #    - parameter: .................. ASchvalene=<('1' for approved only | '-0' for subs awaiting approval | '' for all)>
538    # - redirects should NOT be allowed here
539    #
540    # 8. BROWSE subtitles by IMDB ID
541    #   - Subtitles are here categorised by seasons and episodes
542    #   - URL: https://premium.titulky.com/?action=serial&step=<SEASON>&id=<IMDB ID>
543    #   - it seems that the url redirects to a page with their own internal ID, redirects should be allowed here
544    def query(self,
545              language,
546              video_names,
547              type,
548              keyword=None,
549              year=None,
550              season=None,
551              episode=None,
552              imdb_id=None):
553        ## Build the search URL
554        params = {}
555
556        # Keyword
557        if keyword:
558            params['Fulltext'] = keyword
559        # Video type
560        if type == 'episode':
561            params['Serial'] = 'S'
562        else:
563            params['Serial'] = 'F'
564        # Season / Episode
565        if season:
566            params['Sezona'] = season
567        if episode:
568            params['Epizoda'] = episode
569        # IMDB ID
570        if imdb_id:
571            params['IMDB'] = imdb_id[2:]  # Remove the tt from the imdb id
572        # Year
573        if year:
574            params['Rok'] = year
575        # Language
576        if language == Language('ces'):
577            params['Jazyk'] = 'CZ'
578        elif language == Language('slk'):
579            params['Jazyk'] = 'SK'
580        elif language == None:
581            params['Jazyk'] = ''
582        else:
583            return []
584        # Status
585        if self.approved_only:
586            logger.debug(f"Titulky.com: Searching only for approved subtitles")
587            params['ASchvalene'] = '1'
588        else:
589            params['ASchvalene'] = ''
590
591        search_url = self.build_search_url(params)
592
593        ## Search results page parsing
594        html_src = self.fetch_page(search_url)
595        search_page_soup = ParserBeautifulSoup(html_src,
596                                               ['lxml', 'html.parser'])
597
598        # If there is a message containing "Žádny odpovídající záznam", it means that there are no results
599        # If that's the case, return an empty list
600        error_message = search_page_soup.select('.panel-body > strong')
601        if len(
602                error_message
603        ) > 0 and 'Žádný odpovídající záznam' in error_message[0].get_text(
604                strip=True):
605            logger.info("Titulky.com: No results found")
606            return []
607
608        # Get the table containing the search results
609        table = search_page_soup.find('table', class_='table')
610        if not table:
611            logger.debug("Titulky.com: Could not find table")
612            raise ParseResponseError(
613                "Could not find table. Did the HTML source change?")
614
615        # Get table body containing rows of subtitles
616        table_body = table.find('tbody')
617        if not table_body:
618            logger.debug("Titulky.com: Could not find table body")
619            raise ParseResponseError(
620                "Could not find table body. Did the HTML source change?")
621
622        ## Loop over all subtitles on the first page and put them in a list
623        subtitles = []
624        rows = table_body.find_all('tr')
625
626        if not self.multithreading:
627            # Process the rows sequentially
628            logger.info("Titulky.com: processing results in sequence")
629            for i, row in enumerate(rows):
630                sub_info = self.process_row(row, video_names, search_url)
631
632                # If subtitle info was returned, then everything was okay
633                # and we can instationate it and add it to the list
634                if sub_info:
635                    logger.debug(
636                        f"Titulky.com: Sucessfully retrieved subtitle info, row: {i}"
637                    )
638
639                    # If we found the subtitle by IMDB ID, no need to get it from details page
640                    sub_imdb_id = imdb_id or sub_info['imdb_id']
641
642                    subtitle_instance = self.subtitle_class(
643                        sub_info['id'],
644                        sub_imdb_id,
645                        sub_info['language'],
646                        sub_info['names'],
647                        season,
648                        episode,
649                        sub_info['year'],
650                        sub_info['releases'],
651                        sub_info['fps'],
652                        sub_info['uploader'],
653                        sub_info['approved'],
654                        sub_info['details_link'],
655                        sub_info['download_link'],
656                        skip_wrong_fps=self.skip_wrong_fps,
657                        asked_for_episode=(type == 'episode'))
658                    subtitles.append(subtitle_instance)
659                else:
660                    # No subtitle info was returned, i. e. something unexpected
661                    # happend during subtitle details page fetching and processing.
662                    logger.debug(f"Titulky.com: No subtitle info retrieved, row: {i}")
663        else:
664            # Process the rows in paralell
665            logger.info(
666                f"Titulky.com: processing results in parelell, {self.max_threads} rows at a time."
667            )
668
669            threads = [None] * len(rows)
670            threads_data = [None] * len(rows)
671
672            # Process rows in parallel, self.max_threads at a time.
673            cycles = math.ceil(len(rows) / self.max_threads)
674            for i in range(cycles):
675                # Batch number i
676                starting_index = i * self.max_threads  # Inclusive
677                ending_index = starting_index + self.max_threads  # Non-inclusive
678
679                # Create threads for all rows in this batch
680                for j in range(starting_index, ending_index):
681                    # Check if j-th row exists
682                    if j < len(rows):
683                        # Row number j
684                        logger.debug(
685                            f"Titulky.com: Creating thread {j} (batch: {i})")
686                        # Create a thread for row j and start it
687                        threads[j] = Thread(
688                            target=self.process_row,
689                            args=[rows[j], video_names, search_url],
690                            kwargs={
691                                'thread_id': j,
692                                'threads_data': threads_data
693                            })
694                        threads[j].start()
695
696                # Wait for all created threads to finish before moving to another batch of rows
697                for j in range(starting_index, ending_index):
698                    # Check if j-th row exists
699                    if j < len(rows):
700                        threads[j].join()
701
702            # Process the resulting data from all threads
703            for i in range(len(threads_data)):
704                thread_data = threads_data[i]
705
706                # If the thread returned didn't return anything, but expected a dict object
707                if not thread_data:
708                    raise ProviderError(f"No data returned from thread ID: {i}")
709
710                # If an exception was raised in a thread, raise it again here
711                if 'exception' in thread_data and thread_data['exception']:
712                    logger.debug(
713                        f"Titulky.com: An error occured while processing a row in the thread ID {i}"
714                    )
715                    raise thread_data['exception']
716
717                # If the thread returned a subtitle info, great, instantiate it and add it to the list
718                if 'sub_info' in thread_data and thread_data['sub_info']:
719                    # Instantiate the subtitle object
720                    logger.debug(
721                        f"Titulky.com: Sucessfully retrieved subtitle info, thread ID: {i}"
722                    )
723                    sub_info = thread_data['sub_info']
724
725                    # If we found the subtitle by IMDB ID, no need to get it from details page
726                    sub_imdb_id = imdb_id or sub_info['imdb_id']
727
728                    subtitle_instance = self.subtitle_class(
729                        sub_info['id'],
730                        sub_imdb_id,
731                        sub_info['language'],
732                        sub_info['names'],
733                        season,
734                        episode,
735                        sub_info['year'],
736                        sub_info['releases'],
737                        sub_info['fps'],
738                        sub_info['uploader'],
739                        sub_info['approved'],
740                        sub_info['details_link'],
741                        sub_info['download_link'],
742                        skip_wrong_fps=self.skip_wrong_fps,
743                        asked_for_episode=(type == 'episode'))
744                    subtitles.append(subtitle_instance)
745                else:
746                    # The thread returned data, but it didn't contain a subtitle info, i. e. something unexpected
747                    # happend during subtitle details page fetching and processing.
748                    logger.debug(
749                        f"Titulky.com: No subtitle info retrieved, thread ID: {i}"
750                    )
751
752        # Clean up
753        search_page_soup.decompose()
754        search_page_soup = None
755
756        logger.debug(f"Titulky.com: Found subtitles: {subtitles}")
757
758        return subtitles
759
760    def list_subtitles(self, video, languages):
761        subtitles = []
762
763        # Possible paths:
764        # (1) Search by IMDB ID [and season/episode for tv series]
765        # (2) Search by keyword: video (title|series) [and season/episode for tv series]
766        # (3) Search by keyword: video series + S00E00 (tv series only)
767
768        for language in languages:
769            if isinstance(video, Episode):
770                video_names = [video.series, video.title
771                              ] + video.alternative_series
772
773                # (1)
774                logger.info(
775                    "Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)"
776                )
777                if video.series_imdb_id:
778                    partial_subs = self.query(language,
779                                              video_names,
780                                              'episode',
781                                              imdb_id=video.series_imdb_id,
782                                              season=video.season,
783                                              episode=video.episode)
784                    if (len(partial_subs) > 0):
785                        subtitles += partial_subs
786                        continue
787
788                # (2)
789                logger.info(
790                    "Titulky.com: Finding subtitles by keyword, Season and Episode (2)"
791                )
792                keyword = video.series
793                partial_subs = self.query(language,
794                                          video_names,
795                                          'episode',
796                                          keyword=keyword,
797                                          season=video.season,
798                                          episode=video.episode)
799                if (len(partial_subs) > 0):
800                    subtitles += partial_subs
801                    continue
802
803                # (3)
804                logger.info("Titulky.com: Finding subtitles by keyword only (3)")
805                keyword = f"{video.series} S{video.season:02d}E{video.episode:02d}"
806                partial_subs = self.query(language,
807                                          video_names,
808                                          'episode',
809                                          keyword=keyword)
810                subtitles += partial_subs
811            elif isinstance(video, Movie):
812                video_names = [video.title] + video.alternative_titles
813
814                # (1)
815                logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
816                if video.imdb_id:
817                    partial_subs = self.query(language,
818                                              video_names,
819                                              'movie',
820                                              imdb_id=video.imdb_id)
821                    if (len(partial_subs) > 0):
822                        subtitles += partial_subs
823                        continue
824
825                # (2)
826                logger.info("Titulky.com: Finding subtitles by keyword (2)")
827                keyword = video.title
828                partial_subs = self.query(language,
829                                          video_names,
830                                          'movie',
831                                          keyword=keyword)
832                subtitles += partial_subs
833
834        return subtitles
835
836    def download_subtitle(self, subtitle):
837        res = self.session.get(subtitle.download_link,
838                               headers={'Referer': subtitle.page_link},
839                               timeout=self.timeout)
840
841        try:
842            res.raise_for_status()
843        except:
844            raise HTTPError(
845                f"An error occured during the download request to {subtitle.download_link}"
846            )
847
848        archive_stream = io.BytesIO(res.content)
849        archive = None
850        if rarfile.is_rarfile(archive_stream):
851            logger.debug("Titulky.com: Identified rar archive")
852            archive = rarfile.RarFile(archive_stream)
853            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
854        elif zipfile.is_zipfile(archive_stream):
855            logger.debug("Titulky.com: Identified zip archive")
856            archive = zipfile.ZipFile(archive_stream)
857            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
858        else:
859            subtitle_content = fix_line_ending(res.content)
860
861        if not subtitle_content:
862            logger.debug(
863                "Titulky.com: No subtitle content found. The downloading limit has been most likely exceeded."
864            )
865            raise DownloadLimitExceeded(
866                "Subtitles download limit has been exceeded")
867
868        subtitle.content = subtitle_content
869
870
def _contains_element(_from=None, _in=None, exactly=False):
    """Check if any source element matches any target element.

    Every element is passed through ``sanitize`` before comparing. The search
    returns on the first match.

    :param _from: iterable of source elements; ``None`` is treated as empty.
    :param _in: iterable of target elements; ``None`` is treated as empty.
    :param exactly: if True, the sanitized source must equal the sanitized
        target; otherwise it only has to be contained in it.
    :return: True if a match was found, False otherwise.
    """
    # Both parameters default to None, which cannot be iterated: with nothing
    # to compare there cannot be a match.
    if _from is None or _in is None:
        return False

    for source in _from:
        # Sanitize the source once per source instead of once per pair.
        sanitized_source = sanitize(source)
        for target in _in:
            sanitized_target = sanitize(target)
            if exactly:
                matched = sanitized_source == sanitized_target
            else:
                matched = sanitized_source in sanitized_target
            if matched:
                return True

    return False
887