1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
6from .common import InfoExtractor
7from ..compat import (
8    compat_urllib_parse_unquote,
9    compat_urllib_parse_urlparse,
10)
11from ..utils import (
12    ExtractorError,
13    float_or_none,
14    sanitized_Request,
15    unescapeHTML,
16    update_url_query,
17    urlencode_postdata,
18    USER_AGENTS,
19)
20
21
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ceskatelevize.cz pages under ``/ivysilani/``.

    The web page embeds a ``getPlaylistUrl([...])`` call holding a playlist
    type and id.  Those are posted to the ``get-client-playlist`` endpoint,
    whose JSON answer points at the actual playlist document listing the
    items and their stream URLs.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
        'info_dict': {
            'id': '61924494877246241',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Život v Grónsku',
            'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3350,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist result for an iVysilani page.

        The client playlist is requested twice, once with the default
        User-Agent and once with a Safari one; formats found on the second
        pass are appended to the entries created on the first pass.

        Raises ExtractorError (expected) when the page or the playlist
        endpoint reports the content as unavailable in this territory.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        type_ = None
        episode_id = None

        # Preferred path: parse the first object passed to getPlaylistUrl()
        # as JSON and read both values from it.
        playlist = self._parse_json(
            self._search_regex(
                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
                default='{}'), playlist_id)
        if playlist:
            type_ = playlist.get('type')
            episode_id = playlist.get('id')

        # Fallback: pull the values out with stricter regexes (these are
        # fatal when they do not match).
        if not type_:
            type_ = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
                webpage, 'type')
        if not episode_id:
            episode_id = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
                webpage, 'episode_id')

        # Page-level metadata only depends on the already downloaded web
        # page, so resolve it once before the request loop.  Doing it inside
        # the loop left both names unbound (UnboundLocalError at the final
        # playlist_result call) whenever every playlist request failed.
        playlist_title = self._og_search_title(webpage, default=None)
        playlist_description = self._og_search_description(webpage, default=None)

        data = {
            'playlist[0][type]': type_,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        entries = []

        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            req.add_header('x-addr', '127.0.0.1')
            req.add_header('X-Requested-With', 'XMLHttpRequest')
            if user_agent:
                req.add_header('User-Agent', user_agent)
            req.add_header('Referer', url)

            playlistpage = self._download_json(req, playlist_id, fatal=False)

            if not playlistpage:
                continue

            playlist_url = playlistpage['url']
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
            req.add_header('Referer', url)

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:
                continue

            playlist = playlist.get('playlist')
            if not isinstance(playlist, list):
                continue

            playlist_len = len(playlist)

            for num, item in enumerate(playlist):
                is_live = item.get('type') == 'LIVE'
                formats = []
                # ``or {}`` also covers a ``streamUrls`` key that is present
                # but null, which ``.get(key, {})`` would not.
                for format_id, stream_url in (item.get('streamUrls') or {}).items():
                    if 'drmOnly=true' in stream_url:
                        continue
                    if 'playerType=flash' in stream_url:
                        stream_formats = self._extract_m3u8_formats(
                            stream_url, playlist_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls-%s' % format_id, fatal=False)
                    else:
                        stream_formats = self._extract_mpd_formats(
                            stream_url, playlist_id,
                            mpd_id='dash-%s' % format_id, fatal=False)
                    # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
                    if format_id == 'audioDescription':
                        for f in stream_formats:
                            f['source_preference'] = -10
                    formats.extend(stream_formats)

                # Second (Safari) pass: when the first pass already produced
                # one entry per playlist item, only merge the extra formats
                # into the matching entry instead of creating a duplicate.
                if user_agent and len(entries) == playlist_len:
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = item.get('id') or item['assetId']
                title = item['title']

                duration = float_or_none(item.get('duration'))
                thumbnail = item.get('previewImageUrl')

                subtitles = {}
                if item.get('type') == 'VOD':
                    subs = item.get('subtitles')
                    if subs:
                        subtitles = self.extract_subtitles(episode_id, subs)

                if playlist_len == 1:
                    final_title = playlist_title or title
                    if is_live:
                        final_title = self._live_title(final_title)
                elif playlist_title:
                    final_title = '%s (%s)' % (playlist_title, title)
                else:
                    # No og:title on the page: use the item title alone
                    # rather than formatting a literal "None (...)".
                    final_title = title

                entries.append({
                    'id': item_id,
                    'title': final_title,
                    'description': playlist_description if playlist_len == 1 else None,
                    'thumbnail': thumbnail,
                    'duration': duration,
                    'formats': formats,
                    'subtitles': subtitles,
                    'is_live': is_live,
                })

        for e in entries:
            self._sort_formats(e['formats'])

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in ``subs`` and return it as SRT.

        ``subs`` is indexed as a sequence of mappings with a ``url`` key; only
        the first element is used.  The result is filed under the ``cs``
        language code.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """ Convert millisecond-based subtitles to SRT """

        def _msectotimecode(msec):
            """ Helper utility to convert milliseconds to timecode """
            components = []
            # Peel off milliseconds, seconds, minutes and hours in turn; the
            # hours component is taken modulo 100.
            for divider in [1000, 60, 60, 100]:
                components.append(msec % divider)
                msec //= divider
            return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

        def _fix_subtitle(subtitle):
            """ Yield output lines, rewriting "<index>; <start> <stop>" cue headers """
            for line in subtitle.splitlines():
                m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
                if m:
                    # A cue header becomes two lines: the cue index and the
                    # "start --> stop" timecode line.
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield '{0} --> {1}'.format(start, stop)
                else:
                    # Anything else (cue text, blank lines) passes through.
                    yield line

        return '\r\n'.join(_fix_subtitle(subtitles))
239
240
class CeskaTelevizePoradyIE(InfoExtractor):
    """Extractor for ceskatelevize.cz pages under ``/porady/``.

    It does not extract media itself: it locates the embedded player URL in
    the page and hands it over to CeskaTelevizeIE.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page's player URL and delegate to CeskaTelevizeIE."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Tried in order: a <span data-url="..."> attribute first, then an
        # <iframe> whose src points at the iFramePlayer.php embed.
        player_url_patterns = (
            r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1',
        )
        raw_player_url = self._search_regex(
            player_url_patterns, webpage, 'iframe player url', group='url')

        # The attribute value is HTML-escaped in the page; unescape it before
        # adding the autoStart query parameter.
        data_url = update_url_query(
            unescapeHTML(raw_player_url), query={'autoStart': 'true'})

        return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
290