1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
6from .common import InfoExtractor
7from ..compat import compat_str
8from ..utils import (
9    clean_html,
10    determine_ext,
11    float_or_none,
12    HEADRequest,
13    int_or_none,
14    join_nonempty,
15    orderedSet,
16    remove_end,
17    str_or_none,
18    strip_jsonp,
19    unescapeHTML,
20    unified_strdate,
21    url_or_none,
22)
23
24
class ORFTVthekIE(InfoExtractor):
    """Extract all video segments of an ORF TVthek page as a playlist.

    The page embeds its playlist as HTML-escaped JSON in the ``data-jsb``
    attribute of a ``<div>`` whose class contains ``VideoPlaylist``; every
    item of ``playlist.videos`` in that JSON becomes one playlist entry.
    """

    IE_NAME = 'orf:tvthek'
    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
        'playlist': [{
            'md5': '2942210346ed779588f428a92db88712',
            'info_dict': {
                'id': '8896777',
                'ext': 'mp4',
                'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
                'description': 'md5:c1272f0245537812d4e36419c207b67d',
                'duration': 2668,
                'upload_date': '20141208',
            },
        }],
        'skip': 'Blocked outside of Austria / Germany',
    }, {
        'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
        'info_dict': {
            'id': '7982259',
            'ext': 'mp4',
            'title': 'Best of Ingrid Thurnher',
            'upload_date': '20140527',
            'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
        },
        'params': {
            'skip_download': True,  # rtsp downloads
        },
        'skip': 'Blocked outside of Austria / Germany',
    }, {
        'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
        'only_matching': True,
    }, {
        'url': 'http://tvthek.orf.at/profile/Universum/35429',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist result from the segment list embedded in the page.

        Segments without an id or a title are skipped.  When an HLS manifest
        yields a format whose URL contains ``/geoprotection``,
        ``raise_geo_restricted()`` is called.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        data_jsb = self._parse_json(
            self._search_regex(
                r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
                webpage, 'playlist', group='json'),
            playlist_id, transform_source=unescapeHTML)['playlist']['videos']

        entries = []
        for sd in data_jsb:
            video_id, title = sd.get('id'), sd.get('title')
            if not video_id or not title:
                continue
            video_id = compat_str(video_id)

            formats = []
            # 'sources' may be missing or null in the JSON; treat both as
            # "no sources" instead of failing with KeyError/TypeError.
            for fd in sd.get('sources') or []:
                src = url_or_none(fd.get('src'))
                if not src:
                    continue
                format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
                ext = determine_ext(src)
                if ext == 'm3u8':
                    m3u8_formats = self._extract_m3u8_formats(
                        src, video_id, 'mp4', m3u8_id=format_id, fatal=False)
                    if any('/geoprotection' in f['url'] for f in m3u8_formats):
                        self.raise_geo_restricted()
                    formats.extend(m3u8_formats)
                elif ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        src, video_id, f4m_id=format_id, fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
                        src, video_id, mpd_id=format_id, fatal=False))
                else:
                    formats.append({
                        'format_id': format_id,
                        'url': src,
                        'protocol': fd.get('protocol'),
                    })

            # Check for geoblocking.
            # There is a property is_geoprotection, but that's always false
            geo_str = sd.get('geoprotection_string')
            if geo_str:
                # Probe the first progressive http(s) .mp4 format, if any
                http_url = next(
                    (f['url'] for f in formats
                     if re.match(r'^https?://.*\.mp4$', f['url'])),
                    None)
                if http_url:
                    # Non-fatal: a failing probe is only reported via errnote
                    self._request_webpage(
                        HEADRequest(http_url), video_id,
                        note='Testing for geoblocking',
                        errnote=((
                            'This video seems to be blocked outside of %s. '
                            'You may want to try the streaming-* formats.')
                            % geo_str),
                        fatal=False)

            self._check_formats(formats, video_id)
            self._sort_formats(formats)

            subtitles = {}
            # 'subtitles' can be null as well as absent
            for sub in sd.get('subtitles') or []:
                sub_src = sub.get('src')
                if not sub_src:
                    continue
                # Fall back to 'de-AT' for a missing *or* null language so
                # that None never ends up as a subtitle key
                subtitles.setdefault(sub.get('lang') or 'de-AT', []).append({
                    'url': sub_src,
                })

            thumbnails = []
            preview = sd.get('preview_image_url')
            if preview:
                thumbnails.append({
                    'id': 'preview',
                    'url': preview,
                    'preference': 0,
                })
            image = sd.get('image_full_url')
            if not image and len(data_jsb) == 1:
                # Only a single-segment page can safely borrow the page-level
                # og:image as that segment's thumbnail
                image = self._og_search_thumbnail(webpage)
            if image:
                thumbnails.append({
                    'id': 'full',
                    'url': image,
                    'preference': 1,
                })

            entries.append({
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'subtitles': subtitles,
                'description': sd.get('description'),
                'duration': int_or_none(sd.get('duration_in_seconds')),
                'upload_date': unified_strdate(sd.get('created_date')),
                'thumbnails': thumbnails,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': playlist_id,
        }
178
179
class ORFRadioIE(InfoExtractor):
    """Shared implementation for the ORF radio player extractors.

    Subclasses provide ``_VALID_URL`` (with ``date`` and ``show`` groups),
    ``_API_STATION`` (inserted into the audioapi.orf.at URL) and
    ``_LOOP_STATION`` (the ``channel`` parameter of the loop-stream URL).
    """

    def _real_extract(self, url):
        """Return a playlist with one entry per stream of the broadcast.

        Streams without a ``loopStreamId`` are skipped, and no entries are
        produced when the broadcast has no title.
        """
        show_date, show_id = self._match_valid_url(url).group('date', 'show')

        api_url = (
            'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
            % (self._API_STATION, show_id, show_date))
        broadcast = self._download_json(api_url, show_id)

        # Look up the stream list before anything else so that a malformed
        # response fails at the same point as before.
        streams = broadcast['streams']

        # Title and description belong to the broadcast, not to a stream, so
        # they are the same for every entry.
        entry_title = str_or_none(broadcast.get('title'))
        description = clean_html(broadcast.get('subtitle'))

        entries = []
        for stream in streams:
            stream_id = str_or_none(stream.get('loopStreamId'))
            if not (stream_id and entry_title):
                continue

            begin = int_or_none(stream.get('start'), scale=1000)
            finish = int_or_none(stream.get('end'), scale=1000)
            duration = None
            if finish and begin:
                duration = finish - begin

            entries.append({
                'id': stream_id.replace('.mp3', ''),
                'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, stream_id),
                'title': entry_title,
                'description': description,
                'duration': duration,
                'timestamp': begin,
                'ext': 'mp3',
                'series': broadcast.get('programTitle'),
            })

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': broadcast.get('title'),
            'description': description,
            'entries': entries,
        }
219
220
class ORFFM4IE(ORFRadioIE):
    """ORF radio extractor for fm4.orf.at player URLs."""

    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'fm4'
    _LOOP_STATION = 'fm4'

    # Unlike the sibling station extractors, the show id must start with "4".
    _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'

    # NOTE(review): 'only_matching' is set next to md5/info_dict/skip;
    # presumably only the URL match is exercised - confirm.
    _TEST = {
        'url': 'http://fm4.orf.at/player/20170107/4CC',
        'md5': '2b0be47375432a7ef104453432a19212',
        'info_dict': {
            'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
            'ext': 'mp3',
            'title': 'Solid Steel Radioshow',
            'description': 'Die Mixshow von Coldcut und Ninja Tune.',
            'duration': 3599,
            'timestamp': 1483819257,
            'upload_date': '20170107',
        },
        'skip': 'Shows from ORF radios are only available for 7 days.',
        'only_matching': True,
    }
243
244
class ORFNOEIE(ORFRadioIE):
    """ORF radio extractor for noe.orf.at player URLs."""

    IE_NAME = 'orf:noe'
    IE_DESC = 'Radio Niederösterreich'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'noe'
    _LOOP_STATION = 'oe2n'

    _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://noe.orf.at/player/20200423/NGM',
    }
256
257
class ORFWIEIE(ORFRadioIE):
    """ORF radio extractor for wien.orf.at player URLs."""

    IE_NAME = 'orf:wien'
    IE_DESC = 'Radio Wien'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'wie'
    _LOOP_STATION = 'oe2w'

    _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://wien.orf.at/player/20200423/WGUM',
    }
269
270
class ORFBGLIE(ORFRadioIE):
    """ORF radio extractor for burgenland.orf.at player URLs."""

    IE_NAME = 'orf:burgenland'
    IE_DESC = 'Radio Burgenland'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'bgl'
    _LOOP_STATION = 'oe2b'

    _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://burgenland.orf.at/player/20200423/BGM',
    }
282
283
class ORFOOEIE(ORFRadioIE):
    """ORF radio extractor for ooe.orf.at player URLs."""

    IE_NAME = 'orf:oberoesterreich'
    IE_DESC = 'Radio Oberösterreich'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'ooe'
    _LOOP_STATION = 'oe2o'

    _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://ooe.orf.at/player/20200423/OGMO',
    }
295
296
class ORFSTMIE(ORFRadioIE):
    """ORF radio extractor for steiermark.orf.at player URLs."""

    IE_NAME = 'orf:steiermark'
    IE_DESC = 'Radio Steiermark'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'stm'
    _LOOP_STATION = 'oe2st'

    _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://steiermark.orf.at/player/20200423/STGMS',
    }
308
309
class ORFKTNIE(ORFRadioIE):
    """ORF radio extractor for kaernten.orf.at player URLs."""

    IE_NAME = 'orf:kaernten'
    IE_DESC = 'Radio Kärnten'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'ktn'
    _LOOP_STATION = 'oe2k'

    _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
    }
321
322
class ORFSBGIE(ORFRadioIE):
    """ORF radio extractor for salzburg.orf.at player URLs."""

    IE_NAME = 'orf:salzburg'
    IE_DESC = 'Radio Salzburg'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'sbg'
    _LOOP_STATION = 'oe2s'

    _VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://salzburg.orf.at/player/20200423/SGUM',
    }
334
335
class ORFTIRIE(ORFRadioIE):
    """ORF radio extractor for tirol.orf.at player URLs."""

    IE_NAME = 'orf:tirol'
    IE_DESC = 'Radio Tirol'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'tir'
    _LOOP_STATION = 'oe2t'

    _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://tirol.orf.at/player/20200423/TGUMO',
    }
347
348
class ORFVBGIE(ORFRadioIE):
    """ORF radio extractor for vorarlberg.orf.at player URLs."""

    IE_NAME = 'orf:vorarlberg'
    IE_DESC = 'Radio Vorarlberg'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'vbg'
    _LOOP_STATION = 'oe2v'

    _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
    }
360
361
class ORFOE3IE(ORFRadioIE):
    """ORF radio extractor for oe3.orf.at player URLs."""

    IE_NAME = 'orf:oe3'
    IE_DESC = 'Radio Österreich 3'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'oe3'
    _LOOP_STATION = 'oe3'

    _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'only_matching': True,
        'url': 'https://oe3.orf.at/player/20200424/3WEK',
    }
373
374
class ORFOE1IE(ORFRadioIE):
    """ORF radio extractor for oe1.orf.at player URLs."""

    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'

    # Station keys read by ORFRadioIE._real_extract when it builds the
    # audio API URL and the loop-stream URL.
    _API_STATION = 'oe1'
    _LOOP_STATION = 'oe1'

    _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'url': 'http://oe1.orf.at/player/20170108/456544',
        'md5': '34d8a6e67ea888293741c86a099b745b',
        'info_dict': {
            'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
            'ext': 'mp3',
            'title': 'Morgenjournal',
            'duration': 609,
            'timestamp': 1483858796,
            'upload_date': '20170108',
        },
        'skip': 'Shows from ORF radios are only available for 7 days.',
    }
395
396
class ORFIPTVIE(InfoExtractor):
    """Extract the video embedded in an iptv.orf.at story page.

    The story page supplies the numeric video id and the title, description
    and date metadata; the stream details come from the bits.orf.at
    ``data.json`` endpoint and the load balancer it points to.
    """

    IE_NAME = 'orf:iptv'
    IE_DESC = 'iptv.ORF.at'
    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://iptv.orf.at/stories/2275236/',
        'md5': 'c8b22af4718a4b4af58342529453e3e5',
        'info_dict': {
            'id': '350612',
            'ext': 'flv',
            'title': 'Weitere Evakuierungen um Vulkan Calbuco',
            'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
            'duration': 68.197,
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150425',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the first video referenced by the story."""
        story_id = self._match_id(url)

        # Always fetch the canonical story URL; _VALID_URL also accepts the
        # "#/stories/..." form.
        webpage = self._download_webpage(
            'http://iptv.orf.at/stories/%s' % story_id, story_id)

        video_id = self._search_regex(
            r'data-video(?:id)?="(\d+)"', webpage, 'video id')

        data = self._download_json(
            'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
            video_id)[0]

        # Duration is optional metadata: a missing key must not abort the
        # whole extraction.
        duration = float_or_none(data.get('duration'), 1000)

        video = data['sources']['default']
        load_balancer_url = video['loadBalancerUrl']
        thumbnail = video.get('preview')

        # Technical fields taken from the source description; they are
        # attached to the rtmp format only, the manifest-based formats carry
        # whatever their own extraction yields.
        common_fields = {
            'abr': int_or_none(video.get('audioBitrate')),
            'vbr': int_or_none(video.get('bitrate')),
            'fps': int_or_none(video.get('videoFps')),
            'width': int_or_none(video.get('videoWidth')),
            'height': int_or_none(video.get('videoHeight')),
        }

        rendition = self._download_json(
            load_balancer_url, video_id, transform_source=strip_jsonp)

        formats = []
        for format_id, format_url in rendition['redirect'].items():
            if format_id == 'rtmp':
                rtmp_format = common_fields.copy()
                rtmp_format.update({
                    'url': format_url,
                    'format_id': format_id,
                })
                formats.append(rtmp_format)
                continue
            ext = determine_ext(format_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id))
            # Any other redirect target is ignored.
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }
484
485
class ORFFM4StoryIE(InfoExtractor):
    """Extract all videos embedded in an fm4.orf.at story as a playlist.

    Every distinct ``data-video``/``data-videoid`` attribute value found in
    the story page becomes one entry.  Title, description and upload date are
    taken from the story page and are therefore shared by all entries; titles
    after the first get a " (N)" suffix to keep them unique.
    """

    IE_NAME = 'orf:fm4:story'
    IE_DESC = 'fm4.orf.at stories'
    _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://fm4.orf.at/stories/2865738/',
        'playlist': [{
            'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
            'info_dict': {
                'id': '547792',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
                'duration': 1748.52,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
            },
        }, {
            'md5': 'c6dd2179731f86f4f55a7b49899d515f',
            'info_dict': {
                'id': '547798',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live (2)',
                'duration': 1504.08,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
            },
        }],
    }

    def _real_extract(self, url):
        """Return a playlist (identified by the story id) of the story's videos."""
        story_id = self._match_id(url)
        webpage = self._download_webpage(url, story_id)

        all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
        if not all_ids:
            # No embedded videos: return an empty playlist without touching
            # the page metadata, whose lookup may fail on such pages.
            return self.playlist_result([], playlist_id=story_id)

        # Page-level metadata is identical for every video of the story, so
        # it is parsed once instead of once per entry.
        base_title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        entries = []
        for idx, video_id in enumerate(all_ids):
            data = self._download_json(
                'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
                video_id)[0]

            # Duration is optional metadata: a missing key must not abort
            # the whole playlist.
            duration = float_or_none(data.get('duration'), 1000)

            video = data['sources']['q8c']
            load_balancer_url = video['loadBalancerUrl']
            thumbnail = video.get('preview')

            # Technical fields taken from the source description; they are
            # attached to the rtmp format only.
            common_fields = {
                'abr': int_or_none(video.get('audioBitrate')),
                'vbr': int_or_none(video.get('bitrate')),
                'fps': int_or_none(video.get('videoFps')),
                'width': int_or_none(video.get('videoWidth')),
                'height': int_or_none(video.get('videoHeight')),
            }

            rendition = self._download_json(
                load_balancer_url, video_id, transform_source=strip_jsonp)

            formats = []
            for format_id, format_url in rendition['redirect'].items():
                if format_id == 'rtmp':
                    rtmp_format = common_fields.copy()
                    rtmp_format.update({
                        'url': format_url,
                        'format_id': format_id,
                    })
                    formats.append(rtmp_format)
                    continue
                ext = determine_ext(format_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        format_url, video_id, f4m_id=format_id))
                elif ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', m3u8_id=format_id))
                # Any other redirect target is ignored.
            self._sort_formats(formats)

            title = base_title
            if idx >= 1:
                # Titles are duplicates, make them unique
                title = '%s (%d)' % (base_title, idx + 1)

            entries.append({
                'id': video_id,
                'title': title,
                'description': description,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'formats': formats,
            })

        # Pass the story id so the playlist is identifiable, as the other
        # playlist-returning extractors in this module do.
        return self.playlist_result(entries, playlist_id=story_id)
589