1# coding: utf-8 2from __future__ import unicode_literals 3 4import re 5 6from .common import InfoExtractor 7from ..compat import compat_str 8from ..utils import ( 9 clean_html, 10 determine_ext, 11 float_or_none, 12 HEADRequest, 13 int_or_none, 14 join_nonempty, 15 orderedSet, 16 remove_end, 17 str_or_none, 18 strip_jsonp, 19 unescapeHTML, 20 unified_strdate, 21 url_or_none, 22) 23 24 25class ORFTVthekIE(InfoExtractor): 26 IE_NAME = 'orf:tvthek' 27 IE_DESC = 'ORF TVthek' 28 _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)' 29 30 _TESTS = [{ 31 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', 32 'playlist': [{ 33 'md5': '2942210346ed779588f428a92db88712', 34 'info_dict': { 35 'id': '8896777', 36 'ext': 'mp4', 37 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde', 38 'description': 'md5:c1272f0245537812d4e36419c207b67d', 39 'duration': 2668, 40 'upload_date': '20141208', 41 }, 42 }], 43 'skip': 'Blocked outside of Austria / Germany', 44 }, { 45 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', 46 'info_dict': { 47 'id': '7982259', 48 'ext': 'mp4', 49 'title': 'Best of Ingrid Thurnher', 50 'upload_date': '20140527', 51 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', 52 }, 53 'params': { 54 'skip_download': True, # rtsp downloads 55 }, 56 'skip': 'Blocked outside of Austria / Germany', 57 }, { 58 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141', 59 'only_matching': True, 60 }, { 61 'url': 'http://tvthek.orf.at/profile/Universum/35429', 62 'only_matching': True, 63 }] 64 65 def _real_extract(self, url): 66 playlist_id = self._match_id(url) 67 webpage = self._download_webpage(url, playlist_id) 68 69 data_jsb = self._parse_json( 70 self._search_regex( 71 r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2', 72 webpage, 'playlist', group='json'), 73 playlist_id, transform_source=unescapeHTML)['playlist']['videos'] 74 75 entries = [] 76 for sd in data_jsb: 77 video_id, title = sd.get('id'), sd.get('title') 78 if not video_id or not title: 79 continue 80 video_id = compat_str(video_id) 81 formats = [] 82 for fd in sd['sources']: 83 src = url_or_none(fd.get('src')) 84 if not src: 85 continue 86 format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd) 87 ext = determine_ext(src) 88 if ext == 'm3u8': 89 m3u8_formats = self._extract_m3u8_formats( 90 src, video_id, 'mp4', m3u8_id=format_id, fatal=False) 91 if any('/geoprotection' in f['url'] for f in m3u8_formats): 92 self.raise_geo_restricted() 93 formats.extend(m3u8_formats) 94 elif ext == 'f4m': 95 formats.extend(self._extract_f4m_formats( 96 src, video_id, f4m_id=format_id, fatal=False)) 97 elif ext == 'mpd': 98 formats.extend(self._extract_mpd_formats( 99 src, video_id, mpd_id=format_id, fatal=False)) 100 else: 101 formats.append({ 102 'format_id': format_id, 103 'url': src, 104 'protocol': fd.get('protocol'), 105 }) 106 107 # Check for geoblocking. 108 # There is a property is_geoprotection, but that's always false 109 geo_str = sd.get('geoprotection_string') 110 if geo_str: 111 try: 112 http_url = next( 113 f['url'] 114 for f in formats 115 if re.match(r'^https?://.*\.mp4$', f['url'])) 116 except StopIteration: 117 pass 118 else: 119 req = HEADRequest(http_url) 120 self._request_webpage( 121 req, video_id, 122 note='Testing for geoblocking', 123 errnote=(( 124 'This video seems to be blocked outside of %s. ' 125 'You may want to try the streaming-* formats.') 126 % geo_str), 127 fatal=False) 128 129 self._check_formats(formats, video_id) 130 self._sort_formats(formats) 131 132 subtitles = {} 133 for sub in sd.get('subtitles', []): 134 sub_src = sub.get('src') 135 if not sub_src: 136 continue 137 subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({ 138 'url': sub_src, 139 }) 140 141 upload_date = unified_strdate(sd.get('created_date')) 142 143 thumbnails = [] 144 preview = sd.get('preview_image_url') 145 if preview: 146 thumbnails.append({ 147 'id': 'preview', 148 'url': preview, 149 'preference': 0, 150 }) 151 image = sd.get('image_full_url') 152 if not image and len(data_jsb) == 1: 153 image = self._og_search_thumbnail(webpage) 154 if image: 155 thumbnails.append({ 156 'id': 'full', 157 'url': image, 158 'preference': 1, 159 }) 160 161 entries.append({ 162 '_type': 'video', 163 'id': video_id, 164 'title': title, 165 'formats': formats, 166 'subtitles': subtitles, 167 'description': sd.get('description'), 168 'duration': int_or_none(sd.get('duration_in_seconds')), 169 'upload_date': upload_date, 170 'thumbnails': thumbnails, 171 }) 172 173 return { 174 '_type': 'playlist', 175 'entries': entries, 176 'id': playlist_id, 177 } 178 179 180class ORFRadioIE(InfoExtractor): 181 def _real_extract(self, url): 182 mobj = self._match_valid_url(url) 183 show_date = mobj.group('date') 184 show_id = mobj.group('show') 185 186 data = self._download_json( 187 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' 188 % (self._API_STATION, show_id, show_date), show_id) 189 190 entries = [] 191 for info in data['streams']: 192 loop_stream_id = str_or_none(info.get('loopStreamId')) 193 if not loop_stream_id: 194 continue 195 title = str_or_none(data.get('title')) 196 if not title: 197 continue 198 start = int_or_none(info.get('start'), scale=1000) 199 end = int_or_none(info.get('end'), scale=1000) 200 duration = end - start if end and start else None 201 entries.append({ 202 'id': loop_stream_id.replace('.mp3', ''), 203 'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id), 204 'title': title, 205 'description': clean_html(data.get('subtitle')), 206 'duration': duration, 207 'timestamp': start, 208 'ext': 'mp3', 209 'series': data.get('programTitle'), 210 }) 211 212 return { 213 '_type': 'playlist', 214 'id': show_id, 215 'title': data.get('title'), 216 'description': clean_html(data.get('subtitle')), 217 'entries': entries, 218 } 219 220 221class ORFFM4IE(ORFRadioIE): 222 IE_NAME = 'orf:fm4' 223 IE_DESC = 'radio FM4' 224 _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)' 225 _API_STATION = 'fm4' 226 _LOOP_STATION = 'fm4' 227 228 _TEST = { 229 'url': 'http://fm4.orf.at/player/20170107/4CC', 230 'md5': '2b0be47375432a7ef104453432a19212', 231 'info_dict': { 232 'id': '2017-01-07_2100_tl_54_7DaysSat18_31295', 233 'ext': 'mp3', 234 'title': 'Solid Steel Radioshow', 235 'description': 'Die Mixshow von Coldcut und Ninja Tune.', 236 'duration': 3599, 237 'timestamp': 1483819257, 238 'upload_date': '20170107', 239 }, 240 'skip': 'Shows from ORF radios are only available for 7 days.', 241 'only_matching': True, 242 } 243 244 245class ORFNOEIE(ORFRadioIE): 246 IE_NAME = 'orf:noe' 247 IE_DESC = 'Radio Niederösterreich' 248 _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 249 _API_STATION = 'noe' 250 _LOOP_STATION = 'oe2n' 251 252 _TEST = { 253 'url': 'https://noe.orf.at/player/20200423/NGM', 254 'only_matching': True, 255 } 256 257 258class ORFWIEIE(ORFRadioIE): 259 IE_NAME = 'orf:wien' 260 IE_DESC = 'Radio Wien' 261 _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 262 _API_STATION = 'wie' 263 _LOOP_STATION = 'oe2w' 264 265 _TEST = { 266 'url': 'https://wien.orf.at/player/20200423/WGUM', 267 'only_matching': True, 268 } 269 270 271class ORFBGLIE(ORFRadioIE): 272 IE_NAME = 'orf:burgenland' 273 IE_DESC = 'Radio Burgenland' 274 _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 275 _API_STATION = 'bgl' 276 _LOOP_STATION = 'oe2b' 277 278 _TEST = { 279 'url': 'https://burgenland.orf.at/player/20200423/BGM', 280 'only_matching': True, 281 } 282 283 284class ORFOOEIE(ORFRadioIE): 285 IE_NAME = 'orf:oberoesterreich' 286 IE_DESC = 'Radio Oberösterreich' 287 _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 288 _API_STATION = 'ooe' 289 _LOOP_STATION = 'oe2o' 290 291 _TEST = { 292 'url': 'https://ooe.orf.at/player/20200423/OGMO', 293 'only_matching': True, 294 } 295 296 297class ORFSTMIE(ORFRadioIE): 298 IE_NAME = 'orf:steiermark' 299 IE_DESC = 'Radio Steiermark' 300 _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 301 _API_STATION = 'stm' 302 _LOOP_STATION = 'oe2st' 303 304 _TEST = { 305 'url': 'https://steiermark.orf.at/player/20200423/STGMS', 306 'only_matching': True, 307 } 308 309 310class ORFKTNIE(ORFRadioIE): 311 IE_NAME = 'orf:kaernten' 312 IE_DESC = 'Radio Kärnten' 313 _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 314 _API_STATION = 'ktn' 315 _LOOP_STATION = 'oe2k' 316 317 _TEST = { 318 'url': 'https://kaernten.orf.at/player/20200423/KGUMO', 319 'only_matching': True, 320 } 321 322 323class ORFSBGIE(ORFRadioIE): 324 IE_NAME = 'orf:salzburg' 325 IE_DESC = 'Radio Salzburg' 326 _VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 327 _API_STATION = 'sbg' 328 _LOOP_STATION = 'oe2s' 329 330 _TEST = { 331 'url': 'https://salzburg.orf.at/player/20200423/SGUM', 332 'only_matching': True, 333 } 334 335 336class ORFTIRIE(ORFRadioIE): 337 IE_NAME = 'orf:tirol' 338 IE_DESC = 'Radio Tirol' 339 _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 340 _API_STATION = 'tir' 341 _LOOP_STATION = 'oe2t' 342 343 _TEST = { 344 'url': 'https://tirol.orf.at/player/20200423/TGUMO', 345 'only_matching': True, 346 } 347 348 349class ORFVBGIE(ORFRadioIE): 350 IE_NAME = 'orf:vorarlberg' 351 IE_DESC = 'Radio Vorarlberg' 352 _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 353 _API_STATION = 'vbg' 354 _LOOP_STATION = 'oe2v' 355 356 _TEST = { 357 'url': 'https://vorarlberg.orf.at/player/20200423/VGUM', 358 'only_matching': True, 359 } 360 361 362class ORFOE3IE(ORFRadioIE): 363 IE_NAME = 'orf:oe3' 364 IE_DESC = 'Radio Österreich 3' 365 _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 366 _API_STATION = 'oe3' 367 _LOOP_STATION = 'oe3' 368 369 _TEST = { 370 'url': 'https://oe3.orf.at/player/20200424/3WEK', 371 'only_matching': True, 372 } 373 374 375class ORFOE1IE(ORFRadioIE): 376 IE_NAME = 'orf:oe1' 377 IE_DESC = 'Radio Österreich 1' 378 _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 379 _API_STATION = 'oe1' 380 _LOOP_STATION = 'oe1' 381 382 _TEST = { 383 'url': 'http://oe1.orf.at/player/20170108/456544', 384 'md5': '34d8a6e67ea888293741c86a099b745b', 385 'info_dict': { 386 'id': '2017-01-08_0759_tl_51_7DaysSun6_256141', 387 'ext': 'mp3', 388 'title': 'Morgenjournal', 389 'duration': 609, 390 'timestamp': 1483858796, 391 'upload_date': '20170108', 392 }, 393 'skip': 'Shows from ORF radios are only available for 7 days.' 394 } 395 396 397class ORFIPTVIE(InfoExtractor): 398 IE_NAME = 'orf:iptv' 399 IE_DESC = 'iptv.ORF.at' 400 _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' 401 402 _TEST = { 403 'url': 'http://iptv.orf.at/stories/2275236/', 404 'md5': 'c8b22af4718a4b4af58342529453e3e5', 405 'info_dict': { 406 'id': '350612', 407 'ext': 'flv', 408 'title': 'Weitere Evakuierungen um Vulkan Calbuco', 409 'description': 'md5:d689c959bdbcf04efeddedbf2299d633', 410 'duration': 68.197, 411 'thumbnail': r're:^https?://.*\.jpg$', 412 'upload_date': '20150425', 413 }, 414 } 415 416 def _real_extract(self, url): 417 story_id = self._match_id(url) 418 419 webpage = self._download_webpage( 420 'http://iptv.orf.at/stories/%s' % story_id, story_id) 421 422 video_id = self._search_regex( 423 r'data-video(?:id)?="(\d+)"', webpage, 'video id') 424 425 data = self._download_json( 426 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, 427 video_id)[0] 428 429 duration = float_or_none(data['duration'], 1000) 430 431 video = data['sources']['default'] 432 load_balancer_url = video['loadBalancerUrl'] 433 abr = int_or_none(video.get('audioBitrate')) 434 vbr = int_or_none(video.get('bitrate')) 435 fps = int_or_none(video.get('videoFps')) 436 width = int_or_none(video.get('videoWidth')) 437 height = int_or_none(video.get('videoHeight')) 438 thumbnail = video.get('preview') 439 440 rendition = self._download_json( 441 load_balancer_url, video_id, transform_source=strip_jsonp) 442 443 f = { 444 'abr': abr, 445 'vbr': vbr, 446 'fps': fps, 447 'width': width, 448 'height': height, 449 } 450 451 formats = [] 452 for format_id, format_url in rendition['redirect'].items(): 453 if format_id == 'rtmp': 454 ff = f.copy() 455 ff.update({ 456 'url': format_url, 457 'format_id': format_id, 458 }) 459 formats.append(ff) 460 elif determine_ext(format_url) == 'f4m': 461 formats.extend(self._extract_f4m_formats( 462 format_url, video_id, f4m_id=format_id)) 463 elif determine_ext(format_url) == 'm3u8': 464 formats.extend(self._extract_m3u8_formats( 465 format_url, video_id, 'mp4', m3u8_id=format_id)) 466 else: 467 continue 468 self._sort_formats(formats) 469 470 title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at') 471 description = self._og_search_description(webpage) 472 upload_date = unified_strdate(self._html_search_meta( 473 'dc.date', webpage, 'upload date')) 474 475 return { 476 'id': video_id, 477 'title': title, 478 'description': description, 479 'duration': duration, 480 'thumbnail': thumbnail, 481 'upload_date': upload_date, 482 'formats': formats, 483 } 484 485 486class ORFFM4StoryIE(InfoExtractor): 487 IE_NAME = 'orf:fm4:story' 488 IE_DESC = 'fm4.orf.at stories' 489 _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)' 490 491 _TEST = { 492 'url': 'http://fm4.orf.at/stories/2865738/', 493 'playlist': [{ 494 'md5': 'e1c2c706c45c7b34cf478bbf409907ca', 495 'info_dict': { 496 'id': '547792', 497 'ext': 'flv', 498 'title': 'Manu Delago und Inner Tongue live', 499 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.', 500 'duration': 1748.52, 501 'thumbnail': r're:^https?://.*\.jpg$', 502 'upload_date': '20170913', 503 }, 504 }, { 505 'md5': 'c6dd2179731f86f4f55a7b49899d515f', 506 'info_dict': { 507 'id': '547798', 508 'ext': 'flv', 509 'title': 'Manu Delago und Inner Tongue live (2)', 510 'duration': 1504.08, 511 'thumbnail': r're:^https?://.*\.jpg$', 512 'upload_date': '20170913', 513 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.', 514 }, 515 }], 516 } 517 518 def _real_extract(self, url): 519 story_id = self._match_id(url) 520 webpage = self._download_webpage(url, story_id) 521 522 entries = [] 523 all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage)) 524 for idx, video_id in enumerate(all_ids): 525 data = self._download_json( 526 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, 527 video_id)[0] 528 529 duration = float_or_none(data['duration'], 1000) 530 531 video = data['sources']['q8c'] 532 load_balancer_url = video['loadBalancerUrl'] 533 abr = int_or_none(video.get('audioBitrate')) 534 vbr = int_or_none(video.get('bitrate')) 535 fps = int_or_none(video.get('videoFps')) 536 width = int_or_none(video.get('videoWidth')) 537 height = int_or_none(video.get('videoHeight')) 538 thumbnail = video.get('preview') 539 540 rendition = self._download_json( 541 load_balancer_url, video_id, transform_source=strip_jsonp) 542 543 f = { 544 'abr': abr, 545 'vbr': vbr, 546 'fps': fps, 547 'width': width, 548 'height': height, 549 } 550 551 formats = [] 552 for format_id, format_url in rendition['redirect'].items(): 553 if format_id == 'rtmp': 554 ff = f.copy() 555 ff.update({ 556 'url': format_url, 557 'format_id': format_id, 558 }) 559 formats.append(ff) 560 elif determine_ext(format_url) == 'f4m': 561 formats.extend(self._extract_f4m_formats( 562 format_url, video_id, f4m_id=format_id)) 563 elif determine_ext(format_url) == 'm3u8': 564 formats.extend(self._extract_m3u8_formats( 565 format_url, video_id, 'mp4', m3u8_id=format_id)) 566 else: 567 continue 568 self._sort_formats(formats) 569 570 title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at') 571 if idx >= 1: 572 # Titles are duplicates, make them unique 573 title += ' (' + str(idx + 1) + ')' 574 description = self._og_search_description(webpage) 575 upload_date = unified_strdate(self._html_search_meta( 576 'dc.date', webpage, 'upload date')) 577 578 entries.append({ 579 'id': video_id, 580 'title': title, 581 'description': description, 582 'duration': duration, 583 'thumbnail': thumbnail, 584 'upload_date': upload_date, 585 'formats': formats, 586 }) 587 588 return self.playlist_result(entries) 589