1# coding: utf-8 2from __future__ import unicode_literals 3 4import collections 5import itertools 6import json 7import random 8import re 9 10from .common import InfoExtractor 11from ..compat import ( 12 compat_parse_qs, 13 compat_str, 14 compat_urlparse, 15 compat_urllib_parse_urlencode, 16 compat_urllib_parse_urlparse, 17) 18from ..utils import ( 19 clean_html, 20 dict_get, 21 ExtractorError, 22 float_or_none, 23 int_or_none, 24 parse_duration, 25 parse_iso8601, 26 qualities, 27 try_get, 28 unified_timestamp, 29 update_url_query, 30 url_or_none, 31 urljoin, 32) 33 34 35class TwitchBaseIE(InfoExtractor): 36 _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv' 37 38 _API_BASE = 'https://api.twitch.tv' 39 _USHER_BASE = 'https://usher.ttvnw.net' 40 _LOGIN_FORM_URL = 'https://www.twitch.tv/login' 41 _LOGIN_POST_URL = 'https://passport.twitch.tv/login' 42 _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko' 43 _NETRC_MACHINE = 'twitch' 44 45 _OPERATION_HASHES = { 46 'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14', 47 'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb', 48 'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777', 49 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84', 50 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e', 51 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', 52 'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11', 53 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', 54 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687', 55 } 56 57 def _real_initialize(self): 58 self._login() 59 60 def _login(self): 61 username, password = self._get_login_info() 62 if username is None: 63 return 64 65 def fail(message): 66 raise ExtractorError( 67 'Unable to login. Twitch said: %s' % message, expected=True) 68 69 def login_step(page, urlh, note, data): 70 form = self._hidden_inputs(page) 71 form.update(data) 72 73 page_url = urlh.geturl() 74 post_url = self._search_regex( 75 r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 76 'post url', default=self._LOGIN_POST_URL, group='url') 77 post_url = urljoin(page_url, post_url) 78 79 headers = { 80 'Referer': page_url, 81 'Origin': 'https://www.twitch.tv', 82 'Content-Type': 'text/plain;charset=UTF-8', 83 } 84 85 response = self._download_json( 86 post_url, None, note, data=json.dumps(form).encode(), 87 headers=headers, expected_status=400) 88 error = dict_get(response, ('error', 'error_description', 'error_code')) 89 if error: 90 fail(error) 91 92 if 'Authenticated successfully' in response.get('message', ''): 93 return None, None 94 95 redirect_url = urljoin( 96 post_url, 97 response.get('redirect') or response['redirect_path']) 98 return self._download_webpage_handle( 99 redirect_url, None, 'Downloading login redirect page', 100 headers=headers) 101 102 login_page, handle = self._download_webpage_handle( 103 self._LOGIN_FORM_URL, None, 'Downloading login page') 104 105 # Some TOR nodes and public proxies are blocked completely 106 if 'blacklist_message' in login_page: 107 fail(clean_html(login_page)) 108 109 redirect_page, handle = login_step( 110 login_page, handle, 'Logging in', { 111 'username': username, 112 'password': password, 113 'client_id': self._CLIENT_ID, 114 }) 115 116 # Successful login 117 if not redirect_page: 118 return 119 120 if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None: 121 # TODO: Add mechanism to request an SMS or phone call 122 tfa_token = self._get_tfa_info('two-factor authentication token') 123 login_step(redirect_page, handle, 'Submitting TFA token', { 124 'authy_token': tfa_token, 125 'remember_2fa': 'true', 126 }) 127 128 def _prefer_source(self, formats): 129 try: 130 source = next(f for f in formats if f['format_id'] == 'Source') 131 source['quality'] = 10 132 except StopIteration: 133 for f in formats: 134 if '/chunked/' in f['url']: 135 f.update({ 136 'quality': 10, 137 'format_note': 'Source', 138 }) 139 self._sort_formats(formats) 140 141 def _download_base_gql(self, video_id, ops, note, fatal=True): 142 headers = { 143 'Content-Type': 'text/plain;charset=UTF-8', 144 'Client-ID': self._CLIENT_ID, 145 } 146 gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token') 147 if gql_auth: 148 headers['Authorization'] = 'OAuth ' + gql_auth.value 149 return self._download_json( 150 'https://gql.twitch.tv/gql', video_id, note, 151 data=json.dumps(ops).encode(), 152 headers=headers, fatal=fatal) 153 154 def _download_gql(self, video_id, ops, note, fatal=True): 155 for op in ops: 156 op['extensions'] = { 157 'persistedQuery': { 158 'version': 1, 159 'sha256Hash': self._OPERATION_HASHES[op['operationName']], 160 } 161 } 162 return self._download_base_gql(video_id, ops, note) 163 164 def _download_access_token(self, video_id, token_kind, param_name): 165 method = '%sPlaybackAccessToken' % token_kind 166 ops = { 167 'query': '''{ 168 %s( 169 %s: "%s", 170 params: { 171 platform: "web", 172 playerBackend: "mediaplayer", 173 playerType: "site" 174 } 175 ) 176 { 177 value 178 signature 179 } 180 }''' % (method, param_name, video_id), 181 } 182 return self._download_base_gql( 183 video_id, ops, 184 'Downloading %s access token GraphQL' % token_kind)['data'][method] 185 186 187class TwitchVodIE(TwitchBaseIE): 188 IE_NAME = 'twitch:vod' 189 _VALID_URL = r'''(?x) 190 https?:// 191 (?: 192 (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/| 193 player\.twitch\.tv/\?.*?\bvideo=v? 194 ) 195 (?P<id>\d+) 196 ''' 197 198 _TESTS = [{ 199 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s', 200 'info_dict': { 201 'id': 'v6528877', 202 'ext': 'mp4', 203 'title': 'LCK Summer Split - Week 6 Day 1', 204 'thumbnail': r're:^https?://.*\.jpg$', 205 'duration': 17208, 206 'timestamp': 1435131734, 207 'upload_date': '20150624', 208 'uploader': 'Riot Games', 209 'uploader_id': 'riotgames', 210 'view_count': int, 211 'start_time': 310, 212 }, 213 'params': { 214 # m3u8 download 215 'skip_download': True, 216 }, 217 }, { 218 # Untitled broadcast (title is None) 219 'url': 'http://www.twitch.tv/belkao_o/v/11230755', 220 'info_dict': { 221 'id': 'v11230755', 222 'ext': 'mp4', 223 'title': 'Untitled Broadcast', 224 'thumbnail': r're:^https?://.*\.jpg$', 225 'duration': 1638, 226 'timestamp': 1439746708, 227 'upload_date': '20150816', 228 'uploader': 'BelkAO_o', 229 'uploader_id': 'belkao_o', 230 'view_count': int, 231 }, 232 'params': { 233 # m3u8 download 234 'skip_download': True, 235 }, 236 'skip': 'HTTP Error 404: Not Found', 237 }, { 238 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877', 239 'only_matching': True, 240 }, { 241 'url': 'https://www.twitch.tv/videos/6528877', 242 'only_matching': True, 243 }, { 244 'url': 'https://m.twitch.tv/beagsandjam/v/247478721', 245 'only_matching': True, 246 }, { 247 'url': 'https://www.twitch.tv/northernlion/video/291940395', 248 'only_matching': True, 249 }, { 250 'url': 'https://player.twitch.tv/?video=480452374', 251 'only_matching': True, 252 }] 253 254 def _download_info(self, item_id): 255 data = self._download_gql( 256 item_id, [{ 257 'operationName': 'VideoMetadata', 258 'variables': { 259 'channelLogin': '', 260 'videoID': item_id, 261 }, 262 }], 263 'Downloading stream metadata GraphQL')[0]['data'] 264 video = data.get('video') 265 if video is None: 266 raise ExtractorError( 267 'Video %s does not exist' % item_id, expected=True) 268 return self._extract_info_gql(video, item_id) 269 270 @staticmethod 271 def _extract_info(info): 272 status = info.get('status') 273 if status == 'recording': 274 is_live = True 275 elif status == 'recorded': 276 is_live = False 277 else: 278 is_live = None 279 _QUALITIES = ('small', 'medium', 'large') 280 quality_key = qualities(_QUALITIES) 281 thumbnails = [] 282 preview = info.get('preview') 283 if isinstance(preview, dict): 284 for thumbnail_id, thumbnail_url in preview.items(): 285 thumbnail_url = url_or_none(thumbnail_url) 286 if not thumbnail_url: 287 continue 288 if thumbnail_id not in _QUALITIES: 289 continue 290 thumbnails.append({ 291 'url': thumbnail_url, 292 'preference': quality_key(thumbnail_id), 293 }) 294 return { 295 'id': info['_id'], 296 'title': info.get('title') or 'Untitled Broadcast', 297 'description': info.get('description'), 298 'duration': int_or_none(info.get('length')), 299 'thumbnails': thumbnails, 300 'uploader': info.get('channel', {}).get('display_name'), 301 'uploader_id': info.get('channel', {}).get('name'), 302 'timestamp': parse_iso8601(info.get('recorded_at')), 303 'view_count': int_or_none(info.get('views')), 304 'is_live': is_live, 305 } 306 307 @staticmethod 308 def _extract_info_gql(info, item_id): 309 vod_id = info.get('id') or item_id 310 # id backward compatibility for download archives 311 if vod_id[0] != 'v': 312 vod_id = 'v%s' % vod_id 313 thumbnail = url_or_none(info.get('previewThumbnailURL')) 314 if thumbnail: 315 for p in ('width', 'height'): 316 thumbnail = thumbnail.replace('{%s}' % p, '0') 317 return { 318 'id': vod_id, 319 'title': info.get('title') or 'Untitled Broadcast', 320 'description': info.get('description'), 321 'duration': int_or_none(info.get('lengthSeconds')), 322 'thumbnail': thumbnail, 323 'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str), 324 'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str), 325 'timestamp': unified_timestamp(info.get('publishedAt')), 326 'view_count': int_or_none(info.get('viewCount')), 327 } 328 329 def _real_extract(self, url): 330 vod_id = self._match_id(url) 331 332 info = self._download_info(vod_id) 333 access_token = self._download_access_token(vod_id, 'video', 'id') 334 335 formats = self._extract_m3u8_formats( 336 '%s/vod/%s.m3u8?%s' % ( 337 self._USHER_BASE, vod_id, 338 compat_urllib_parse_urlencode({ 339 'allow_source': 'true', 340 'allow_audio_only': 'true', 341 'allow_spectre': 'true', 342 'player': 'twitchweb', 343 'playlist_include_framerate': 'true', 344 'nauth': access_token['value'], 345 'nauthsig': access_token['signature'], 346 })), 347 vod_id, 'mp4', entry_protocol='m3u8_native') 348 349 self._prefer_source(formats) 350 info['formats'] = formats 351 352 parsed_url = compat_urllib_parse_urlparse(url) 353 query = compat_parse_qs(parsed_url.query) 354 if 't' in query: 355 info['start_time'] = parse_duration(query['t'][0]) 356 357 if info.get('timestamp') is not None: 358 info['subtitles'] = { 359 'rechat': [{ 360 'url': update_url_query( 361 'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, { 362 'client_id': self._CLIENT_ID, 363 }), 364 'ext': 'json', 365 }], 366 } 367 368 return info 369 370 371def _make_video_result(node): 372 assert isinstance(node, dict) 373 video_id = node.get('id') 374 if not video_id: 375 return 376 return { 377 '_type': 'url_transparent', 378 'ie_key': TwitchVodIE.ie_key(), 379 'id': video_id, 380 'url': 'https://www.twitch.tv/videos/%s' % video_id, 381 'title': node.get('title'), 382 'thumbnail': node.get('previewThumbnailURL'), 383 'duration': float_or_none(node.get('lengthSeconds')), 384 'view_count': int_or_none(node.get('viewCount')), 385 } 386 387 388class TwitchCollectionIE(TwitchBaseIE): 389 _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)' 390 391 _TESTS = [{ 392 'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ', 393 'info_dict': { 394 'id': 'wlDCoH0zEBZZbQ', 395 'title': 'Overthrow Nook, capitalism for children', 396 }, 397 'playlist_mincount': 13, 398 }] 399 400 _OPERATION_NAME = 'CollectionSideBar' 401 402 def _real_extract(self, url): 403 collection_id = self._match_id(url) 404 collection = self._download_gql( 405 collection_id, [{ 406 'operationName': self._OPERATION_NAME, 407 'variables': {'collectionID': collection_id}, 408 }], 409 'Downloading collection GraphQL')[0]['data']['collection'] 410 title = collection.get('title') 411 entries = [] 412 for edge in collection['items']['edges']: 413 if not isinstance(edge, dict): 414 continue 415 node = edge.get('node') 416 if not isinstance(node, dict): 417 continue 418 video = _make_video_result(node) 419 if video: 420 entries.append(video) 421 return self.playlist_result( 422 entries, playlist_id=collection_id, playlist_title=title) 423 424 425class TwitchPlaylistBaseIE(TwitchBaseIE): 426 _PAGE_LIMIT = 100 427 428 def _entries(self, channel_name, *args): 429 cursor = None 430 variables_common = self._make_variables(channel_name, *args) 431 entries_key = '%ss' % self._ENTRY_KIND 432 for page_num in itertools.count(1): 433 variables = variables_common.copy() 434 variables['limit'] = self._PAGE_LIMIT 435 if cursor: 436 variables['cursor'] = cursor 437 page = self._download_gql( 438 channel_name, [{ 439 'operationName': self._OPERATION_NAME, 440 'variables': variables, 441 }], 442 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num), 443 fatal=False) 444 if not page: 445 break 446 edges = try_get( 447 page, lambda x: x[0]['data']['user'][entries_key]['edges'], list) 448 if not edges: 449 break 450 for edge in edges: 451 if not isinstance(edge, dict): 452 continue 453 if edge.get('__typename') != self._EDGE_KIND: 454 continue 455 node = edge.get('node') 456 if not isinstance(node, dict): 457 continue 458 if node.get('__typename') != self._NODE_KIND: 459 continue 460 entry = self._extract_entry(node) 461 if entry: 462 cursor = edge.get('cursor') 463 yield entry 464 if not cursor or not isinstance(cursor, compat_str): 465 break 466 467 468class TwitchVideosIE(TwitchPlaylistBaseIE): 469 _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)' 470 471 _TESTS = [{ 472 # All Videos sorted by Date 473 'url': 'https://www.twitch.tv/spamfish/videos?filter=all', 474 'info_dict': { 475 'id': 'spamfish', 476 'title': 'spamfish - All Videos sorted by Date', 477 }, 478 'playlist_mincount': 924, 479 }, { 480 # All Videos sorted by Popular 481 'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views', 482 'info_dict': { 483 'id': 'spamfish', 484 'title': 'spamfish - All Videos sorted by Popular', 485 }, 486 'playlist_mincount': 931, 487 }, { 488 # Past Broadcasts sorted by Date 489 'url': 'https://www.twitch.tv/spamfish/videos?filter=archives', 490 'info_dict': { 491 'id': 'spamfish', 492 'title': 'spamfish - Past Broadcasts sorted by Date', 493 }, 494 'playlist_mincount': 27, 495 }, { 496 # Highlights sorted by Date 497 'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights', 498 'info_dict': { 499 'id': 'spamfish', 500 'title': 'spamfish - Highlights sorted by Date', 501 }, 502 'playlist_mincount': 901, 503 }, { 504 # Uploads sorted by Date 505 'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time', 506 'info_dict': { 507 'id': 'esl_csgo', 508 'title': 'esl_csgo - Uploads sorted by Date', 509 }, 510 'playlist_mincount': 5, 511 }, { 512 # Past Premieres sorted by Date 513 'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres', 514 'info_dict': { 515 'id': 'spamfish', 516 'title': 'spamfish - Past Premieres sorted by Date', 517 }, 518 'playlist_mincount': 1, 519 }, { 520 'url': 'https://www.twitch.tv/spamfish/videos/all', 521 'only_matching': True, 522 }, { 523 'url': 'https://m.twitch.tv/spamfish/videos/all', 524 'only_matching': True, 525 }, { 526 'url': 'https://www.twitch.tv/spamfish/videos', 527 'only_matching': True, 528 }] 529 530 Broadcast = collections.namedtuple('Broadcast', ['type', 'label']) 531 532 _DEFAULT_BROADCAST = Broadcast(None, 'All Videos') 533 _BROADCASTS = { 534 'archives': Broadcast('ARCHIVE', 'Past Broadcasts'), 535 'highlights': Broadcast('HIGHLIGHT', 'Highlights'), 536 'uploads': Broadcast('UPLOAD', 'Uploads'), 537 'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'), 538 'all': _DEFAULT_BROADCAST, 539 } 540 541 _DEFAULT_SORTED_BY = 'Date' 542 _SORTED_BY = { 543 'time': _DEFAULT_SORTED_BY, 544 'views': 'Popular', 545 } 546 547 _OPERATION_NAME = 'FilterableVideoTower_Videos' 548 _ENTRY_KIND = 'video' 549 _EDGE_KIND = 'VideoEdge' 550 _NODE_KIND = 'Video' 551 552 @classmethod 553 def suitable(cls, url): 554 return (False 555 if any(ie.suitable(url) for ie in ( 556 TwitchVideosClipsIE, 557 TwitchVideosCollectionsIE)) 558 else super(TwitchVideosIE, cls).suitable(url)) 559 560 @staticmethod 561 def _make_variables(channel_name, broadcast_type, sort): 562 return { 563 'channelOwnerLogin': channel_name, 564 'broadcastType': broadcast_type, 565 'videoSort': sort.upper(), 566 } 567 568 @staticmethod 569 def _extract_entry(node): 570 return _make_video_result(node) 571 572 def _real_extract(self, url): 573 channel_name = self._match_id(url) 574 qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) 575 filter = qs.get('filter', ['all'])[0] 576 sort = qs.get('sort', ['time'])[0] 577 broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST) 578 return self.playlist_result( 579 self._entries(channel_name, broadcast.type, sort), 580 playlist_id=channel_name, 581 playlist_title='%s - %s sorted by %s' 582 % (channel_name, broadcast.label, 583 self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY))) 584 585 586class TwitchVideosClipsIE(TwitchPlaylistBaseIE): 587 _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)' 588 589 _TESTS = [{ 590 # Clips 591 'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all', 592 'info_dict': { 593 'id': 'vanillatv', 594 'title': 'vanillatv - Clips Top All', 595 }, 596 'playlist_mincount': 1, 597 }, { 598 'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d', 599 'only_matching': True, 600 }] 601 602 Clip = collections.namedtuple('Clip', ['filter', 'label']) 603 604 _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D') 605 _RANGE = { 606 '24hr': Clip('LAST_DAY', 'Top 24H'), 607 '7d': _DEFAULT_CLIP, 608 '30d': Clip('LAST_MONTH', 'Top 30D'), 609 'all': Clip('ALL_TIME', 'Top All'), 610 } 611 612 # NB: values other than 20 result in skipped videos 613 _PAGE_LIMIT = 20 614 615 _OPERATION_NAME = 'ClipsCards__User' 616 _ENTRY_KIND = 'clip' 617 _EDGE_KIND = 'ClipEdge' 618 _NODE_KIND = 'Clip' 619 620 @staticmethod 621 def _make_variables(channel_name, filter): 622 return { 623 'login': channel_name, 624 'criteria': { 625 'filter': filter, 626 }, 627 } 628 629 @staticmethod 630 def _extract_entry(node): 631 assert isinstance(node, dict) 632 clip_url = url_or_none(node.get('url')) 633 if not clip_url: 634 return 635 return { 636 '_type': 'url_transparent', 637 'ie_key': TwitchClipsIE.ie_key(), 638 'id': node.get('id'), 639 'url': clip_url, 640 'title': node.get('title'), 641 'thumbnail': node.get('thumbnailURL'), 642 'duration': float_or_none(node.get('durationSeconds')), 643 'timestamp': unified_timestamp(node.get('createdAt')), 644 'view_count': int_or_none(node.get('viewCount')), 645 'language': node.get('language'), 646 } 647 648 def _real_extract(self, url): 649 channel_name = self._match_id(url) 650 qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) 651 range = qs.get('range', ['7d'])[0] 652 clip = self._RANGE.get(range, self._DEFAULT_CLIP) 653 return self.playlist_result( 654 self._entries(channel_name, clip.filter), 655 playlist_id=channel_name, 656 playlist_title='%s - Clips %s' % (channel_name, clip.label)) 657 658 659class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): 660 _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections' 661 662 _TESTS = [{ 663 # Collections 664 'url': 'https://www.twitch.tv/spamfish/videos?filter=collections', 665 'info_dict': { 666 'id': 'spamfish', 667 'title': 'spamfish - Collections', 668 }, 669 'playlist_mincount': 3, 670 }] 671 672 _OPERATION_NAME = 'ChannelCollectionsContent' 673 _ENTRY_KIND = 'collection' 674 _EDGE_KIND = 'CollectionsItemEdge' 675 _NODE_KIND = 'Collection' 676 677 @staticmethod 678 def _make_variables(channel_name): 679 return { 680 'ownerLogin': channel_name, 681 } 682 683 @staticmethod 684 def _extract_entry(node): 685 assert isinstance(node, dict) 686 collection_id = node.get('id') 687 if not collection_id: 688 return 689 return { 690 '_type': 'url_transparent', 691 'ie_key': TwitchCollectionIE.ie_key(), 692 'id': collection_id, 693 'url': 'https://www.twitch.tv/collections/%s' % collection_id, 694 'title': node.get('title'), 695 'thumbnail': node.get('thumbnailURL'), 696 'duration': float_or_none(node.get('lengthSeconds')), 697 'timestamp': unified_timestamp(node.get('updatedAt')), 698 'view_count': int_or_none(node.get('viewCount')), 699 } 700 701 def _real_extract(self, url): 702 channel_name = self._match_id(url) 703 return self.playlist_result( 704 self._entries(channel_name), playlist_id=channel_name, 705 playlist_title='%s - Collections' % channel_name) 706 707 708class TwitchStreamIE(TwitchBaseIE): 709 IE_NAME = 'twitch:stream' 710 _VALID_URL = r'''(?x) 711 https?:// 712 (?: 713 (?:(?:www|go|m)\.)?twitch\.tv/| 714 player\.twitch\.tv/\?.*?\bchannel= 715 ) 716 (?P<id>[^/#?]+) 717 ''' 718 719 _TESTS = [{ 720 'url': 'http://www.twitch.tv/shroomztv', 721 'info_dict': { 722 'id': '12772022048', 723 'display_id': 'shroomztv', 724 'ext': 'mp4', 725 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 726 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV', 727 'is_live': True, 728 'timestamp': 1421928037, 729 'upload_date': '20150122', 730 'uploader': 'ShroomzTV', 731 'uploader_id': 'shroomztv', 732 'view_count': int, 733 }, 734 'params': { 735 # m3u8 download 736 'skip_download': True, 737 }, 738 }, { 739 'url': 'http://www.twitch.tv/miracle_doto#profile-0', 740 'only_matching': True, 741 }, { 742 'url': 'https://player.twitch.tv/?channel=lotsofs', 743 'only_matching': True, 744 }, { 745 'url': 'https://go.twitch.tv/food', 746 'only_matching': True, 747 }, { 748 'url': 'https://m.twitch.tv/food', 749 'only_matching': True, 750 }] 751 752 @classmethod 753 def suitable(cls, url): 754 return (False 755 if any(ie.suitable(url) for ie in ( 756 TwitchVodIE, 757 TwitchCollectionIE, 758 TwitchVideosIE, 759 TwitchVideosClipsIE, 760 TwitchVideosCollectionsIE, 761 TwitchClipsIE)) 762 else super(TwitchStreamIE, cls).suitable(url)) 763 764 def _real_extract(self, url): 765 channel_name = self._match_id(url).lower() 766 767 gql = self._download_gql( 768 channel_name, [{ 769 'operationName': 'StreamMetadata', 770 'variables': {'channelLogin': channel_name}, 771 }, { 772 'operationName': 'ComscoreStreamingQuery', 773 'variables': { 774 'channel': channel_name, 775 'clipSlug': '', 776 'isClip': False, 777 'isLive': True, 778 'isVodOrCollection': False, 779 'vodID': '', 780 }, 781 }, { 782 'operationName': 'VideoPreviewOverlay', 783 'variables': {'login': channel_name}, 784 }], 785 'Downloading stream GraphQL') 786 787 user = gql[0]['data']['user'] 788 789 if not user: 790 raise ExtractorError( 791 '%s does not exist' % channel_name, expected=True) 792 793 stream = user['stream'] 794 795 if not stream: 796 raise ExtractorError('%s is offline' % channel_name, expected=True) 797 798 access_token = self._download_access_token( 799 channel_name, 'stream', 'channelName') 800 token = access_token['value'] 801 802 stream_id = stream.get('id') or channel_name 803 query = { 804 'allow_source': 'true', 805 'allow_audio_only': 'true', 806 'allow_spectre': 'true', 807 'p': random.randint(1000000, 10000000), 808 'player': 'twitchweb', 809 'playlist_include_framerate': 'true', 810 'segment_preference': '4', 811 'sig': access_token['signature'].encode('utf-8'), 812 'token': token.encode('utf-8'), 813 } 814 formats = self._extract_m3u8_formats( 815 '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name), 816 stream_id, 'mp4', query=query) 817 self._prefer_source(formats) 818 819 view_count = stream.get('viewers') 820 timestamp = unified_timestamp(stream.get('createdAt')) 821 822 sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {} 823 uploader = sq_user.get('displayName') 824 description = try_get( 825 sq_user, lambda x: x['broadcastSettings']['title'], compat_str) 826 827 thumbnail = url_or_none(try_get( 828 gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'], 829 compat_str)) 830 831 title = uploader or channel_name 832 stream_type = stream.get('type') 833 if stream_type in ['rerun', 'live']: 834 title += ' (%s)' % stream_type 835 836 return { 837 'id': stream_id, 838 'display_id': channel_name, 839 'title': self._live_title(title), 840 'description': description, 841 'thumbnail': thumbnail, 842 'uploader': uploader, 843 'uploader_id': channel_name, 844 'timestamp': timestamp, 845 'view_count': view_count, 846 'formats': formats, 847 'is_live': stream_type == 'live', 848 } 849 850 851class TwitchClipsIE(TwitchBaseIE): 852 IE_NAME = 'twitch:clips' 853 _VALID_URL = r'''(?x) 854 https?:// 855 (?: 856 clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)| 857 (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/ 858 ) 859 (?P<id>[^/?#&]+) 860 ''' 861 862 _TESTS = [{ 863 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', 864 'md5': '761769e1eafce0ffebfb4089cb3847cd', 865 'info_dict': { 866 'id': '42850523', 867 'ext': 'mp4', 868 'title': 'EA Play 2016 Live from the Novo Theatre', 869 'thumbnail': r're:^https?://.*\.jpg', 870 'timestamp': 1465767393, 871 'upload_date': '20160612', 872 'creator': 'EA', 873 'uploader': 'stereotype_', 874 'uploader_id': '43566419', 875 }, 876 }, { 877 # multiple formats 878 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 879 'only_matching': True, 880 }, { 881 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', 882 'only_matching': True, 883 }, { 884 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited', 885 'only_matching': True, 886 }, { 887 'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank', 888 'only_matching': True, 889 }, { 890 'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank', 891 'only_matching': True, 892 }] 893 894 def _real_extract(self, url): 895 video_id = self._match_id(url) 896 897 clip = self._download_gql( 898 video_id, [{ 899 'operationName': 'VideoAccessToken_Clip', 900 'variables': { 901 'slug': video_id, 902 }, 903 }], 904 'Downloading clip access token GraphQL')[0]['data']['clip'] 905 906 if not clip: 907 raise ExtractorError( 908 'This clip is no longer available', expected=True) 909 910 access_query = { 911 'sig': clip['playbackAccessToken']['signature'], 912 'token': clip['playbackAccessToken']['value'], 913 } 914 915 data = self._download_base_gql( 916 video_id, { 917 'query': '''{ 918 clip(slug: "%s") { 919 broadcaster { 920 displayName 921 } 922 createdAt 923 curator { 924 displayName 925 id 926 } 927 durationSeconds 928 id 929 tiny: thumbnailURL(width: 86, height: 45) 930 small: thumbnailURL(width: 260, height: 147) 931 medium: thumbnailURL(width: 480, height: 272) 932 title 933 videoQualities { 934 frameRate 935 quality 936 sourceURL 937 } 938 viewCount 939 } 940}''' % video_id}, 'Downloading clip GraphQL', fatal=False) 941 942 if data: 943 clip = try_get(data, lambda x: x['data']['clip'], dict) or clip 944 945 formats = [] 946 for option in clip.get('videoQualities', []): 947 if not isinstance(option, dict): 948 continue 949 source = url_or_none(option.get('sourceURL')) 950 if not source: 951 continue 952 formats.append({ 953 'url': update_url_query(source, access_query), 954 'format_id': option.get('quality'), 955 'height': int_or_none(option.get('quality')), 956 'fps': int_or_none(option.get('frameRate')), 957 }) 958 self._sort_formats(formats) 959 960 thumbnails = [] 961 for thumbnail_id in ('tiny', 'small', 'medium'): 962 thumbnail_url = clip.get(thumbnail_id) 963 if not thumbnail_url: 964 continue 965 thumb = { 966 'id': thumbnail_id, 967 'url': thumbnail_url, 968 } 969 mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url) 970 if mobj: 971 thumb.update({ 972 'height': int(mobj.group(2)), 973 'width': int(mobj.group(1)), 974 }) 975 thumbnails.append(thumb) 976 977 return { 978 'id': clip.get('id') or video_id, 979 'title': clip.get('title') or video_id, 980 'formats': formats, 981 'duration': int_or_none(clip.get('durationSeconds')), 982 'views': int_or_none(clip.get('viewCount')), 983 'timestamp': unified_timestamp(clip.get('createdAt')), 984 'thumbnails': thumbnails, 985 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str), 986 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str), 987 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str), 988 } 989