# coding: utf-8

from __future__ import unicode_literals

import calendar
import copy
import datetime
import functools
import hashlib
import itertools
import json
import math
import os.path
import random
import re
import sys
import time
import traceback
import threading

from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
    compat_chr,
    compat_HTTPError,
    compat_parse_qs,
    compat_str,
    compat_urllib_parse_unquote_plus,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urlparse,
)
from ..jsinterp import JSInterpreter
from ..utils import (
    bug_reports_message,
    clean_html,
    datetime_from_str,
    dict_get,
    error_to_compat_str,
    ExtractorError,
    float_or_none,
    format_field,
    int_or_none,
    is_html,
    join_nonempty,
    mimetype2ext,
    network_exceptions,
    NO_DEFAULT,
    orderedSet,
    parse_codecs,
    parse_count,
    parse_duration,
    parse_iso8601,
    parse_qs,
    qualities,
    remove_end,
    remove_start,
    smuggle_url,
    str_or_none,
    str_to_int,
    strftime_or_none,
    traverse_obj,
    try_get,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    update_url_query,
    url_or_none,
    urljoin,
    variadic,
)


def get_first(obj, keys, **kwargs):
    """Traverse each item of ``obj`` for ``keys`` and return the first match found."""
    return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)


# Per-client InnerTube API configuration (API key, context, numeric client id).
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        # No API key here; build_innertube_clients() fills in the default one.
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}


def build_innertube_clients():
    """
    Post-process INNERTUBE_CLIENTS in place at import time:
    fill in default API key/host/JS-player flag, force English locale,
    assign a selection priority, and derive '<base>_agegate' variants
    for the base clients.
    """
    third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    # qualities() ranks later items higher, so reverse to make 'android' top priority.
    priority = qualities(base_clients[::-1])

    # Iterate over a snapshot since '_agegate' entries are added during the loop.
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        # Priority keyed on the base client name ('android_music' -> 'android').
        ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])

        if client in base_clients:
            # Clone the base client as an age-gate-bypass variant (embedded player screen).
            INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
            agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            agegate_ytcfg['priority'] -= 1
        elif client.endswith('_embedded'):
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3


build_innertube_clients()


class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Path components that can never be a channel/user name in a YouTube URL.
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )

    def _login(self):
        """
        Attempt to log in to YouTube.
        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """

        if (self._LOGIN_REQUIRED
                and self.get_param('cookiefile') is None
                and self.get_param('cookiesfrombrowser') is None):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        # Username/password login is not supported; only warn if credentials were given.
        username, password = self._get_login_info()
        if username:
            self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')

    def _initialize_consent(self):
        # Pre-accept the EU cookie-consent interstitial by setting a CONSENT
        # cookie, unless the session is logged in or consent is already given.
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _initialize_pref(self):
        # Force 'hl=en' in the PREF cookie so responses come back in English.
        cookies = self._get_cookies('https://www.youtube.com/')
        pref_cookie = cookies.get('PREF')
        pref = {}
        if pref_cookie:
            try:
                pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
            except ValueError:
                self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
        pref.update({'hl': 'en'})
        self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

    def _real_initialize(self):
        self._initialize_pref()
        self._initialize_consent()
        self._login()

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Marks the end of the inline JSON blobs matched by the two regexes above.
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _get_default_ytcfg(self, client='web'):
        # Deep copy so callers can mutate the config without corrupting the global table.
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        # try_get but with fallback to default ytcfg client values when present
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

    def _extract_client_version(self, ytcfg, default_client='web'):
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='web'):
        context = get_first(
            (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
        # Enforce language for extraction
        traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
        return context

    # Cached SAPISID cookie value; None = not checked yet, False = known absent.
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH <ts>_<sha1>' Authorization header, or None if unauthenticated."""
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
        """POST a JSON request to the InnerTube endpoint ``ep`` and return the parsed response."""
        data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self.generate_api_headers(default_client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def extract_yt_initial_data(self, item_id, webpage, fatal=True):
        """Extract and parse the inline ytInitialData JSON from a watch page."""
        # Try with the boundary marker first, then the bare regex as fallback.
        data = self._search_regex(
            (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
             self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
        if data:
            return self._parse_json(data, item_id, fatal=fatal)

    @staticmethod
    def _extract_session_index(*data):
        """
        Index of current account in account list.
        See: https://github.com/yt-dlp/yt-dlp/pull/519
        """
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    # Deprecated?
    def _extract_identity_token(self, ytcfg=None, webpage=None):
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        if webpage:
            return self._search_regex(
                r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
                'identity token', default=None, fatal=False)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    @staticmethod
    def _extract_visitor_data(*args):
        """
        Extracts visitorData from an API response or ytcfg
        Appears to be used to track session state
        """
        return get_first(
            args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
            expected_type=str)

    @property
    def is_authenticated(self):
        # Authenticated iff a SAPISIDHASH header can be produced from cookies.
        return bool(self._generate_sapisidhash_header())

    def extract_ytcfg(self, video_id, webpage):
        """Extract the inline ytcfg.set({...}) configuration object from a webpage."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def generate_api_headers(
            self, *, ytcfg=None, account_syncid=None, session_index=None,
            visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
        """Build the HTTP headers for an InnerTube API call, dropping unset values."""
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin,
            'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
            'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
            'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
        }
        if session_index is None:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

        # Authorization headers are only sent when SAPISID cookies are available.
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return {h: v for h, v in headers.items() if v is not None}
    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Build the query payload for a continuation (paging) API request."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        # Legacy continuation format (nextContinuationData/reloadContinuationData).
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        # Modern continuation format (continuationCommand endpoint).
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """Find a continuation query in a renderer, trying legacy then modern formats."""
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """Yield (alert_type, message) pairs from the 'alerts' list of an API response."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
        # Split alerts into warnings and errors; warn for all but the last
        # error, which (if fatal) is raised as an ExtractorError.
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error' and fatal:
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lower-cased badge labels attached to a renderer."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Extract plain text from a YouTube text object ('simpleText' or 'runs'),
        trying each path in path_list (None = data itself) in turn.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Wrap in a list unless the path itself is a branching/multi path.
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text

    @staticmethod
    def _extract_thumbnails(data, *path_list):
        """
        Extract thumbnails from thumbnails dict
        @param path_list: path list to level that contains 'thumbnails' key
        """
        thumbnails = []
        for path in path_list or [()]:
            for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
                thumbnail_url = url_or_none(thumbnail.get('url'))
                if not thumbnail_url:
                    continue
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in thumbnail_url:
                    thumbnail_url = thumbnail_url.split('?')[0]
                thumbnails.append({
                    'url': thumbnail_url,
                    'height': int_or_none(thumbnail.get('height')),
                    'width': int_or_none(thumbnail.get('width')),
                })
        return thumbnails

    @staticmethod
    def extract_relative_time(relative_time_text):
        """
        Extracts a relative time from string and converts to dt object
        e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
        """
        mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
        if mobj:
            try:
                return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
            except ValueError:
                return None

    def _extract_time_text(self, renderer, *path_list):
        """Return (unix timestamp or None, raw localized text) for a relative-time field."""
        text = self._get_text(renderer, *path_list) or ''
        dt = self.extract_relative_time(text)
        timestamp = None
        if isinstance(dt, datetime.datetime):
            timestamp = calendar.timegm(dt.timetuple())
        if text and timestamp is None:
            self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
        return timestamp, text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """
        Call the InnerTube API with retries.
        Retries on network errors (except HTTP 403/429), on 'unknown error'
        alerts in a 200 response, and when none of check_get_keys is present.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Surface YouTube's own JSON error message, if the body is not HTML.
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                try:
                    self._extract_and_report_alerts(response, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Build a flat 'url' result dict for a video renderer (search/playlist entry)."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
        channel_id = traverse_obj(
            renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
        timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
        scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
        overlay_style = traverse_obj(
            renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
        badges = self._extract_badges(renderer)
        thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
            'channel_id': channel_id,
            'thumbnails': thumbnails,
            'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
            # Live status heuristics: scheduled start => upcoming; 'streamed'
            # wording => was live; LIVE overlay or 'live now' badge => live now.
            'live_status': ('is_upcoming' if scheduled_timestamp is not None
                            else 'was_live' if 'streamed' in time_text.lower()
                            else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
                            else None),
            'release_timestamp': scheduled_timestamp,
            'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
        }


class YoutubeIE(YoutubeBaseInfoExtractor):
    """Extractor for single YouTube videos (watch pages and the many URL aliases)."""
    IE_DESC = 'YouTube'
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    # Regexes (tried in order) for extracting the player JS id from its URL.
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Known itag -> static format metadata (legacy/progressive formats).
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height':
240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, 914 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, 915 916 # DASH mp4 video 917 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, 918 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, 919 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, 920 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, 921 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, 922 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559) 923 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, 924 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, 925 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, 926 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, 927 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, 928 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, 929 930 # Dash mp4 audio 931 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, 932 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, 933 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, 934 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, 935 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, 936 '325': {'ext': 'm4a', 'format_note': 'DASH 
audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, 937 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, 938 939 # Dash webm 940 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 941 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 942 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 943 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 944 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 945 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 946 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, 947 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 948 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 949 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 950 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 951 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 952 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 953 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 954 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 955 # itag 272 videos are either 3840x2160 (e.g. 
RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) 956 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 957 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 958 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 959 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 960 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 961 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 962 963 # Dash webm audio 964 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128}, 965 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256}, 966 967 # Dash webm audio with opus inside 968 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50}, 969 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70}, 970 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160}, 971 972 # RTMP (unnamed) 973 '_rtmp': {'protocol': 'rtmp'}, 974 975 # av01 video only formats sometimes served with "unknown" codecs 976 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'}, 977 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'}, 978 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'}, 979 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'}, 980 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'}, 981 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'}, 982 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'}, 983 '401': {'ext': 'mp4', 'height': 2160, 
'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'}, 984 } 985 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') 986 987 _GEO_BYPASS = False 988 989 IE_NAME = 'youtube' 990 _TESTS = [ 991 { 992 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', 993 'info_dict': { 994 'id': 'BaW_jenozKc', 995 'ext': 'mp4', 996 'title': 'youtube-dl test video "\'/\\ä↭', 997 'uploader': 'Philipp Hagemeister', 998 'uploader_id': 'phihag', 999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 1000 'channel': 'Philipp Hagemeister', 1001 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 1002 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 1003 'upload_date': '20121002', 1004 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22', 1005 'categories': ['Science & Technology'], 1006 'tags': ['youtube-dl'], 1007 'duration': 10, 1008 'view_count': int, 1009 'like_count': int, 1010 # 'dislike_count': int, 1011 'availability': 'public', 1012 'playable_in_embed': True, 1013 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg', 1014 'live_status': 'not_live', 1015 'age_limit': 0, 1016 'start_time': 1, 1017 'end_time': 9, 1018 } 1019 }, 1020 { 1021 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', 1022 'note': 'Embed-only video (#1746)', 1023 'info_dict': { 1024 'id': 'yZIXLfi8CZQ', 1025 'ext': 'mp4', 1026 'upload_date': '20120608', 1027 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', 1028 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', 1029 'uploader': 'SET India', 1030 'uploader_id': 'setindia', 1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia', 1032 'age_limit': 18, 1033 }, 1034 'skip': 'Private video', 1035 }, 1036 { 1037 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ', 1038 'note': 'Use the first video ID in the URL', 1039 'info_dict': { 1040 'id': 'BaW_jenozKc', 1041 'ext': 'mp4', 1042 'title': 'youtube-dl test video 
"\'/\\ä↭', 1043 'uploader': 'Philipp Hagemeister', 1044 'uploader_id': 'phihag', 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 1046 'upload_date': '20121002', 1047 'description': 'test chars: "\'/\\ä↭\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 1048 'categories': ['Science & Technology'], 1049 'tags': ['youtube-dl'], 1050 'duration': 10, 1051 'view_count': int, 1052 'like_count': int, 1053 'dislike_count': int, 1054 }, 1055 'params': { 1056 'skip_download': True, 1057 }, 1058 }, 1059 { 1060 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I', 1061 'note': '256k DASH audio (format 141) via DASH manifest', 1062 'info_dict': { 1063 'id': 'a9LDPn-MO4I', 1064 'ext': 'm4a', 1065 'upload_date': '20121002', 1066 'uploader_id': '8KVIDEO', 1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', 1068 'description': '', 1069 'uploader': '8KVIDEO', 1070 'title': 'UHDTV TEST 8K VIDEO.mp4' 1071 }, 1072 'params': { 1073 'youtube_include_dash_manifest': True, 1074 'format': '141', 1075 }, 1076 'skip': 'format 141 not served anymore', 1077 }, 1078 # DASH manifest with encrypted signature 1079 { 1080 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA', 1081 'info_dict': { 1082 'id': 'IB3lcPjvWLA', 1083 'ext': 'm4a', 1084 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson', 1085 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf', 1086 'duration': 244, 1087 'uploader': 'AfrojackVEVO', 1088 'uploader_id': 'AfrojackVEVO', 1089 'upload_date': '20131011', 1090 'abr': 129.495, 1091 }, 1092 'params': { 1093 'youtube_include_dash_manifest': True, 1094 'format': '141/bestaudio[ext=m4a]', 1095 }, 1096 }, 1097 # Age-gate videos. 
See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000 1098 { 1099 'note': 'Embed allowed age-gate video', 1100 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 1101 'info_dict': { 1102 'id': 'HtVdAasjOgU', 1103 'ext': 'mp4', 1104 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', 1105 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 1106 'duration': 142, 1107 'uploader': 'The Witcher', 1108 'uploader_id': 'WitcherGame', 1109 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 1110 'upload_date': '20140605', 1111 'age_limit': 18, 1112 }, 1113 }, 1114 { 1115 'note': 'Age-gate video with embed allowed in public site', 1116 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U', 1117 'info_dict': { 1118 'id': 'HsUATh_Nc2U', 1119 'ext': 'mp4', 1120 'title': 'Godzilla 2 (Official Video)', 1121 'description': 'md5:bf77e03fcae5529475e500129b05668a', 1122 'upload_date': '20200408', 1123 'uploader_id': 'FlyingKitty900', 1124 'uploader': 'FlyingKitty', 1125 'age_limit': 18, 1126 }, 1127 }, 1128 { 1129 'note': 'Age-gate video embedable only with clientScreen=EMBED', 1130 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg', 1131 'info_dict': { 1132 'id': 'Tq92D6wQ1mg', 1133 'title': '[MMD] Adios - EVERGLOW [+Motion DL]', 1134 'ext': 'mp4', 1135 'upload_date': '20191227', 1136 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', 1137 'uploader': 'Projekt Melody', 1138 'description': 'md5:17eccca93a786d51bc67646756894066', 1139 'age_limit': 18, 1140 }, 1141 }, 1142 { 1143 'note': 'Non-Agegated non-embeddable video', 1144 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY', 1145 'info_dict': { 1146 'id': 'MeJVWBSsPAY', 1147 'ext': 'mp4', 1148 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)', 1149 'uploader': 'Herr Lurik', 1150 'uploader_id': 'st3in234', 1151 'description': 'Fan Video. 
Music & Lyrics by OOMPH!.', 1152 'upload_date': '20130730', 1153 }, 1154 }, 1155 { 1156 'note': 'Non-bypassable age-gated video', 1157 'url': 'https://youtube.com/watch?v=Cr381pDsSsA', 1158 'only_matching': True, 1159 }, 1160 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421) 1161 # YouTube Red ad is not captured for creator 1162 { 1163 'url': '__2ABJjxzNo', 1164 'info_dict': { 1165 'id': '__2ABJjxzNo', 1166 'ext': 'mp4', 1167 'duration': 266, 1168 'upload_date': '20100430', 1169 'uploader_id': 'deadmau5', 1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', 1171 'creator': 'deadmau5', 1172 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336', 1173 'uploader': 'deadmau5', 1174 'title': 'Deadmau5 - Some Chords (HD)', 1175 'alt_title': 'Some Chords', 1176 }, 1177 'expected_warnings': [ 1178 'DASH manifest missing', 1179 ] 1180 }, 1181 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) 1182 { 1183 'url': 'lqQg6PlCWgI', 1184 'info_dict': { 1185 'id': 'lqQg6PlCWgI', 1186 'ext': 'mp4', 1187 'duration': 6085, 1188 'upload_date': '20150827', 1189 'uploader_id': 'olympic', 1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', 1191 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 1192 'uploader': 'Olympics', 1193 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', 1194 }, 1195 'params': { 1196 'skip_download': 'requires avconv', 1197 } 1198 }, 1199 # Non-square pixels 1200 { 1201 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0', 1202 'info_dict': { 1203 'id': '_b-2C3KPAM0', 1204 'ext': 'mp4', 1205 'stretched_ratio': 16 / 9., 1206 'duration': 85, 1207 'upload_date': '20110310', 1208 'uploader_id': 'AllenMeow', 1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 1210 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 1211 'uploader': '孫ᄋᄅ', 1212 'title': '[A-made] 
變態妍字幕版 太妍 我就是這樣的人', 1213 }, 1214 }, 1215 # url_encoded_fmt_stream_map is empty string 1216 { 1217 'url': 'qEJwOuvDf7I', 1218 'info_dict': { 1219 'id': 'qEJwOuvDf7I', 1220 'ext': 'webm', 1221 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', 1222 'description': '', 1223 'upload_date': '20150404', 1224 'uploader_id': 'spbelect', 1225 'uploader': 'Наблюдатели Петербурга', 1226 }, 1227 'params': { 1228 'skip_download': 'requires avconv', 1229 }, 1230 'skip': 'This live event has ended.', 1231 }, 1232 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097) 1233 { 1234 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y', 1235 'info_dict': { 1236 'id': 'FIl7x6_3R5Y', 1237 'ext': 'webm', 1238 'title': 'md5:7b81415841e02ecd4313668cde88737a', 1239 'description': 'md5:116377fd2963b81ec4ce64b542173306', 1240 'duration': 220, 1241 'upload_date': '20150625', 1242 'uploader_id': 'dorappi2000', 1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 1244 'uploader': 'dorappi2000', 1245 'formats': 'mincount:31', 1246 }, 1247 'skip': 'not actual anymore', 1248 }, 1249 # DASH manifest with segment_list 1250 { 1251 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8', 1252 'md5': '8ce563a1d667b599d21064e982ab9e31', 1253 'info_dict': { 1254 'id': 'CsmdDsKjzN8', 1255 'ext': 'mp4', 1256 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510 1257 'uploader': 'Airtek', 1258 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.', 1259 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ', 1260 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015', 1261 }, 1262 'params': { 1263 'youtube_include_dash_manifest': True, 1264 'format': '135', # bestvideo 1265 }, 1266 'skip': 'This live event has ended.', 1267 }, 1268 { 1269 # Multifeed videos (multiple cameras), URL is for Main Camera 1270 'url': 
'https://www.youtube.com/watch?v=jvGDaLqkpTg', 1271 'info_dict': { 1272 'id': 'jvGDaLqkpTg', 1273 'title': 'Tom Clancy Free Weekend Rainbow Whatever', 1274 'description': 'md5:e03b909557865076822aa169218d6a5d', 1275 }, 1276 'playlist': [{ 1277 'info_dict': { 1278 'id': 'jvGDaLqkpTg', 1279 'ext': 'mp4', 1280 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)', 1281 'description': 'md5:e03b909557865076822aa169218d6a5d', 1282 'duration': 10643, 1283 'upload_date': '20161111', 1284 'uploader': 'Team PGP', 1285 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 1287 }, 1288 }, { 1289 'info_dict': { 1290 'id': '3AKt1R1aDnw', 1291 'ext': 'mp4', 1292 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)', 1293 'description': 'md5:e03b909557865076822aa169218d6a5d', 1294 'duration': 10991, 1295 'upload_date': '20161111', 1296 'uploader': 'Team PGP', 1297 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 1299 }, 1300 }, { 1301 'info_dict': { 1302 'id': 'RtAMM00gpVc', 1303 'ext': 'mp4', 1304 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)', 1305 'description': 'md5:e03b909557865076822aa169218d6a5d', 1306 'duration': 10995, 1307 'upload_date': '20161111', 1308 'uploader': 'Team PGP', 1309 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 1310 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 1311 }, 1312 }, { 1313 'info_dict': { 1314 'id': '6N2fdlP3C5U', 1315 'ext': 'mp4', 1316 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)', 1317 'description': 'md5:e03b909557865076822aa169218d6a5d', 1318 'duration': 10990, 1319 'upload_date': '20161111', 1320 'uploader': 'Team PGP', 1321 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 1323 }, 1324 }], 1325 
'params': { 1326 'skip_download': True, 1327 }, 1328 'skip': 'Not multifeed anymore', 1329 }, 1330 { 1331 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) 1332 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo', 1333 'info_dict': { 1334 'id': 'gVfLd0zydlo', 1335 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', 1336 }, 1337 'playlist_count': 2, 1338 'skip': 'Not multifeed anymore', 1339 }, 1340 { 1341 'url': 'https://vid.plus/FlRa-iH7PGw', 1342 'only_matching': True, 1343 }, 1344 { 1345 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', 1346 'only_matching': True, 1347 }, 1348 { 1349 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468) 1350 # Also tests cut-off URL expansion in video description (see 1351 # https://github.com/ytdl-org/youtube-dl/issues/1892, 1352 # https://github.com/ytdl-org/youtube-dl/issues/8164) 1353 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', 1354 'info_dict': { 1355 'id': 'lsguqyKfVQg', 1356 'ext': 'mp4', 1357 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', 1358 'alt_title': 'Dark Walk', 1359 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 1360 'duration': 133, 1361 'upload_date': '20151119', 1362 'uploader_id': 'IronSoulElf', 1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 1364 'uploader': 'IronSoulElf', 1365 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan', 1366 'track': 'Dark Walk', 1367 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan', 1368 'album': 'Position Music - Production Music Vol. 
143 - Dark Walk', 1369 }, 1370 'params': { 1371 'skip_download': True, 1372 }, 1373 }, 1374 { 1375 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468) 1376 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8', 1377 'only_matching': True, 1378 }, 1379 { 1380 # Video with yt:stretch=17:0 1381 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM', 1382 'info_dict': { 1383 'id': 'Q39EVAstoRM', 1384 'ext': 'mp4', 1385 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4', 1386 'description': 'md5:ee18a25c350637c8faff806845bddee9', 1387 'upload_date': '20151107', 1388 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA', 1389 'uploader': 'CH GAMER DROID', 1390 }, 1391 'params': { 1392 'skip_download': True, 1393 }, 1394 'skip': 'This video does not exist.', 1395 }, 1396 { 1397 # Video with incomplete 'yt:stretch=16:' 1398 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI', 1399 'only_matching': True, 1400 }, 1401 { 1402 # Video licensed under Creative Commons 1403 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA', 1404 'info_dict': { 1405 'id': 'M4gD1WSo5mA', 1406 'ext': 'mp4', 1407 'title': 'md5:e41008789470fc2533a3252216f1c1d1', 1408 'description': 'md5:a677553cf0840649b731a3024aeff4cc', 1409 'duration': 721, 1410 'upload_date': '20150127', 1411 'uploader_id': 'BerkmanCenter', 1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', 1413 'uploader': 'The Berkman Klein Center for Internet & Society', 1414 'license': 'Creative Commons Attribution license (reuse allowed)', 1415 }, 1416 'params': { 1417 'skip_download': True, 1418 }, 1419 }, 1420 { 1421 # Channel-like uploader_url 1422 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg', 1423 'info_dict': { 1424 'id': 'eQcmzGIKrzg', 1425 'ext': 'mp4', 1426 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', 1427 'description': 'md5:13a2503d7b5904ef4b223aa101628f39', 1428 'duration': 4060, 1429 'upload_date': '20151119', 1430 'uploader': 'Bernie Sanders', 1431 
'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', 1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', 1433 'license': 'Creative Commons Attribution license (reuse allowed)', 1434 }, 1435 'params': { 1436 'skip_download': True, 1437 }, 1438 }, 1439 { 1440 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', 1441 'only_matching': True, 1442 }, 1443 { 1444 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059) 1445 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 1446 'only_matching': True, 1447 }, 1448 { 1449 # Rental video preview 1450 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', 1451 'info_dict': { 1452 'id': 'uGpuVWrhIzE', 1453 'ext': 'mp4', 1454 'title': 'Piku - Trailer', 1455 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', 1456 'upload_date': '20150811', 1457 'uploader': 'FlixMatrix', 1458 'uploader_id': 'FlixMatrixKaravan', 1459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', 1460 'license': 'Standard YouTube License', 1461 }, 1462 'params': { 1463 'skip_download': True, 1464 }, 1465 'skip': 'This video is not available.', 1466 }, 1467 { 1468 # YouTube Red video with episode data 1469 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4', 1470 'info_dict': { 1471 'id': 'iqKdEhx-dD4', 1472 'ext': 'mp4', 1473 'title': 'Isolation - Mind Field (Ep 1)', 1474 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd', 1475 'duration': 2085, 1476 'upload_date': '20170118', 1477 'uploader': 'Vsauce', 1478 'uploader_id': 'Vsauce', 1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', 1480 'series': 'Mind Field', 1481 'season_number': 1, 1482 'episode_number': 1, 1483 }, 1484 'params': { 1485 'skip_download': True, 1486 }, 1487 'expected_warnings': [ 1488 'Skipping DASH manifest', 1489 ], 1490 }, 1491 { 1492 # The following content has been identified by the YouTube community 1493 # as inappropriate or offensive to some 
audiences. 1494 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI', 1495 'info_dict': { 1496 'id': '6SJNVb0GnPI', 1497 'ext': 'mp4', 1498 'title': 'Race Differences in Intelligence', 1499 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1', 1500 'duration': 965, 1501 'upload_date': '20140124', 1502 'uploader': 'New Century Foundation', 1503 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg', 1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg', 1505 }, 1506 'params': { 1507 'skip_download': True, 1508 }, 1509 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.', 1510 }, 1511 { 1512 # itag 212 1513 'url': '1t24XAntNCY', 1514 'only_matching': True, 1515 }, 1516 { 1517 # geo restricted to JP 1518 'url': 'sJL6WA-aGkQ', 1519 'only_matching': True, 1520 }, 1521 { 1522 'url': 'https://invidio.us/watch?v=BaW_jenozKc', 1523 'only_matching': True, 1524 }, 1525 { 1526 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc', 1527 'only_matching': True, 1528 }, 1529 { 1530 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m 1531 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA', 1532 'only_matching': True, 1533 }, 1534 { 1535 # DRM protected 1536 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc', 1537 'only_matching': True, 1538 }, 1539 { 1540 # Video with unsupported adaptive stream type formats 1541 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U', 1542 'info_dict': { 1543 'id': 'Z4Vy8R84T1U', 1544 'ext': 'mp4', 1545 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta', 1546 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 1547 'duration': 433, 1548 'upload_date': '20130923', 1549 'uploader': 'Amelia Putri Harwita', 1550 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q', 1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q', 1552 'formats': 'maxcount:10', 1553 }, 1554 'params': { 1555 'skip_download': True, 1556 
'youtube_include_dash_manifest': False, 1557 }, 1558 'skip': 'not actual anymore', 1559 }, 1560 { 1561 # Youtube Music Auto-generated description 1562 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', 1563 'info_dict': { 1564 'id': 'MgNrAu2pzNs', 1565 'ext': 'mp4', 1566 'title': 'Voyeur Girl', 1567 'description': 'md5:7ae382a65843d6df2685993e90a8628f', 1568 'upload_date': '20190312', 1569 'uploader': 'Stephen - Topic', 1570 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', 1571 'artist': 'Stephen', 1572 'track': 'Voyeur Girl', 1573 'album': 'it\'s too much love to know my dear', 1574 'release_date': '20190313', 1575 'release_year': 2019, 1576 }, 1577 'params': { 1578 'skip_download': True, 1579 }, 1580 }, 1581 { 1582 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q', 1583 'only_matching': True, 1584 }, 1585 { 1586 # invalid -> valid video id redirection 1587 'url': 'DJztXj2GPfl', 1588 'info_dict': { 1589 'id': 'DJztXj2GPfk', 1590 'ext': 'mp4', 1591 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)', 1592 'description': 'md5:bf577a41da97918e94fa9798d9228825', 1593 'upload_date': '20090125', 1594 'uploader': 'Prochorowka', 1595 'uploader_id': 'Prochorowka', 1596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka', 1597 'artist': 'Panjabi MC', 1598 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix', 1599 'album': 'Beware of the Boys (Mundian To Bach Ke)', 1600 }, 1601 'params': { 1602 'skip_download': True, 1603 }, 1604 'skip': 'Video unavailable', 1605 }, 1606 { 1607 # empty description results in an empty string 1608 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k', 1609 'info_dict': { 1610 'id': 'x41yOUIvK2k', 1611 'ext': 'mp4', 1612 'title': 'IMG 3456', 1613 'description': '', 1614 'upload_date': '20170613', 1615 'uploader_id': 'ElevageOrVert', 1616 'uploader': 'ElevageOrVert', 1617 }, 1618 'params': { 1619 'skip_download': True, 1620 }, 1621 }, 1622 { 1623 # with '};' inside yt initial data 
(see [1]) 1624 # see [2] for an example with '};' inside ytInitialPlayerResponse 1625 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093 1626 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216 1627 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no', 1628 'info_dict': { 1629 'id': 'CHqg6qOn4no', 1630 'ext': 'mp4', 1631 'title': 'Part 77 Sort a list of simple types in c#', 1632 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc', 1633 'upload_date': '20130831', 1634 'uploader_id': 'kudvenkat', 1635 'uploader': 'kudvenkat', 1636 }, 1637 'params': { 1638 'skip_download': True, 1639 }, 1640 }, 1641 { 1642 # another example of '};' in ytInitialData 1643 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY', 1644 'only_matching': True, 1645 }, 1646 { 1647 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ', 1648 'only_matching': True, 1649 }, 1650 { 1651 # https://github.com/ytdl-org/youtube-dl/pull/28094 1652 'url': 'OtqTfy26tG0', 1653 'info_dict': { 1654 'id': 'OtqTfy26tG0', 1655 'ext': 'mp4', 1656 'title': 'Burn Out', 1657 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131', 1658 'upload_date': '20141120', 1659 'uploader': 'The Cinematic Orchestra - Topic', 1660 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw', 1661 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw', 1662 'artist': 'The Cinematic Orchestra', 1663 'track': 'Burn Out', 1664 'album': 'Every Day', 1665 'release_data': None, 1666 'release_year': None, 1667 }, 1668 'params': { 1669 'skip_download': True, 1670 }, 1671 }, 1672 { 1673 # controversial video, only works with bpctr when authenticated with cookies 1674 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg', 1675 'only_matching': True, 1676 }, 1677 { 1678 # controversial video, requires bpctr/contentCheckOk 1679 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc', 1680 'info_dict': { 1681 'id': 'SZJvDhaSDnc', 1682 'ext': 'mp4', 1683 'title': 'San Diego teen commits suicide after bullying over 
embarrassing video', 1684 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ', 1685 'uploader': 'CBS This Morning', 1686 'uploader_id': 'CBSThisMorning', 1687 'upload_date': '20140716', 1688 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7' 1689 } 1690 }, 1691 { 1692 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 1693 'url': 'cBvYw8_A0vQ', 1694 'info_dict': { 1695 'id': 'cBvYw8_A0vQ', 1696 'ext': 'mp4', 1697 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き', 1698 'description': 'md5:ea770e474b7cd6722b4c95b833c03630', 1699 'upload_date': '20201120', 1700 'uploader': 'Walk around Japan', 1701 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw', 1702 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', 1703 }, 1704 'params': { 1705 'skip_download': True, 1706 }, 1707 }, { 1708 # Has multiple audio streams 1709 'url': 'WaOKSUlf4TM', 1710 'only_matching': True 1711 }, { 1712 # Requires Premium: has format 141 when requested using YTM url 1713 'url': 'https://music.youtube.com/watch?v=XclachpHxis', 1714 'only_matching': True 1715 }, { 1716 # multiple subtitles with same lang_code 1717 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug', 1718 'only_matching': True, 1719 }, { 1720 # Force use android client fallback 1721 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY', 1722 'info_dict': { 1723 'id': 'YOelRv7fMxY', 1724 'title': 'DIGGING A SECRET TUNNEL Part 1', 1725 'ext': '3gp', 1726 'upload_date': '20210624', 1727 'channel_id': 'UCp68_FLety0O-n9QU6phsgw', 1728 'uploader': 'colinfurze', 1729 'uploader_id': 'colinfurze', 1730 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw', 1731 'description': 'md5:b5096f56af7ccd7a555c84db81738b22' 1732 }, 1733 'params': { 1734 'format': '17', # 3gp format available on android 1735 'extractor_args': {'youtube': {'player_client': ['android']}}, 1736 }, 1737 }, 1738 { 1739 # Skip download of additional client configs (remix client config in this 
            # case)
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'only_matching': True,
            'params': {
                'extractor_args': {'youtube': {'player_skip': ['configs']}},
            },
        }, {
            # shorts
            'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
            'only_matching': True,
        }, {
            'note': 'Storyboards',
            'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
            'info_dict': {
                'id': '5KLPxDtMqe8',
                'ext': 'mhtml',
                'format_id': 'sb0',
                'title': 'Your Brain is Plastic',
                'uploader_id': 'scishow',
                'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
                'upload_date': '20140324',
                'uploader': 'SciShow',
            }, 'params': {'format': 'mhtml', 'skip_download': True}
        }
    ]

    @classmethod
    def suitable(cls, url):
        """Reject URLs carrying a playlist id; those go to the tab/playlist extractors."""
        # NOTE(review): shadows the module-level `parse_qs` import from ..utils;
        # presumably kept local for import-order reasons -- confirm before hoisting.
        from ..utils import parse_qs

        qs = parse_qs(url)
        if qs.get('list', [None])[0]:
            return False
        return super(YoutubeIE, cls).suitable(url)

    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance caches: downloaded player JS keyed by player id, and
        # extracted signature/nsig functions and values (see _decrypt_signature,
        # _decrypt_nsig below)
        self._code_cache = {}
        self._player_cache = {}

    def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
        # Serializes manifest refetches triggered by the per-format fragment generators
        lock = threading.Lock()

        is_live = True
        start_time = time.time()
        # Keep only the formats produced for live-from-start downloading
        formats = [f for f in formats if f.get('is_from_start')]

        def refetch_manifest(format_id, delay):
            # Re-download the player responses and rebuild `formats`, but only
            # once at least `delay` seconds have elapsed since the last fetch
            nonlocal formats, start_time, is_live
            if time.time() <= start_time + delay:
                return

            _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
            video_details = traverse_obj(
                prs, (..., 'videoDetails'), expected_type=dict, default=[])
            microformats = traverse_obj(
                prs, (..., 'microformat', 'playerMicroformatRenderer'),
                expected_type=dict, default=[])
            _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
            start_time = time.time()

        def mpd_feed(format_id, delay):
            """
            @returns (manifest_url, manifest_stream_number, is_live) or None
            """
            with lock:
                refetch_manifest(format_id, delay)

            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                if not is_live:
                    self.to_screen(f'{video_id}: Video is no longer live')
                else:
                    self.report_warning(
                        f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
                return None
            return f['manifest_url'], f['manifest_stream_number'], is_live

        for f in formats:
            f['protocol'] = 'http_dash_segments_generator'
            f['fragments'] = functools.partial(
                self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

    def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
        # Generator yielding fragment dicts ({'url': ...}) for a live DASH stream.
        # FETCH_SPAN: seconds between polls; MAX_DURATION: 432000 s = 120 hours,
        # matching the warning text below.
        FETCH_SPAN, MAX_DURATION = 5, 432000

        mpd_url, stream_number, is_live = None, None, True

        begin_index = 0
        download_start_time = ctx.get('start') or time.time()

        lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
        if lack_early_segments:
            self.report_warning(bug_reports_message(
                'Starting download from the last 120 hours of the live stream since '
                'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
            lack_early_segments = True

        known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
        fragments, fragment_base_url = None, None

        def _extract_sequence_from_mpd(refresh_sequence):
            nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
            # Obtain from MPD's maximum seq value
            old_mpd_url = mpd_url
            last_error = ctx.pop('last_error', None)
            # A 403 from the previous attempt means the manifest URL expired early
            expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
            mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                               or (mpd_url, stream_number, False))
            if not refresh_sequence:
                if expire_fast and not is_live:
                    return False, last_seq
                elif old_mpd_url == mpd_url:
                    return True, last_seq
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 1
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
            fragments = fmt_info['fragments']
            fragment_base_url = fmt_info['fragment_base_url']
            assert fragment_base_url

            # Sequence number is encoded in the last fragment's ".../sq/<n>" path
            _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
            return True, _last_seq

        while is_live:
            fetch_time = time.time()
            # Too many consecutive failures/stalls: give up on the stream
            if no_fragment_score > 30:
                return
            if last_segment_url:
                # Obtain from "X-Head-Seqnum" header value from each segment
                try:
                    urlh = self._request_webpage(
                        last_segment_url, None, note=False, errnote=False, fatal=False)
                except ExtractorError:
                    urlh = None
                last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
                if last_seq is None:
                    no_fragment_score += 1
                    last_segment_url = None
                    continue
            else:
                should_continue, last_seq = _extract_sequence_from_mpd(True)
                if not should_continue:
                    continue

            if known_idx > last_seq:
                last_segment_url = None
                continue

            last_seq += 1

            if begin_index < 0 and known_idx < 0:
                # skip from the start when it's negative value
                known_idx = last_seq + begin_index
            if lack_early_segments:
                known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
            try:
                for idx in range(known_idx, last_seq):
                    # do not update sequence here or you'll get skipped some part of it
                    should_continue, _ = _extract_sequence_from_mpd(False)
                    if not should_continue:
                        known_idx = idx - 1
                        raise ExtractorError('breaking out of outer loop')
                    last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                    yield {
                        'url': last_segment_url,
                    }
                if known_idx == last_seq:
                    no_fragment_score += 5
                else:
                    no_fragment_score = 0
                known_idx = last_seq
            except ExtractorError:
                continue

            time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

    def _extract_player_url(self, *ytcfgs, webpage=None):
        """Return an absolute player JS URL from the given ytcfg dicts, or None."""
        player_url = traverse_obj(
            ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
            get_all=False, expected_type=compat_str)
        if not player_url:
            return
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
        return player_url

    def _download_player_url(self, video_id, fatal=False):
        # Fallback: derive the player URL from the iframe API JS
        res = self._download_webpage(
            'https://www.youtube.com/iframe_api',
            note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
        if res:
            player_version = self._search_regex(
                r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
            if player_version:
                return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

    def _signature_cache_id(self, example_sig):
        """ Return a string representation of a signature """
        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

    @classmethod
    def _extract_player_info(cls, player_url):
        # Try each known player-URL pattern; raise if none identifies the player
        for player_re in cls._PLAYER_INFO_RE:
            id_m = re.search(player_re, player_url)
            if id_m:
                break
        else:
            raise ExtractorError('Cannot identify player %r' % player_url)
        return id_m.group('id')

    def _load_player(self, video_id, player_url, fatal=True):
        """Download (or fetch from cache) the player JS code for player_url."""
        player_id = self._extract_player_info(player_url)
        if player_id not in self._code_cache:
            code = self._download_webpage(
                player_url, video_id, fatal=fatal,
                note='Downloading player ' + player_id,
                errnote='Download of %s failed' % player_url)
            if code:
                self._code_cache[player_id] = code
        return self._code_cache.get(player_id)

    def _extract_signature_function(self, video_id, player_url, example_sig):
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        # Cached spec is a permutation: a list of indices into the input string
        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            return lambda s: ''.join(s[i] for i in cache_spec)

        code = self._load_player(video_id, player_url)
        if code:
            res = self._parse_sig_js(code)

            # Run the JS function once on a known string to capture the permutation
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res

    def _print_sig_code(self, func, example_sig):
        # Debug aid: print a Python equivalent of the extracted sig function
        # when --youtube-print-sig-code was given
        if not self.get_param('youtube_print_sig_code'):
            return

        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a compact Python slice expression for the index run
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)

    def _parse_sig_js(self, jscode):
        """Locate the signature function in the player JS and wrap it via JSInterpreter."""
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])

    def _decrypt_signature(self, s, video_id, player_url):
        """Turn the encrypted s field into a working signature"""

        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        try:
            # Cache key includes the signature "shape" since player functions
            # can differ per signature layout
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            self._print_sig_code(func, s)
            return func(s)
        except Exception as e:
            raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

    def _decrypt_nsig(self, s, video_id, player_url):
        """Turn the encrypted n field into a working signature"""
        if player_url is None:
            raise ExtractorError('Cannot decrypt nsig without player_url')
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)

        # Decrypted values are cached per input string
        sig_id = ('nsig_value', s)
        if sig_id in self._player_cache:
            return self._player_cache[sig_id]

        try:
            # The extracted function itself is cached per player URL
            player_id = ('nsig', player_url)
            if player_id not in self._player_cache:
                self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
            func = self._player_cache[player_id]
            self._player_cache[sig_id] = func(s)
            self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
            return self._player_cache[sig_id]
        except Exception as e:
            raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

    def _extract_n_function_name(self, jscode):
        return self._search_regex(
            (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
            jscode, 'Initial JS player n function name', group='nfunc')

    def _extract_n_function(self, video_id, player_url):
        player_id = self._extract_player_info(player_url)
        # Function code is persisted in the filesystem cache across runs
        func_code = self._downloader.cache.load('youtube-nsig', player_id)

        if func_code:
            jsi = JSInterpreter(func_code)
        else:
            jscode = self._load_player(video_id, player_url)
            funcname = self._extract_n_function_name(jscode)
            jsi = JSInterpreter(jscode)
            func_code = jsi.extract_function_code(funcname)
            self._downloader.cache.store('youtube-nsig', player_id, func_code)

        if self.get_param('youtube_print_sig_code'):
            self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

        return lambda s: jsi.extract_function_from_code(*func_code)([s])

    def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
        """
        Extract signatureTimestamp (sts)
        Required to tell API what sig/player version is in use.
2130 """ 2131 sts = None 2132 if isinstance(ytcfg, dict): 2133 sts = int_or_none(ytcfg.get('STS')) 2134 2135 if not sts: 2136 # Attempt to extract from player 2137 if player_url is None: 2138 error_msg = 'Cannot extract signature timestamp without player_url.' 2139 if fatal: 2140 raise ExtractorError(error_msg) 2141 self.report_warning(error_msg) 2142 return 2143 code = self._load_player(video_id, player_url, fatal=fatal) 2144 if code: 2145 sts = int_or_none(self._search_regex( 2146 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code, 2147 'JS player signature timestamp', group='sts', fatal=fatal)) 2148 return sts 2149 2150 def _mark_watched(self, video_id, player_responses): 2151 playback_url = get_first( 2152 player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), 2153 expected_type=url_or_none) 2154 if not playback_url: 2155 self.report_warning('Unable to mark watched') 2156 return 2157 parsed_playback_url = compat_urlparse.urlparse(playback_url) 2158 qs = compat_urlparse.parse_qs(parsed_playback_url.query) 2159 2160 # cpn generation algorithm is reverse engineered from base.js. 2161 # In fact it works even with dummy cpn. 
2162 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' 2163 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) 2164 2165 qs.update({ 2166 'ver': ['2'], 2167 'cpn': [cpn], 2168 }) 2169 playback_url = compat_urlparse.urlunparse( 2170 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) 2171 2172 self._download_webpage( 2173 playback_url, video_id, 'Marking watched', 2174 'Unable to mark watched', fatal=False) 2175 2176 @staticmethod 2177 def _extract_urls(webpage): 2178 # Embedded YouTube player 2179 entries = [ 2180 unescapeHTML(mobj.group('url')) 2181 for mobj in re.finditer(r'''(?x) 2182 (?: 2183 <iframe[^>]+?src=| 2184 data-video-url=| 2185 <embed[^>]+?src=| 2186 embedSWF\(?:\s*| 2187 <object[^>]+data=| 2188 new\s+SWFObject\( 2189 ) 2190 (["\']) 2191 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ 2192 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) 2193 \1''', webpage)] 2194 2195 # lazyYT YouTube embed 2196 entries.extend(list(map( 2197 unescapeHTML, 2198 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))) 2199 2200 # Wordpress "YouTube Video Importer" plugin 2201 matches = re.findall(r'''(?x)<div[^>]+ 2202 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ 2203 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) 2204 entries.extend(m[-1] for m in matches) 2205 2206 return entries 2207 2208 @staticmethod 2209 def _extract_url(webpage): 2210 urls = YoutubeIE._extract_urls(webpage) 2211 return urls[0] if urls else None 2212 2213 @classmethod 2214 def extract_id(cls, url): 2215 mobj = re.match(cls._VALID_URL, url, re.VERBOSE) 2216 if mobj is None: 2217 raise ExtractorError('Invalid URL: %s' % url) 2218 return mobj.group('id') 2219 2220 def _extract_chapters_from_json(self, data, duration): 2221 chapter_list = traverse_obj( 2222 data, ( 2223 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 2224 
                'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
            ), expected_type=list)

        return self._extract_chapters(
            chapter_list,
            chapter_time=lambda chapter: float_or_none(
                traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
            chapter_title=lambda chapter: traverse_obj(
                chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
            duration=duration)

    def _extract_chapters_from_engagement_panel(self, data, duration):
        # Alternative chapter source: macro markers in the engagement panels
        content_list = traverse_obj(
            data,
            ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
            expected_type=list, default=[])
        chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
        chapter_title = lambda chapter: self._get_text(chapter, 'title')

        # First non-empty chapter list wins; [] when none found
        return next((
            filter(None, (
                self._extract_chapters(
                    traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
                    chapter_time, chapter_title, duration)
                for contents in content_list
            ))), [])

    def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
        # Build [{'start_time', 'end_time', 'title'}, ...]; entries with
        # decreasing start times are dropped with a warning
        chapters = []
        last_chapter = {'start_time': 0}
        for idx, chapter in enumerate(chapter_list or []):
            title = chapter_title(chapter)
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            last_chapter['end_time'] = start_time
            if start_time < last_chapter['start_time']:
                if idx == 1:
                    chapters.pop()
                    self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
                else:
                    self.report_warning(f'Invalid start time for chapter "{title}"')
                continue
            last_chapter = {'start_time': start_time, 'title': title}
            chapters.append(last_chapter)
        last_chapter['end_time'] = duration
        return chapters

    def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
        # Parse a ytInitial* JSON blob; tolerate a missing boundary marker
        return self._parse_json(self._search_regex(
            (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
             regex), webpage, name, default='{}'), video_id, fatal=False)

    def _extract_comment(self, comment_renderer, parent=None):
        """Map a commentRenderer dict to yt-dlp's comment info-dict, or None."""
        comment_id = comment_renderer.get('commentId')
        if not comment_id:
            return

        text = self._get_text(comment_renderer, 'contentText')

        # note: timestamp is an estimate calculated from the current time and time_text
        timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
        author = self._get_text(comment_renderer, 'authorText')
        author_id = try_get(comment_renderer,
                            lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

        votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                       lambda x: x['likeCount']), compat_str)) or 0
        author_thumbnail = try_get(comment_renderer,
                                   lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

        author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
        is_favorited = 'creatorHeart' in (try_get(
            comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
        return {
            'id': comment_id,
            'text': text,
            'timestamp': timestamp,
            'time_text': time_text,
            'like_count': votes,
            'is_favorited': is_favorited,
            'author': author,
            'author_id': author_id,
            'author_thumbnail': author_thumbnail,
            'author_is_uploader': author_is_uploader,
            'parent': parent or 'root'
        }

    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
        # Recursive generator over comments (and, via recursion, their replies).
        # `tracker` is a single mutable dict shared across all recursive calls.

        get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

        def extract_header(contents):
            # Pull the total count and the sort-order continuation from the header
            _continuation = None
            for content in contents:
                comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    tracker['est_total'] = expected_comment_count
                    self.to_screen(f'Downloading ~{expected_comment_count} comments')
                comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = str_or_none(sort_menu_item.get('title'))
                if not sort_text:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text.lower())
                break
            return _continuation

        def extract_thread(contents):
            if not parent:
                tracker['current_page_thread'] = 0
            for content in contents:
                # A bare `yield` emits None, which the caller treats as "stop"
                if not parent and tracker['total_parent_comments'] >= max_parents:
                    yield
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue

                tracker['running_total'] += 1
                tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
                yield comment

                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    tracker['current_page_thread'] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), tracker=tracker)
                    for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                        yield reply_comment

        # Keeps track of counts across recursive calls
        if not tracker:
            tracker = dict(
                running_total=0,
                est_total=0,
                current_page_thread=0,
                total_parent_comments=0,
                total_reply_comments=0)

        # TODO: Deprecated
        # YouTube comments have a max depth of 2
        max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
        if max_depth:
            self._downloader.deprecation_warning(
                '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
            if max_depth == 1 and parent:
                return

        max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
            lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

        continuation = self._extract_continuation(root_continuation_data)
        message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
        if message and not parent:
            self.report_warning(message, video_id=video_id)

        response = None
        is_first_continuation = parent is None

        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        tracker['current_page_thread'], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints')

            continuation_contents = traverse_obj(
                response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

            continuation = None
            for continuation_section in continuation_contents:
                continuation_items = traverse_obj(
                    continuation_section,
                    (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                    get_all=False, expected_type=list) or []
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue

                for entry in extract_thread(continuation_items):
                    if not entry:
                        return
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    break

    def _get_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            renderer = next((
                item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
                if item.get('sectionIdentifier') == 'comment-item-section'), None)
            yield from self._comment_entries(renderer, ytcfg, video_id)

        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
        return itertools.islice(_real_comment_extract(contents), 0, max_comments)

    @staticmethod
    def _get_checkok_params():
        # Query params acknowledging content/racy interstitials
        return {'contentCheckOk': True, 'racyCheckOk': True}

    @classmethod
    def _generate_player_context(cls, sts=None):
        # Build the playbackContext for a /player API request; sts tells the
        # API which signature/player version is in use
        context = {
            'html5Preference': 'HTML5_PREF_WANTS',
        }
        if sts is not None:
            context['signatureTimestamp'] = sts
        return {
            'playbackContext': {
                'contentPlaybackContext': context
            },
            **cls._get_checkok_params()
        }

    @staticmethod
    def _is_agegated(player_response):
        """True if the playabilityStatus indicates an age restriction."""
        if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
            return True

        reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
        AGE_GATE_REASONS = (
            'confirm your age', 'age-restricted', 'inappropriate',  # reason
            'age_verification_required', 'age_check_required',  # status
        )
        return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)

    @staticmethod
    def _is_unplayable(player_response):
        return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'

    def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
        # Download the /player API JSON for one innertube client

        session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
        syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
        headers = self.generate_api_headers(
            ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

        yt_query = {'videoId': video_id}
        yt_query.update(self._generate_player_context(sts))
        return self._extract_response(
            item_id=video_id, ep='player', query=yt_query,
            ytcfg=player_ytcfg, headers=headers, fatal=True,
            default_client=client,
            note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
        ) or None

    def _get_requested_clients(self, url, smuggled_data):
        # Resolve the user's player_client extractor-arg into concrete clients
        requested_clients = []
        default = ['android', 'web']
        # Clients whose name starts with '_' cannot be requested explicitly
        allowed_clients = sorted(
            [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
            key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
        for client in self._configuration_arg('player_client'):
            if client in allowed_clients:
                requested_clients.append(client)
            elif client == 'default':
                requested_clients.extend(default)
            elif client == 'all':
                requested_clients.extend(allowed_clients)
            else:
                self.report_warning(f'Skipping unsupported client {client}')
        if not requested_clients:
            requested_clients = default

        # For music URLs, additionally try the *_music variant of each client
        if smuggled_data.get('is_music_url') or self.is_music_url(url):
            requested_clients.extend(
                f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

        return orderedSet(requested_clients)

    def _extract_player_ytcfg(self, client, video_id):
        """Download the client-specific ytcfg; {} for clients with no config page."""
        url = {
            'web_music': 'https://music.youtube.com',
            'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
        }.get(client)
        if not url:
            return {}
        webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
        return self.extract_ytcfg(video_id, webpage) or {}

    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """Fetch player responses for each requested client; returns (prs, player_url)."""
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # Reversed so clients.pop() below processes in the requested order
        clients = clients[::-1]
        prs = []

        def append_client(client_name):
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Remember the error; only raise later if no client succeeded
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url

    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        itags, stream_ids = {}, []
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip DRM'd formats and segmented (targetDurationSec) entries
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            # Dedup key combines itag and audio track id (multi-audio videos)
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # URL is hidden behind signatureCipher; decrypt it via the JS player
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            query = parse_qs(fmt_url)
            throttled = False
            if query.get('n'):
                # The 'n' parameter must be transformed, else downloads are throttled
                try:
                    fmt_url = update_url_query(fmt_url, {
                        'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
                except ExtractorError as e:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n'
                        f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                    throttled = True

            if itag:
                itags[itag] = 'https'
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': join_nonempty(
                    '%s%s' % (audio_track.get('displayName') or '',
                              ' (default)' if audio_track.get('audioIsDefault') else ''),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                    throttled and 'THROTTLED', delim=', '),
                # Throttled formats are deprioritized via source_preference
                'source_preference': -10 if throttled else -1,
                'fps': int_or_none(fmt.get('fps')) or None,
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
                'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
                if dct.get('ext'):
                    dct['container'] = dct['ext'] + '_dash'
            yield dct

        live_from_start = is_live and self.get_param('live_from_start')
        skip_manifests = self._configuration_arg('skip')
        if not self.get_param('youtube_include_hls_manifest', True):
            skip_manifests.append('hls')
        get_dash = 'dash' not in skip_manifests and (
            not is_live or live_from_start or self._configuration_arg('include_live_dash'))
        get_hls = not live_from_start and 'hls' not in skip_manifests

        def process_manifest_format(f, proto, itag):
            # Rename/skip manifest formats that collide with already-seen itags,
            # and attach a quality derived from the https formats' quality maps.
            if itag in itags:
                if itags[itag] == proto or f'{itag}-{proto}' in itags:
                    return False
                itag = f'{itag}-{proto}'
            if itag:
                f['format_id'] = itag
                itags[itag] = proto

            f['quality'] = next((
                q(qdict[val])
                for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
                if val in qdict), -1)
            return True

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    if process_manifest_format(f, 'hls', self._search_regex(
                            r'/itag/(\d+)', f['url'], 'itag', default=None)):
                        yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    if process_manifest_format(f, 'dash', f['format_id']):
                        f['filesize'] = int_or_none(self._search_regex(
                            r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                        if live_from_start:
                            f['is_from_start'] = True

                        yield f

    def _extract_storyboard(self, player_responses, duration):
        """Yield mhtml storyboard (thumbnail sprite) formats parsed from the
        '|'-separated playerStoryboardSpecRenderer spec string.
        """
        spec = get_first(
            player_responses,
            ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
        if not spec:
            return
        base_url = spec.pop()
        L = len(spec) - 1
        for i, args in enumerate(spec):
            # Each spec entry: width#height#frame_count#cols#rows#?#N#sigh
            args = args.split('#')
            counts = list(map(int_or_none, args[:5]))
            if len(args) != 8 or not all(counts):
                self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
                continue
            width, height, frame_count, cols, rows = counts
            N, sigh = args[6:]

            url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
            fragment_count = frame_count / (cols * rows)
            fragment_duration = duration / fragment_count
            yield {
                'format_id': f'sb{i}',
                'format_note': 'storyboard',
                'ext': 'mhtml',
                'protocol': 'mhtml',
                'acodec': 'none',
                'vcodec': 'none',
                'url': url,
                'width': width,
                'height': height,
                'fragments': [{
                    'path': url.replace('$M', str(j)),
                    'duration': min(fragment_duration, duration - (j * fragment_duration)),
                } for j in range(math.ceil(fragment_count))],
            }

    def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
        """Download the watch page (unless skipped), extract the master ytcfg,
        then fetch player responses for all requested clients.

        Returns (webpage, master_ytcfg, player_responses, player_url).
        """
        webpage = None
        if 'webpage' not in self._configuration_arg('player_skip'):
            # bpctr/has_verified bypass the "content warning" interstitial
            webpage = self._download_webpage(
                webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

        master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

        player_responses, player_url = self._extract_player_responses(
            self._get_requested_clients(url, smuggled_data),
            video_id, webpage, master_ytcfg)

        return webpage, master_ytcfg, player_responses, player_url

    def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
        """Determine liveness and materialize the formats list.

        Returns (live_broadcast_details, is_live, streaming_data, formats).
        """
        live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
        is_live = get_first(video_details, 'isLive')
        if is_live is None:
            is_live = get_first(live_broadcast_details, 'isLiveNow')

        streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
        formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

        return live_broadcast_details, is_live, streaming_data, formats

    def _real_extract(self, url):
        """Main single-video extraction entry point.

        Downloads player responses for all requested clients, merges metadata
        from videoDetails/microformat/meta tags, and returns the info dict
        (or a playlist result for multifeed/trailer videos).
        """
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)

        base_url = self.http_scheme() + '//www.youtube.com/'
        webpage_url = base_url + 'watch?v=' + video_id

        webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

        playability_statuses = traverse_obj(
            player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

        # Paid content may expose a free trailer - redirect to it
        trailer_video_id = get_first(
            playability_statuses,
            ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
            expected_type=str)
        if trailer_video_id:
            return self.url_result(
                trailer_video_id, self.ie_key(), trailer_video_id)

        search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                       if webpage else (lambda x: None))

        video_details = traverse_obj(
            player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        video_title = (
            get_first(video_details, 'title')
            or self._get_text(microformats, (..., 'title'))
            or search_meta(['og:title', 'twitter:title', 'title']))
        video_description = get_first(video_details, 'shortDescription')

        multifeed_metadata_list = get_first(
            player_responses,
            ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
            expected_type=str)
        if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            else:
                # Multifeed (multi-camera) video: return a playlist of the
                # individual feeds, each smuggled with force_singlefeed.
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)

        live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

        if not formats:
            # No formats: surface DRM / geo-restriction / playability reasons
            if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
                self.report_drm(video_id)
            pemr = get_first(
                playability_statuses,
                ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
            reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
            subreason = clean_html(self._get_text(pemr, 'subreason') or '')
            if subreason:
                if subreason == 'The uploader has not made this video available in your country.':
                    countries = get_first(microformats, 'availableCountries')
                    if not countries:
                        regions_allowed = search_meta('regionsAllowed')
                        countries = regions_allowed.split(',') if regions_allowed else None
                    self.raise_geo_restricted(subreason, countries, metadata_available=True)
                reason += f'. {subreason}'
            if reason:
                self.raise_no_formats(reason, expected=True)

        keywords = get_first(video_details, 'keywords', expected_type=list) or []
        if not keywords and webpage:
            keywords = [
                unescapeHTML(m.group('content'))
                for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
        for keyword in keywords:
            # yt:stretch=W:H forces an aspect ratio onto the video formats
            if keyword.startswith('yt:stretch='):
                mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
                if mobj:
                    # NB: float is intentional for forcing float division
                    w, h = (float(v) for v in mobj.groups())
                    if w > 0 and h > 0:
                        ratio = w / h
                        for f in formats:
                            if f.get('vcodec') != 'none':
                                f['stretched_ratio'] = ratio
                        break
        thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
        thumbnail_url = search_meta(['og:image', 'twitter:image'])
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
            })
        original_thumbnails = thumbnails.copy()

        # The best resolution thumbnails sometimes does not appear in the webpage
        # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
        # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
        thumbnail_names = [
            'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
            'hqdefault', 'hq1', 'hq2', 'hq3', '0',
            'mqdefault', 'mq1', 'mq2', 'mq3',
            'default', '1', '2', '3'
        ]
        n_thumbnail_names = len(thumbnail_names)
        thumbnails.extend({
            'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
                video_id=video_id, name=name, ext=ext,
                webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        } for name in thumbnail_names for ext in ('webp', 'jpg'))
        for thumb in thumbnails:
            # Guessed URLs are ranked by position in thumbnail_names (webp preferred)
            i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
            thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
        self._remove_duplicate_formats(thumbnails)
        self._downloader._sort_thumbnails(original_thumbnails)

        category = get_first(microformats, 'category') or search_meta('genre')
        channel_id = str_or_none(
            get_first(video_details, 'channelId')
            or get_first(microformats, 'externalChannelId')
            or search_meta('channelId'))
        duration = int_or_none(
            get_first(video_details, 'lengthSeconds')
            or get_first(microformats, 'lengthSeconds')
            or parse_duration(search_meta('duration'))) or None
        owner_profile_url = get_first(microformats, 'ownerProfileUrl')

        live_content = get_first(video_details, 'isLiveContent')
        is_upcoming = get_first(video_details, 'isUpcoming')
        if is_live is None:
            if is_upcoming or live_content is False:
                is_live = False
            if is_upcoming is None and (live_content or is_live):
                is_upcoming = False
        live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
        live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
        if not duration and live_end_time and live_start_time:
            duration = live_end_time - live_start_time

        if is_live and self.get_param('live_from_start'):
            self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

        formats.extend(self._extract_storyboard(player_responses, duration))

        # Source is given priority since formats that throttle are given lower source_preference
        # When throttling issue is fully fixed, remove this
        self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

        info = {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnails': thumbnails,
            # The best thumbnail that we are sure exists. Prevents unnecessary
            # URL checking if user don't care about getting the best possible thumbnail
            'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
            'description': video_description,
            'upload_date': unified_strdate(
                get_first(microformats, 'uploadDate')
                or search_meta('uploadDate')),
            'uploader': get_first(video_details, 'author'),
            'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
            'uploader_url': owner_profile_url,
            'channel_id': channel_id,
            'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
            'duration': duration,
            'view_count': int_or_none(
                get_first((video_details, microformats), (..., 'viewCount'))
                or search_meta('interactionCount')),
            'average_rating': float_or_none(get_first(video_details, 'averageRating')),
            'age_limit': 18 if (
                get_first(microformats, 'isFamilySafe') is False
                or search_meta('isFamilyFriendly') == 'false'
                or search_meta('og:restrictions:age') == '18+') else 0,
            'webpage_url': webpage_url,
            'categories': [category] if category else None,
            'tags': keywords,
            'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
            'is_live': is_live,
            'was_live': (False if is_live or is_upcoming or live_content is False
                         else None if is_live is None or is_upcoming is None
                         else live_content),
            'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
            'release_timestamp': live_start_time,
        }

        pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
        if pctr:
            def get_lang_code(track):
                # Prefer vssId ('.en' -> 'en', 'a.en' -> 'a-en') over languageCode
                return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                        or track.get('languageCode'))

            # Converted into dicts to remove duplicates
            captions = {
                get_lang_code(sub): sub
                for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
            translation_languages = {
                lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
                for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

            def process_language(container, base_url, lang_code, sub_name, query):
                # Append one entry per supported subtitle format for lang_code
                lang_subs = container.setdefault(lang_code, [])
                for fmt in self._SUBTITLE_FORMATS:
                    query.update({
                        'fmt': fmt,
                    })
                    lang_subs.append({
                        'ext': fmt,
                        'url': update_url_query(base_url, query),
                        'name': sub_name,
                    })

            subtitles, automatic_captions = {}, {}
            for lang_code, caption_track in captions.items():
                base_url = caption_track.get('baseUrl')
                if not base_url:
                    continue
                lang_name = self._get_text(caption_track, 'name', max_runs=1)
                if caption_track.get('kind') != 'asr':
                    # Manually-created track: regular subtitle
                    if not lang_code:
                        continue
                    process_language(
                        subtitles, base_url, lang_code, lang_name, {})
                    if not caption_track.get('isTranslatable'):
                        continue
                for trans_code, trans_name in translation_languages.items():
                    if not trans_code:
                        continue
                    if caption_track.get('kind') != 'asr':
                        trans_code += f'-{lang_code}'
                        trans_name += format_field(lang_name, template=' from %s')
                    process_language(
                        automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
            info['automatic_captions'] = automatic_captions
            info['subtitles'] = subtitles

        # Pick up start/end times from the URL's query string or fragment
        parsed_url = compat_urllib_parse_urlparse(url)
        for component in [parsed_url.fragment, parsed_url.query]:
            query = compat_parse_qs(component)
            for k, v in query.items():
                for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                    d_k += '_time'
                    if d_k not in info and k in s_ks:
                        info[d_k] = parse_duration(query[k][0])

        # Youtube Music Auto-generated description
        if video_description:
            mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
            if mobj:
                release_year = mobj.group('release_year')
                release_date = mobj.group('release_date')
                if release_date:
                    release_date = release_date.replace('-', '')
                    if not release_year:
                        release_year = release_date[:4]
                info.update({
                    # NOTE(review): '.strip()' here is applied to the literal
                    # group NAME 'album' (a no-op), not to the matched value;
                    # this looks like it was meant to be
                    # mobj.group('album').strip() — confirm and fix.
                    'album': mobj.group('album'.strip()),
                    'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                    'track': mobj.group('track').strip(),
                    'release_date': release_date,
                    'release_year': int_or_none(release_year),
                })

        initial_data = None
        if webpage:
            initial_data = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_DATA_RE, video_id,
                'yt initial data')
        if not initial_data:
            # Fall back to the 'next' API when the webpage was skipped/unusable
            query = {'videoId': video_id}
            query.update(self._get_checkok_params())
            initial_data = self._extract_response(
                item_id=video_id, ep='next', fatal=False,
                ytcfg=master_ytcfg, query=query,
                headers=self.generate_api_headers(ytcfg=master_ytcfg),
                note='Downloading initial data API JSON')

        try:
            # This will error if there is no livechat
            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
            info.setdefault('subtitles', {})['live_chat'] = [{
                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
            }]
        except (KeyError, IndexError, TypeError):
            pass

        if initial_data:
            info['chapters'] = (
                self._extract_chapters_from_json(initial_data, duration)
                or self._extract_chapters_from_engagement_panel(initial_data, duration)
                or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        # Super title of the form "Series S1 • E2"
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                # like/dislike counts from the toggle buttons' accessibility labels
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line means several songs are listed; then album/artist/
                # track rows are ambiguous and must not be used.
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

        # Fill channel fields from uploader fields when missing (and vice versa)
        fallbacks = {
            'channel': 'uploader',
            'channel_id': 'uploader_id',
            'channel_url': 'uploader_url',
        }
        for to, frm in fallbacks.items():
            if not info.get(to):
                info[to] = info.get(frm)

        for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
            v = info.get(s_k)
            if v:
                info[d_k] = v

        is_private = get_first(video_details, 'isPrivate', expected_type=bool)
        is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
        is_membersonly = None
        is_premium = None
        if initial_data and is_private is not None:
            is_membersonly = False
            is_premium = False
            contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
            badge_labels = set()
            for content in contents:
                if not isinstance(content, dict):
                    continue
                badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
            for badge_label in badge_labels:
                if badge_label.lower() == 'members only':
                    is_membersonly = True
                elif badge_label.lower() == 'premium':
                    is_premium = True
                elif badge_label.lower() == 'unlisted':
                    is_unlisted = True

        info['availability'] = self._availability(
            is_private=is_private,
            needs_premium=is_premium,
            needs_subscription=is_membersonly,
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else is_unlisted)

        info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

        self.mark_watched(video_id, player_responses)

        return info


class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Shared entry-extraction helpers for channel/playlist/tab pages."""

    def _extract_channel_id(self, webpage):
        """Extract the channel id from meta tags, falling back to parsing the
        canonical/app URLs from other meta properties.
        """
        channel_id = self._html_search_meta(
            'channelId', webpage, 'channel id', default=None)
        if channel_id:
            return channel_id
        channel_url = self._html_search_meta(
            ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
             'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
             'twitter:app:url:googleplay'), webpage, 'channel url')
        # NOTE(review): the '+' is outside the capture group, so group(1)
        # captures only the final character of the id; likely intended
        # ([^/?#&]+) — confirm against callers before changing.
        return self._search_regex(
            r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
            channel_url, 'channel id')

    @staticmethod
    def _extract_basic_item_renderer(item):
        """Return the first known basic or grid*Renderer dict inside item."""
        # Modified from _extract_grid_item_renderer
        known_basic_renderers = (
            'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
        )
        for key, renderer in item.items():
            if not isinstance(renderer, dict):
                continue
            elif key in known_basic_renderers:
                return renderer
            elif key.startswith('grid') and key.endswith('Renderer'):
                return renderer

    def _grid_entries(self, grid_renderer):
        """Yield url_result/video entries for each item of a grid renderer."""
        for item in grid_renderer['items']:
            if not isinstance(item,
dict): 3305 continue 3306 renderer = self._extract_basic_item_renderer(item) 3307 if not isinstance(renderer, dict): 3308 continue 3309 title = self._get_text(renderer, 'title') 3310 3311 # playlist 3312 playlist_id = renderer.get('playlistId') 3313 if playlist_id: 3314 yield self.url_result( 3315 'https://www.youtube.com/playlist?list=%s' % playlist_id, 3316 ie=YoutubeTabIE.ie_key(), video_id=playlist_id, 3317 video_title=title) 3318 continue 3319 # video 3320 video_id = renderer.get('videoId') 3321 if video_id: 3322 yield self._extract_video(renderer) 3323 continue 3324 # channel 3325 channel_id = renderer.get('channelId') 3326 if channel_id: 3327 yield self.url_result( 3328 'https://www.youtube.com/channel/%s' % channel_id, 3329 ie=YoutubeTabIE.ie_key(), video_title=title) 3330 continue 3331 # generic endpoint URL support 3332 ep_url = urljoin('https://www.youtube.com/', try_get( 3333 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], 3334 compat_str)) 3335 if ep_url: 3336 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE): 3337 if ie.suitable(ep_url): 3338 yield self.url_result( 3339 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title) 3340 break 3341 3342 def _shelf_entries_from_content(self, shelf_renderer): 3343 content = shelf_renderer.get('content') 3344 if not isinstance(content, dict): 3345 return 3346 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer') 3347 if renderer: 3348 # TODO: add support for nested playlists so each shelf is processed 3349 # as separate playlist 3350 # TODO: this includes only first N items 3351 for entry in self._grid_entries(renderer): 3352 yield entry 3353 renderer = content.get('horizontalListRenderer') 3354 if renderer: 3355 # TODO 3356 pass 3357 3358 def _shelf_entries(self, shelf_renderer, skip_channels=False): 3359 ep = try_get( 3360 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], 
            compat_str)
        shelf_url = urljoin('https://www.youtube.com', ep)
        if shelf_url:
            # Skip links to other channels; note that checking for
            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
            # will not work
            if skip_channels and '/channels?' in shelf_url:
                return
            title = self._get_text(shelf_renderer, 'title')
            yield self.url_result(shelf_url, video_title=title)
        # Shelf may not contain shelf URL, fallback to extraction from content
        for entry in self._shelf_entries_from_content(shelf_renderer):
            yield entry

    def _playlist_entries(self, video_list_renderer):
        """Yield video entries from a (panel) playlist video list renderer."""
        for content in video_list_renderer['contents']:
            if not isinstance(content, dict):
                continue
            renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
            if not isinstance(renderer, dict):
                continue
            video_id = renderer.get('videoId')
            if not video_id:
                continue
            yield self._extract_video(renderer)

    def _rich_entries(self, rich_grid_renderer):
        """Yield the single video entry embedded in a rich grid item, if any."""
        renderer = try_get(
            rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
        video_id = renderer.get('videoId')
        if not video_id:
            return
        yield self._extract_video(renderer)

    def _video_entry(self, video_renderer):
        # Returns an entry dict, or None when the renderer has no videoId
        video_id = video_renderer.get('videoId')
        if video_id:
            return self._extract_video(video_renderer)

    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries from a community post: its video/playlist attachment and inline video links."""
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            # skip links that merely repeat the attached video
            if video_id == ep_video_id:
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

    def _post_thread_continuation_entries(self, post_thread_continuation):
        """Yield entries from a community-tab continuation response."""
        contents = post_thread_continuation.get('contents')
        if not isinstance(contents, list):
            return
        for content in contents:
            renderer = content.get('backstagePostThreadRenderer')
            if not isinstance(renderer, dict):
                continue
            for entry in self._post_thread_entries(renderer):
                yield entry

    r''' # unused
    def _rich_grid_entries(self, contents):
        for content in contents:
            video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
            if video_renderer:
                entry = self._video_entry(video_renderer)
                if entry:
                    yield entry
    '''

    def _extract_entries(self, parent_renderer, continuation_list):
        # continuation_list is modified in-place with continuation_list = [continuation_token]
        continuation_list[:] = [None]
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Dispatch table: renderer key -> generator of entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                    'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                    'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # fall back to continuations on progressively outer renderers
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
        """Yield all entries of a tab, following API continuations page by page."""
        continuation_list = [None]
        extract_entries = lambda x: self._extract_entries(x, continuation_list)
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
            # See: https://github.com/ytdl-org/youtube-dl/issues/28702
            visitor_data = self._extract_visitor_data(response) or visitor_data

            # Old-style continuations: response carries 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuations: renderer key -> (handler, wrapper key for its items)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Re-wrap the continuation items under the key the handler expects
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break

    @staticmethod
    def _extract_selected_tab(tabs):
        """Return the tab renderer marked as selected; raise if none is."""
        for tab in tabs:
            renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
            if renderer.get('selected') is True:
                return renderer
        else:
            raise ExtractorError('Unable to find selected tab')

    @classmethod
    def _extract_uploader(cls, data):
        """Extract uploader name/id/url from the playlist sidebar; drops None values."""
        uploader = {}
        renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
        owner = try_get(
            renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if owner:
            uploader['uploader'] = owner.get('text')
            uploader['uploader_id'] = try_get(
                owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
            uploader['uploader_url'] = urljoin(
                'https://www.youtube.com/',
                try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
        return {k: v for k, v in uploader.items() if v is not None}

    def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()

        # Channel avatar, falling back to the playlist sidebar thumbnail
        thumbnails = (
            self._extract_thumbnails(renderer, 'avatar')
            or self._extract_thumbnails(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))

        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append the tab name(s), e.g. "Channel - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id, ytcfg,
                self._extract_account_syncid(ytcfg, data),
                self._extract_visitor_data(data, ytcfg)),
            **metadata)

    def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
        """Yield videos of an auto-generated Mix, paging until it loops back to the first video."""
        first_id = last_id = response = None
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # resume after the last video already yielded on the previous page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
                visitor_data=self._extract_visitor_data(response, data, ytcfg))
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

    def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
        title = playlist.get('title') or try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
        playlist_id = playlist.get('playlistId') or item_id

        # Delegating everything except mix playlists to regular tab-based playlist URL
        playlist_url = urljoin(url, try_get(
            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if playlist_url and playlist_url != url:
            return self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)

        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    def _extract_availability(self, data):
        """
        Gets the availability of a given playlist/tab.
        Note: Unless YouTube tells us explicitly, we do not assume it is public
        @param data: response
        """
        is_private = is_unlisted = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
        badge_labels = self._extract_badges(renderer)

        # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
        privacy_dropdown_entries = try_get(
            renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
        for renderer_dict in privacy_dropdown_entries:
            is_selected = try_get(
                renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
            if not is_selected:
                continue
            label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
                break

        for badge_label in badge_labels:
            if badge_label == 'unlisted':
                is_unlisted = True
            elif badge_label == 'private':
                is_private = True
            elif badge_label == 'public':
                is_unlisted = is_private = False
        return self._availability(is_private, False, False, False, is_unlisted)

    @staticmethod
    def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
        """Return the first sidebar item matching *info_renderer*, or None."""
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
        for item in sidebar_renderer:
            renderer = try_get(item, lambda x: x[info_renderer], expected_type)
            if renderer:
                return renderer

    def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(data, ytcfg))
        # NOTE(review): the fallback constants appear to be the default
        # "show unavailable" params and the VL-prefixed playlist browse id
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')

    def _extract_webpage(self, url, item_id, fatal=True):
        """Download the page and its ytInitialData, retrying on incomplete data."""
        retries = self.get_param('extractor_retries', 3)
        count = -1
        webpage = data = last_error = None
        while count < retries:
            count += 1
            # Sometimes youtube returns a webpage with incomplete ytInitialData
# See: https://github.com/yt-dlp/yt-dlp/issues/116 3800 if last_error: 3801 self.report_warning('%s. Retrying ...' % last_error) 3802 try: 3803 webpage = self._download_webpage( 3804 url, item_id, 3805 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',)) 3806 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} 3807 except ExtractorError as e: 3808 if isinstance(e.cause, network_exceptions): 3809 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429): 3810 last_error = error_to_compat_str(e.cause or e.msg) 3811 if count < retries: 3812 continue 3813 if fatal: 3814 raise 3815 self.report_warning(error_to_compat_str(e)) 3816 break 3817 else: 3818 try: 3819 self._extract_and_report_alerts(data) 3820 except ExtractorError as e: 3821 if fatal: 3822 raise 3823 self.report_warning(error_to_compat_str(e)) 3824 break 3825 3826 if dict_get(data, ('contents', 'currentVideoEndpoint')): 3827 break 3828 3829 last_error = 'Incomplete yt initial data received' 3830 if count >= retries: 3831 if fatal: 3832 raise ExtractorError(last_error) 3833 self.report_warning(last_error) 3834 break 3835 3836 return webpage, data 3837 3838 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'): 3839 data = None 3840 if 'webpage' not in self._configuration_arg('skip'): 3841 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) 3842 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) 3843 if not data: 3844 if not ytcfg and self.is_authenticated: 3845 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' 
                if 'authcheck' not in self._configuration_arg('skip') and fatal:
                    raise ExtractorError(
                        msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                              ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                        expected=True)
                self.report_warning(msg, only_once=True)
            data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
        return data, ytcfg

    def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
        """Resolve *url* via the InnerTube navigation API and fetch its browse/next data."""
        headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
        resolve_response = self._extract_response(
            item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
            ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
        # endpoint key in the resolve response -> API endpoint to query next
        endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
        for ep_key, ep in endpoints.items():
            params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
            if params:
                return self._extract_response(
                    item_id=item_id, query=params, ep=ep, headers=headers,
                    ytcfg=ytcfg, fatal=fatal, default_client=default_client,
                    check_get_keys=('contents', 'currentVideoEndpoint'))
        err_note = 'Failed to resolve url (does the playlist exist?)'
        if fatal:
            raise ExtractorError(err_note, expected=True)
        self.report_warning(err_note, item_id)

    @staticmethod
    def _smuggle_data(entries, data):
        # Pass *data* along to child extractors by smuggling it into each entry URL
        for entry in entries:
            if data:
                entry['url'] = smuggle_url(entry['url'], data)
            yield entry

    # extra search params; overridden by subclasses
    _SEARCH_PARAMS = None

    def _search_results(self, query, params=NO_DEFAULT):
        """Yield search results for *query*, following continuations."""
        data = {'query': query}
        if params is NO_DEFAULT:
            params = self._SEARCH_PARAMS
        if params:
            data['params'] = params
        continuation_list = [None]
        for
page_num in itertools.count(1): 3890 data.update(continuation_list[0] or {}) 3891 search = self._extract_response( 3892 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, 3893 check_get_keys=('contents', 'onResponseReceivedCommands')) 3894 slr_contents = try_get( 3895 search, 3896 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], 3897 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), 3898 list) 3899 yield from self._extract_entries({'contents': slr_contents}, continuation_list) 3900 if not continuation_list[0]: 3901 break 3902 3903 3904class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 3905 IE_DESC = 'YouTube Tabs' 3906 _VALID_URL = r'''(?x: 3907 https?:// 3908 (?:\w+\.)? 3909 (?: 3910 youtube(?:kids)?\.com| 3911 %(invidious)s 3912 )/ 3913 (?: 3914 (?P<channel_type>channel|c|user|browse)/| 3915 (?P<not_channel> 3916 feed/|hashtag/| 3917 (?:playlist|watch)\?.*?\blist= 3918 )| 3919 (?!(?:%(reserved_names)s)\b) # Direct URLs 3920 ) 3921 (?P<id>[^/?\#&]+) 3922 )''' % { 3923 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, 3924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), 3925 } 3926 IE_NAME = 'youtube:tab' 3927 3928 _TESTS = [{ 3929 'note': 'playlists, multipage', 3930 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', 3931 'playlist_mincount': 94, 3932 'info_dict': { 3933 'id': 'UCqj7Cz7revf5maW9g5pgNcg', 3934 'title': 'Игорь Клейнер - Playlists', 3935 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 3936 'uploader': 'Игорь Клейнер', 3937 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', 3938 }, 3939 }, { 3940 'note': 'playlists, multipage, different order', 3941 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', 3942 'playlist_mincount': 94, 3943 'info_dict': { 3944 'id': 'UCqj7Cz7revf5maW9g5pgNcg', 3945 'title': 'Игорь Клейнер - Playlists', 3946 
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 3947 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', 3948 'uploader': 'Игорь Клейнер', 3949 }, 3950 }, { 3951 'note': 'playlists, series', 3952 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', 3953 'playlist_mincount': 5, 3954 'info_dict': { 3955 'id': 'UCYO_jab_esuFRV4b17AJtAw', 3956 'title': '3Blue1Brown - Playlists', 3957 'description': 'md5:e1384e8a133307dd10edee76e875d62f', 3958 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', 3959 'uploader': '3Blue1Brown', 3960 }, 3961 }, { 3962 'note': 'playlists, singlepage', 3963 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', 3964 'playlist_mincount': 4, 3965 'info_dict': { 3966 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', 3967 'title': 'ThirstForScience - Playlists', 3968 'description': 'md5:609399d937ea957b0f53cbffb747a14c', 3969 'uploader': 'ThirstForScience', 3970 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ', 3971 } 3972 }, { 3973 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 3974 'only_matching': True, 3975 }, { 3976 'note': 'basic, single video playlist', 3977 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 3978 'info_dict': { 3979 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 3980 'uploader': 'Sergey M.', 3981 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 3982 'title': 'youtube-dl public playlist', 3983 }, 3984 'playlist_count': 1, 3985 }, { 3986 'note': 'empty playlist', 3987 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', 3988 'info_dict': { 3989 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 3990 'uploader': 'Sergey M.', 3991 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', 3992 'title': 'youtube-dl empty playlist', 3993 }, 3994 'playlist_count': 0, 3995 }, { 3996 'note': 'Home tab', 3997 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured', 3998 'info_dict': { 3999 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4000 'title': 'lex will - Home', 4001 
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 4002 'uploader': 'lex will', 4003 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4004 }, 4005 'playlist_mincount': 2, 4006 }, { 4007 'note': 'Videos tab', 4008 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos', 4009 'info_dict': { 4010 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4011 'title': 'lex will - Videos', 4012 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 4013 'uploader': 'lex will', 4014 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4015 }, 4016 'playlist_mincount': 975, 4017 }, { 4018 'note': 'Videos tab, sorted by popular', 4019 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid', 4020 'info_dict': { 4021 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4022 'title': 'lex will - Videos', 4023 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 4024 'uploader': 'lex will', 4025 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4026 }, 4027 'playlist_mincount': 199, 4028 }, { 4029 'note': 'Playlists tab', 4030 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists', 4031 'info_dict': { 4032 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4033 'title': 'lex will - Playlists', 4034 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 4035 'uploader': 'lex will', 4036 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4037 }, 4038 'playlist_mincount': 17, 4039 }, { 4040 'note': 'Community tab', 4041 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', 4042 'info_dict': { 4043 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4044 'title': 'lex will - Community', 4045 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 4046 'uploader': 'lex will', 4047 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4048 }, 4049 'playlist_mincount': 18, 4050 }, { 4051 'note': 'Channels tab', 4052 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', 4053 'info_dict': { 4054 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4055 'title': 'lex will - Channels', 4056 
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 4057 'uploader': 'lex will', 4058 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 4059 }, 4060 'playlist_mincount': 12, 4061 }, { 4062 'note': 'Search tab', 4063 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', 4064 'playlist_mincount': 40, 4065 'info_dict': { 4066 'id': 'UCYO_jab_esuFRV4b17AJtAw', 4067 'title': '3Blue1Brown - Search - linear algebra', 4068 'description': 'md5:e1384e8a133307dd10edee76e875d62f', 4069 'uploader': '3Blue1Brown', 4070 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', 4071 }, 4072 }, { 4073 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', 4074 'only_matching': True, 4075 }, { 4076 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 4077 'only_matching': True, 4078 }, { 4079 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 4080 'only_matching': True, 4081 }, { 4082 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 4083 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 4084 'info_dict': { 4085 'title': '29C3: Not my department', 4086 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 4087 'uploader': 'Christiaan008', 4088 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', 4089 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268', 4090 }, 4091 'playlist_count': 96, 4092 }, { 4093 'note': 'Large playlist', 4094 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', 4095 'info_dict': { 4096 'title': 'Uploads from Cauchemar', 4097 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', 4098 'uploader': 'Cauchemar', 4099 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', 4100 }, 4101 'playlist_mincount': 1123, 4102 }, { 4103 'note': 'even larger playlist, 8832 videos', 4104 'url': 'http://www.youtube.com/user/NASAgovVideo/videos', 4105 'only_matching': True, 4106 }, { 4107 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more 
videos', 4108 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 4109 'info_dict': { 4110 'title': 'Uploads from Interstellar Movie', 4111 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', 4112 'uploader': 'Interstellar Movie', 4113 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', 4114 }, 4115 'playlist_mincount': 21, 4116 }, { 4117 'note': 'Playlist with "show unavailable videos" button', 4118 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', 4119 'info_dict': { 4120 'title': 'Uploads from Phim Siêu Nhân Nhật Bản', 4121 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', 4122 'uploader': 'Phim Siêu Nhân Nhật Bản', 4123 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', 4124 }, 4125 'playlist_mincount': 200, 4126 }, { 4127 'note': 'Playlist with unavailable videos in page 7', 4128 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w', 4129 'info_dict': { 4130 'title': 'Uploads from BlankTV', 4131 'id': 'UU8l9frL61Yl5KFOl87nIm2w', 4132 'uploader': 'BlankTV', 4133 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w', 4134 }, 4135 'playlist_mincount': 1000, 4136 }, { 4137 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844', 4138 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 4139 'info_dict': { 4140 'title': 'Data Analysis with Dr Mike Pound', 4141 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 4142 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA', 4143 'uploader': 'Computerphile', 4144 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487', 4145 }, 4146 'playlist_mincount': 11, 4147 }, { 4148 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 4149 'only_matching': True, 4150 }, { 4151 'note': 'Playlist URL that does not actually serve a playlist', 4152 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', 4153 'info_dict': { 4154 'id': 'FqZTN594JQw', 4155 'ext': 'webm', 4156 'title': "Smiley's People 01 detective, Adventure Series, Action", 4157 'uploader': 
'STREEM', 4158 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', 4159 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', 4160 'upload_date': '20150526', 4161 'license': 'Standard YouTube License', 4162 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', 4163 'categories': ['People & Blogs'], 4164 'tags': list, 4165 'view_count': int, 4166 'like_count': int, 4167 'dislike_count': int, 4168 }, 4169 'params': { 4170 'skip_download': True, 4171 }, 4172 'skip': 'This video is not available.', 4173 'add_ie': [YoutubeIE.ie_key()], 4174 }, { 4175 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g', 4176 'only_matching': True, 4177 }, { 4178 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', 4179 'only_matching': True, 4180 }, { 4181 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 4182 'info_dict': { 4183 'id': '3yImotZU3tw', # This will keep changing 4184 'ext': 'mp4', 4185 'title': compat_str, 4186 'uploader': 'Sky News', 4187 'uploader_id': 'skynews', 4188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', 4189 'upload_date': r're:\d{8}', 4190 'description': compat_str, 4191 'categories': ['News & Politics'], 4192 'tags': list, 4193 'like_count': int, 4194 'dislike_count': int, 4195 }, 4196 'params': { 4197 'skip_download': True, 4198 }, 4199 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '], 4200 }, { 4201 'url': 'https://www.youtube.com/user/TheYoungTurks/live', 4202 'info_dict': { 4203 'id': 'a48o2S1cPoo', 4204 'ext': 'mp4', 4205 'title': 'The Young Turks - Live Main Show', 4206 'uploader': 'The Young Turks', 4207 'uploader_id': 'TheYoungTurks', 4208 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', 4209 'upload_date': '20150715', 4210 'license': 'Standard YouTube License', 4211 'description': 'md5:438179573adcdff3c97ebb1ee632b891', 4212 'categories': ['News & Politics'], 4213 'tags': 
['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], 4214 'like_count': int, 4215 'dislike_count': int, 4216 }, 4217 'params': { 4218 'skip_download': True, 4219 }, 4220 'only_matching': True, 4221 }, { 4222 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 4223 'only_matching': True, 4224 }, { 4225 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', 4226 'only_matching': True, 4227 }, { 4228 'note': 'A channel that is not live. Should raise error', 4229 'url': 'https://www.youtube.com/user/numberphile/live', 4230 'only_matching': True, 4231 }, { 4232 'url': 'https://www.youtube.com/feed/trending', 4233 'only_matching': True, 4234 }, { 4235 'url': 'https://www.youtube.com/feed/library', 4236 'only_matching': True, 4237 }, { 4238 'url': 'https://www.youtube.com/feed/history', 4239 'only_matching': True, 4240 }, { 4241 'url': 'https://www.youtube.com/feed/subscriptions', 4242 'only_matching': True, 4243 }, { 4244 'url': 'https://www.youtube.com/feed/watch_later', 4245 'only_matching': True, 4246 }, { 4247 'note': 'Recommended - redirects to home page.', 4248 'url': 'https://www.youtube.com/feed/recommended', 4249 'only_matching': True, 4250 }, { 4251 'note': 'inline playlist with not always working continuations', 4252 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', 4253 'only_matching': True, 4254 }, { 4255 'url': 'https://www.youtube.com/course', 4256 'only_matching': True, 4257 }, { 4258 'url': 'https://www.youtube.com/zsecurity', 4259 'only_matching': True, 4260 }, { 4261 'url': 'http://www.youtube.com/NASAgovVideo/videos', 4262 'only_matching': True, 4263 }, { 4264 'url': 'https://www.youtube.com/TheYoungTurks/live', 4265 'only_matching': True, 4266 }, { 4267 'url': 'https://www.youtube.com/hashtag/cctv9', 4268 'info_dict': { 4269 'id': 'cctv9', 4270 'title': '#cctv9', 4271 }, 4272 'playlist_mincount': 350, 4273 }, { 4274 'url': 
'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', 4275 'only_matching': True, 4276 }, { 4277 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist', 4278 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', 4279 'only_matching': True 4280 }, { 4281 'note': '/browse/ should redirect to /channel/', 4282 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', 4283 'only_matching': True 4284 }, { 4285 'note': 'VLPL, should redirect to playlist?list=PL...', 4286 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', 4287 'info_dict': { 4288 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', 4289 'uploader': 'NoCopyrightSounds', 4290 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', 4291 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', 4292 'title': 'NCS Releases', 4293 }, 4294 'playlist_mincount': 166, 4295 }, { 4296 'note': 'Topic, should redirect to playlist?list=UU...', 4297 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', 4298 'info_dict': { 4299 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', 4300 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', 4301 'title': 'Uploads from Royalty Free Music - Topic', 4302 'uploader': 'Royalty Free Music - Topic', 4303 }, 4304 'expected_warnings': [ 4305 'A channel/user page was given', 4306 'The URL does not have a videos tab', 4307 ], 4308 'playlist_mincount': 101, 4309 }, { 4310 'note': 'Topic without a UU playlist', 4311 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', 4312 'info_dict': { 4313 'id': 'UCtFRv9O2AHqOZjjynzrv-xg', 4314 'title': 'UCtFRv9O2AHqOZjjynzrv-xg', 4315 }, 4316 'expected_warnings': [ 4317 'A channel/user page was given', 4318 'The URL does not have a videos tab', 4319 'Falling back to channel URL', 4320 ], 4321 'playlist_mincount': 9, 4322 }, { 4323 'note': 
'Youtube music Album', 4324 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE', 4325 'info_dict': { 4326 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', 4327 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', 4328 }, 4329 'playlist_count': 50, 4330 }, { 4331 'note': 'unlisted single video playlist', 4332 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', 4333 'info_dict': { 4334 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q', 4335 'uploader': 'colethedj', 4336 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', 4337 'title': 'yt-dlp unlisted playlist test', 4338 'availability': 'unlisted' 4339 }, 4340 'playlist_count': 1, 4341 }, { 4342 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData', 4343 'url': 'https://www.youtube.com/feed/recommended', 4344 'info_dict': { 4345 'id': 'recommended', 4346 'title': 'recommended', 4347 }, 4348 'playlist_mincount': 50, 4349 'params': { 4350 'skip_download': True, 4351 'extractor_args': {'youtubetab': {'skip': ['webpage']}} 4352 }, 4353 }, { 4354 'note': 'API Fallback: /videos tab, sorted by oldest first', 4355 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid', 4356 'info_dict': { 4357 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', 4358 'title': 'Cody\'sLab - Videos', 4359 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa', 4360 'uploader': 'Cody\'sLab', 4361 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', 4362 }, 4363 'playlist_mincount': 650, 4364 'params': { 4365 'skip_download': True, 4366 'extractor_args': {'youtubetab': {'skip': ['webpage']}} 4367 }, 4368 }, { 4369 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', 4370 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', 4371 'info_dict': { 4372 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', 4373 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', 4374 'title': 'Uploads from Royalty Free Music - Topic', 4375 'uploader': 'Royalty Free Music - Topic', 4376 }, 4377 
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Defer single-video URLs to YoutubeIE; handle everything else matching _VALID_URL."""
        return False if YoutubeIE.suitable(url) else super(
            YoutubeTabIE, cls).suitable(url)

    def _real_extract(self, url):
        """Entry point: unsmuggle transport data, flag music URLs, and re-smuggle onto entries."""
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = self.__real_extract(url, smuggled_data)
        if info_dict.get('entries'):
            # Propagate the smuggled data to every child entry so nested extraction sees it
            info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
        return info_dict

    # Splits a matching URL into (pre)(tab)(post); the conditional group (?(channel_type)...)
    # only allows a /tab suffix when the URL is a channel-type URL
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')

    def __real_extract(self, url, smuggled_data):
        """Normalize the URL (music redirects, tab redirects, watch?list= fixups), fetch the
        page data, and dispatch to tab/playlist/single-video extraction."""
        item_id = self._match_id(url)
        # Force the canonical host; data shape differs on music./m. subdomains
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict with None values replaced by '' so string ops below are safe
            mobj = self._URL_RE.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=compat_str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if tab_name.lower() != mobj['tab'][1:]:
                        redirect_warning += f'. {tab_name} tab is being downloaded instead'

        if redirect_warning:
            self.report_warning(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
class YoutubePlaylistIE(InfoExtractor):
    """Bare playlist URLs/IDs; resolves to a canonical playlist URL handled by YoutubeTabIE."""
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE for full tab URLs and to YoutubeIE when a video ID is present."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE: parse_qs is already imported at module level; the previous local
        # `from ..utils import parse_qs` re-import was redundant and has been removed.
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite to a canonical https://www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            # Preserve the music origin so YoutubeTabIE applies music-specific handling
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)


class YoutubeYtBeIE(InfoExtractor):
    """youtu.be short links that also carry a list= parameter; re-dispatched as a watch URL."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild as a full watch URL (keeping both v= and list=) and delegate to YoutubeTabIE."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeYtUserIE(InfoExtractor):
    """Handles the "ytuser:NAME" shorthand by forwarding to the user's /videos tab."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Expand the shorthand into the canonical user videos URL and delegate
        username = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % username
        return self.url_result(
            videos_url, ie=YoutubeTabIE.ie_key(), video_id=username)


class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Handles the ":ytfav" pseudo-URL by forwarding to the LL (liked videos) playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The liked-videos feed is exposed as the special "LL" playlist
        liked_playlist = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist, ie=YoutubeTabIE.ie_key())


class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Plain "ytsearch" queries; all extraction logic lives in the base classes."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = []


class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """"ytsearchdate" queries — same as ytsearch but newest-first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Full /results?search_query=... URLs, including sort/filter (sp=) parameters."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Either search_query or q carries the query; _VALID_URL guarantees one is present
        params = parse_qs(url)
        query = (params.get('search_query') or params.get('q'))[0]
        sort_filter = params.get('sp', (None,))[0]  # optional sort/filter token
        return self.playlist_result(
            self._search_results(query, sort_filter), query, query)


class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived dynamically so each subclass gets youtube:<feed>
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed is reachable at a fixed /feed/<name> URL
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())


class YoutubeWatchLaterIE(InfoExtractor):
    """Handles the ":ytwatchlater" pseudo-URL via the special "WL" playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later, ie=YoutubeTabIE.ie_key())
# Concrete feed extractors: purely declarative subclasses of YoutubeFeedsInfoExtractor;
# all behavior comes from the base class's _FEED_NAME-driven _real_extract.
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    # Also matches the bare youtube.com homepage (with no path/query)
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # recommendations work without cookies, unlike the other feeds
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]


class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    # Accepts :ytsub, :ytsubs, :ytsubscription, :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]


class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    # Accepts :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs whose v= parameter was lost (usually an
    # unquoted "&" in the shell) and raises a helpful error instead of failing cryptically.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: the URL cannot identify a video
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)


class YoutubeClipIE(InfoExtractor):
    # Clips are not supported; fall through to the generic extractor for the full video.
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, 'Generic')


class YoutubeTruncatedIDIE(InfoExtractor):
    # Watch URLs whose video ID is shorter than the required 11 characters.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: a truncated ID cannot be resolved
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)