1#!/usr/bin/env python3 2# coding: utf-8 3 4from __future__ import absolute_import, unicode_literals 5 6import collections 7import contextlib 8import datetime 9import errno 10import fileinput 11import functools 12import io 13import itertools 14import json 15import locale 16import operator 17import os 18import platform 19import re 20import shutil 21import subprocess 22import sys 23import tempfile 24import time 25import tokenize 26import traceback 27import random 28import unicodedata 29 30from enum import Enum 31from string import ascii_letters 32 33from .compat import ( 34 compat_basestring, 35 compat_get_terminal_size, 36 compat_kwargs, 37 compat_numeric_types, 38 compat_os_name, 39 compat_pycrypto_AES, 40 compat_shlex_quote, 41 compat_str, 42 compat_tokenize_tokenize, 43 compat_urllib_error, 44 compat_urllib_request, 45 compat_urllib_request_DataHandler, 46 windows_enable_vt_mode, 47) 48from .cookies import load_cookies 49from .utils import ( 50 age_restricted, 51 args_to_str, 52 ContentTooShortError, 53 date_from_str, 54 DateRange, 55 DEFAULT_OUTTMPL, 56 determine_ext, 57 determine_protocol, 58 DownloadCancelled, 59 DownloadError, 60 encode_compat_str, 61 encodeFilename, 62 EntryNotInPlaylist, 63 error_to_compat_str, 64 ExistingVideoReached, 65 expand_path, 66 ExtractorError, 67 float_or_none, 68 format_bytes, 69 format_field, 70 format_decimal_suffix, 71 formatSeconds, 72 GeoRestrictedError, 73 get_domain, 74 HEADRequest, 75 int_or_none, 76 iri_to_uri, 77 ISO3166Utils, 78 join_nonempty, 79 LazyList, 80 LINK_TEMPLATES, 81 locked_file, 82 make_dir, 83 make_HTTPS_handler, 84 MaxDownloadsReached, 85 network_exceptions, 86 number_of_digits, 87 orderedSet, 88 OUTTMPL_TYPES, 89 PagedList, 90 parse_filesize, 91 PerRequestProxyHandler, 92 platform_name, 93 Popen, 94 PostProcessingError, 95 preferredencoding, 96 prepend_extension, 97 ReExtractInfo, 98 register_socks_protocols, 99 RejectedVideoReached, 100 remove_terminal_sequences, 101 render_table, 102 replace_extension, 
103 SameFileError, 104 sanitize_filename, 105 sanitize_path, 106 sanitize_url, 107 sanitized_Request, 108 std_headers, 109 STR_FORMAT_RE_TMPL, 110 STR_FORMAT_TYPES, 111 str_or_none, 112 strftime_or_none, 113 subtitles_filename, 114 supports_terminal_sequences, 115 timetuple_from_msec, 116 to_high_limit_path, 117 traverse_obj, 118 try_get, 119 UnavailableVideoError, 120 url_basename, 121 variadic, 122 version_tuple, 123 write_json_file, 124 write_string, 125 YoutubeDLCookieProcessor, 126 YoutubeDLHandler, 127 YoutubeDLRedirectHandler, 128) 129from .cache import Cache 130from .minicurses import format_text 131from .extractor import ( 132 gen_extractor_classes, 133 get_info_extractor, 134 _LAZY_LOADER, 135 _PLUGIN_CLASSES as plugin_extractors 136) 137from .extractor.openload import PhantomJSwrapper 138from .downloader import ( 139 FFmpegFD, 140 get_suitable_downloader, 141 shorten_protocol_name 142) 143from .downloader.rtmp import rtmpdump_version 144from .postprocessor import ( 145 get_postprocessor, 146 EmbedThumbnailPP, 147 FFmpegFixupDuplicateMoovPP, 148 FFmpegFixupDurationPP, 149 FFmpegFixupM3u8PP, 150 FFmpegFixupM4aPP, 151 FFmpegFixupStretchedPP, 152 FFmpegFixupTimestampPP, 153 FFmpegMergerPP, 154 FFmpegPostProcessor, 155 MoveFilesAfterDownloadPP, 156 _PLUGIN_CLASSES as plugin_postprocessors 157) 158from .update import detect_variant 159from .version import __version__, RELEASE_GIT_HEAD 160 161if compat_os_name == 'nt': 162 import ctypes 163 164 165class YoutubeDL(object): 166 """YoutubeDL class. 167 168 YoutubeDL objects are the ones responsible of downloading the 169 actual video file and writing it to disk if the user has requested 170 it, among some other tasks. In most cases there should be one per 171 program. As, given a video URL, the downloader doesn't know how to 172 extract all the needed information, task that InfoExtractors do, it 173 has to pass the URL to one of them. 
174 175 For this, YoutubeDL objects have a method that allows 176 InfoExtractors to be registered in a given order. When it is passed 177 a URL, the YoutubeDL object handles it to the first InfoExtractor it 178 finds that reports being able to handle it. The InfoExtractor extracts 179 all the information about the video or videos the URL refers to, and 180 YoutubeDL process the extracted information, possibly using a File 181 Downloader to download the video. 182 183 YoutubeDL objects accept a lot of parameters. In order not to saturate 184 the object constructor with arguments, it receives a dictionary of 185 options instead. These options are available through the params 186 attribute for the InfoExtractors to use. The YoutubeDL also 187 registers itself as the downloader in charge for the InfoExtractors 188 that are added to it, so this is a "mutual registration". 189 190 Available options: 191 192 username: Username for authentication purposes. 193 password: Password for authentication purposes. 194 videopassword: Password for accessing a video. 195 ap_mso: Adobe Pass multiple-system operator identifier. 196 ap_username: Multiple-system operator account username. 197 ap_password: Multiple-system operator account password. 198 usenetrc: Use netrc for authentication instead. 199 verbose: Print additional info to stdout. 200 quiet: Do not print messages to stdout. 201 no_warnings: Do not print out anything for warnings. 202 forceprint: A list of templates to force print 203 forceurl: Force printing final URL. (Deprecated) 204 forcetitle: Force printing title. (Deprecated) 205 forceid: Force printing ID. (Deprecated) 206 forcethumbnail: Force printing thumbnail URL. (Deprecated) 207 forcedescription: Force printing description. (Deprecated) 208 forcefilename: Force printing final filename. (Deprecated) 209 forceduration: Force printing duration. (Deprecated) 210 forcejson: Force printing info_dict as JSON. 
211 dump_single_json: Force printing the info_dict of the whole playlist 212 (or video) as a single JSON line. 213 force_write_download_archive: Force writing download archive regardless 214 of 'skip_download' or 'simulate'. 215 simulate: Do not download the video files. If unset (or None), 216 simulate only if listsubtitles, listformats or list_thumbnails is used 217 format: Video format code. see "FORMAT SELECTION" for more details. 218 You can also pass a function. The function takes 'ctx' as 219 argument and returns the formats to download. 220 See "build_format_selector" for an implementation 221 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. 222 ignore_no_formats_error: Ignore "No video formats" error. Useful for 223 extracting metadata even if the video is not actually 224 available for download (experimental) 225 format_sort: A list of fields by which to sort the video formats. 226 See "Sorting Formats" for more details. 227 format_sort_force: Force the given format_sort. see "Sorting Formats" 228 for more details. 229 allow_multiple_video_streams: Allow multiple video streams to be merged 230 into a single file 231 allow_multiple_audio_streams: Allow multiple audio streams to be merged 232 into a single file 233 check_formats Whether to test if the formats are downloadable. 234 Can be True (check all), False (check none), 235 'selected' (check selected formats), 236 or None (check only if requested by extractor) 237 paths: Dictionary of output paths. The allowed keys are 'home' 238 'temp' and the keys of OUTTMPL_TYPES (in utils.py) 239 outtmpl: Dictionary of templates for output names. Allowed keys 240 are 'default' and the keys of OUTTMPL_TYPES (in utils.py). 241 For compatibility with youtube-dl, a single string can also be used 242 outtmpl_na_placeholder: Placeholder for unavailable meta fields. 
243 restrictfilenames: Do not allow "&" and spaces in file names 244 trim_file_name: Limit length of filename (extension excluded) 245 windowsfilenames: Force the filenames to be windows compatible 246 ignoreerrors: Do not stop on download/postprocessing errors. 247 Can be 'only_download' to ignore only download errors. 248 Default is 'only_download' for CLI, but False for API 249 skip_playlist_after_errors: Number of allowed failures until the rest of 250 the playlist is skipped 251 force_generic_extractor: Force downloader to use the generic extractor 252 overwrites: Overwrite all video and metadata files if True, 253 overwrite only non-video files if None 254 and don't overwrite any file if False 255 For compatibility with youtube-dl, 256 "nooverwrites" may also be used instead 257 playliststart: Playlist item to start at. 258 playlistend: Playlist item to end at. 259 playlist_items: Specific indices of playlist to download. 260 playlistreverse: Download playlist items in reverse order. 261 playlistrandom: Download playlist items in random order. 262 matchtitle: Download only matching titles. 263 rejecttitle: Reject downloads for matching titles. 264 logger: Log messages to a logging.Logger instance. 265 logtostderr: Log messages to stderr instead of stdout. 266 consoletitle: Display progress in console window's titlebar. 267 writedescription: Write the video description to a .description file 268 writeinfojson: Write the video description to a .info.json file 269 clean_infojson: Remove private fields from the infojson 270 getcomments: Extract video comments. 
This will not be written to disk 271 unless writeinfojson is also given 272 writeannotations: Write the video annotations to a .annotations.xml file 273 writethumbnail: Write the thumbnail image to a file 274 allow_playlist_files: Whether to write playlists' description, infojson etc 275 also to disk when using the 'write*' options 276 write_all_thumbnails: Write all thumbnail formats to files 277 writelink: Write an internet shortcut file, depending on the 278 current platform (.url/.webloc/.desktop) 279 writeurllink: Write a Windows internet shortcut file (.url) 280 writewebloclink: Write a macOS internet shortcut file (.webloc) 281 writedesktoplink: Write a Linux internet shortcut file (.desktop) 282 writesubtitles: Write the video subtitles to a file 283 writeautomaticsub: Write the automatically generated subtitles to a file 284 allsubtitles: Deprecated - Use subtitleslangs = ['all'] 285 Downloads all the subtitles of the video 286 (requires writesubtitles or writeautomaticsub) 287 listsubtitles: Lists all available subtitles for the video 288 subtitlesformat: The format code for subtitles 289 subtitleslangs: List of languages of the subtitles to download (can be regex). 290 The list may contain "all" to refer to all the available 291 subtitles. The language can be prefixed with a "-" to 292 exclude it from the requested languages. Eg: ['all', '-live_chat'] 293 keepvideo: Keep the video file after post-processing 294 daterange: A DateRange object, download only if the upload_date is in the range. 295 skip_download: Skip the actual download of the video file 296 cachedir: Location of the cache files in the filesystem. 297 False to disable filesystem cache. 298 noplaylist: Download single video instead of a playlist if in doubt. 299 age_limit: An integer representing the user's age in years. 300 Unsuitable videos for the given age are skipped. 301 min_views: An integer representing the minimum view count the video 302 must have in order to not be skipped. 
303 Videos without view count information are always 304 downloaded. None for no limit. 305 max_views: An integer representing the maximum view count. 306 Videos that are more popular than that are not 307 downloaded. 308 Videos without view count information are always 309 downloaded. None for no limit. 310 download_archive: File name of a file where all downloads are recorded. 311 Videos already present in the file are not downloaded 312 again. 313 break_on_existing: Stop the download process after attempting to download a 314 file that is in the archive. 315 break_on_reject: Stop the download process when encountering a video that 316 has been filtered out. 317 break_per_url: Whether break_on_reject and break_on_existing 318 should act on each input URL as opposed to for the entire queue 319 cookiefile: File name where cookies should be read from and dumped to 320 cookiesfrombrowser: A tuple containing the name of the browser, the profile 321 name/path from where cookies are loaded, and the name of the 322 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT') 323 nocheckcertificate: Do not verify SSL certificates 324 prefer_insecure: Use HTTP instead of HTTPS to retrieve information. 325 At the moment, this is only supported by YouTube. 326 proxy: URL of the proxy server to use 327 geo_verification_proxy: URL of the proxy to use for IP address verification 328 on geo-restricted sites. 329 socket_timeout: Time to wait for unresponsive hosts, in seconds 330 bidi_workaround: Work around buggy terminals without bidirectional text 331 support, using fribidi 332 debug_printtraffic:Print out sent and received HTTP traffic 333 include_ads: Download ads as well (deprecated) 334 default_search: Prepend this string if an input url is not valid. 335 'auto' for elaborate guessing 336 encoding: Use this encoding instead of the system-specified. 337 extract_flat: Do not resolve URLs, return the immediate result. 
338 Pass in 'in_playlist' to only show this behavior for 339 playlist items. 340 wait_for_video: If given, wait for scheduled streams to become available. 341 The value should be a tuple containing the range 342 (min_secs, max_secs) to wait between retries 343 postprocessors: A list of dictionaries, each with an entry 344 * key: The name of the postprocessor. See 345 yt_dlp/postprocessor/__init__.py for a list. 346 * when: When to run the postprocessor. Can be one of 347 pre_process|before_dl|post_process|after_move. 348 Assumed to be 'post_process' if not given 349 post_hooks: Deprecated - Register a custom postprocessor instead 350 A list of functions that get called as the final step 351 for each video file, after all postprocessors have been 352 called. The filename will be passed as the only argument. 353 progress_hooks: A list of functions that get called on download 354 progress, with a dictionary with the entries 355 * status: One of "downloading", "error", or "finished". 356 Check this first and ignore unknown values. 357 * info_dict: The extracted info_dict 358 359 If status is one of "downloading", or "finished", the 360 following properties may also be present: 361 * filename: The final filename (always present) 362 * tmpfilename: The filename we're currently writing to 363 * downloaded_bytes: Bytes on disk 364 * total_bytes: Size of the whole file, None if unknown 365 * total_bytes_estimate: Guess of the eventual file size, 366 None if unavailable. 367 * elapsed: The number of seconds since download started. 368 * eta: The estimated time in seconds, None if unknown 369 * speed: The download speed in bytes/second, None if 370 unknown 371 * fragment_index: The counter of the currently 372 downloaded video fragment. 373 * fragment_count: The number of fragments (= individual 374 files that will be merged) 375 376 Progress hooks are guaranteed to be called at least once 377 (with status "finished") if the download is successful. 
378 postprocessor_hooks: A list of functions that get called on postprocessing 379 progress, with a dictionary with the entries 380 * status: One of "started", "processing", or "finished". 381 Check this first and ignore unknown values. 382 * postprocessor: Name of the postprocessor 383 * info_dict: The extracted info_dict 384 385 Progress hooks are guaranteed to be called at least twice 386 (with status "started" and "finished") if the processing is successful. 387 merge_output_format: Extension to use when merging formats. 388 final_ext: Expected final extension; used to detect when the file was 389 already downloaded and converted 390 fixup: Automatically correct known faults of the file. 391 One of: 392 - "never": do nothing 393 - "warn": only emit a warning 394 - "detect_or_warn": check whether we can do anything 395 about it, warn otherwise (default) 396 source_address: Client-side IP address to bind to. 397 call_home: Boolean, true iff we are allowed to contact the 398 yt-dlp servers for debugging. (BROKEN) 399 sleep_interval_requests: Number of seconds to sleep between requests 400 during extraction 401 sleep_interval: Number of seconds to sleep before each download when 402 used alone or a lower bound of a range for randomized 403 sleep before each download (minimum possible number 404 of seconds to sleep) when used along with 405 max_sleep_interval. 406 max_sleep_interval:Upper bound of a range for randomized sleep before each 407 download (maximum possible number of seconds to sleep). 408 Must only be used along with sleep_interval. 409 Actual sleep time will be a random float from range 410 [sleep_interval; max_sleep_interval]. 411 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download 412 listformats: Print an overview of available video formats and exit. 413 list_thumbnails: Print a table of all thumbnails and exit. 414 match_filter: A function that gets called with the info_dict of 415 every video. 
416 If it returns a message, the video is ignored. 417 If it returns None, the video is downloaded. 418 match_filter_func in utils.py is one example for this. 419 no_color: Do not emit color codes in output. 420 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For 421 HTTP header 422 geo_bypass_country: 423 Two-letter ISO 3166-2 country code that will be used for 424 explicit geographic restriction bypassing via faking 425 X-Forwarded-For HTTP header 426 geo_bypass_ip_block: 427 IP range in CIDR notation that will be used similarly to 428 geo_bypass_country 429 430 The following options determine which downloader is picked: 431 external_downloader: A dictionary of protocol keys and the executable of the 432 external downloader to use for it. The allowed protocols 433 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms. 434 Set the value to 'native' to use the native downloader 435 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'} 436 or {'m3u8': 'ffmpeg'} instead. 437 Use the native HLS downloader instead of ffmpeg/avconv 438 if True, otherwise use ffmpeg/avconv if False, otherwise 439 use downloader suggested by extractor if None. 440 compat_opts: Compatibility options. See "Differences in default behavior". 441 The following options do not work when used through the API: 442 filename, abort-on-error, multistreams, no-live-chat, format-sort 443 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. 444 Refer __init__.py for their implementation 445 progress_template: Dictionary of templates for progress outputs. 446 Allowed keys are 'download', 'postprocess', 447 'download-title' (console title) and 'postprocess-title'. 
448 The template is mapped on a dictionary with keys 'progress' and 'info' 449 450 The following parameters are not used by YoutubeDL itself, they are used by 451 the downloader (see yt_dlp/downloader/common.py): 452 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, 453 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries, 454 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size, 455 external_downloader_args, concurrent_fragment_downloads. 456 457 The following options are used by the post processors: 458 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, 459 otherwise prefer ffmpeg. (avconv support is deprecated) 460 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path 461 to the binary or its containing directory. 462 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case) 463 and a list of additional command-line arguments for the 464 postprocessor/executable. The dict can also have "PP+EXE" keys 465 which are used when the given exe is used by the given PP. 466 Use 'default' as the name for arguments to passed to all PP 467 For compatibility with youtube-dl, a single list of args 468 can also be used 469 470 The following options are used by the extractors: 471 extractor_retries: Number of times to retry for known errors 472 dynamic_mpd: Whether to process dynamic DASH manifests (default: True) 473 hls_split_discontinuity: Split HLS playlists to different formats at 474 discontinuities such as ad breaks (default: False) 475 extractor_args: A dictionary of arguments to be passed to the extractors. 476 See "EXTRACTOR ARGUMENTS" for details. 477 Eg: {'youtube': {'skip': ['dash', 'hls']}} 478 youtube_include_dash_manifest: Deprecated - Use extractor_args instead. 479 If True (default), DASH manifests and related 480 data will be downloaded and processed by extractor. 
481 You can reduce network I/O by disabling it if you don't 482 care about DASH. (only for youtube) 483 youtube_include_hls_manifest: Deprecated - Use extractor_args instead. 484 If True (default), HLS manifests and related 485 data will be downloaded and processed by extractor. 486 You can reduce network I/O by disabling it if you don't 487 care about HLS. (only for youtube) 488 """ 489 490 _NUMERIC_FIELDS = set(( 491 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', 492 'timestamp', 'release_timestamp', 493 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 494 'average_rating', 'comment_count', 'age_limit', 495 'start_time', 'end_time', 496 'chapter_number', 'season_number', 'episode_number', 497 'track_number', 'disc_number', 'release_year', 498 )) 499 500 _format_selection_exts = { 501 'audio': {'m4a', 'mp3', 'ogg', 'aac'}, 502 'video': {'mp4', 'flv', 'webm', '3gp'}, 503 'storyboards': {'mhtml'}, 504 } 505 506 params = None 507 _ies = {} 508 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} 509 _printed_messages = set() 510 _first_webpage_request = True 511 _download_retcode = None 512 _num_downloads = None 513 _playlist_level = 0 514 _playlist_urls = set() 515 _screen_file = None 516 517 def __init__(self, params=None, auto_init=True): 518 """Create a FileDownloader object with the given options. 519 @param auto_init Whether to load the default extractors and print header (if verbose). 
520 Set to 'no_verbose_header' to not print the header 521 """ 522 if params is None: 523 params = {} 524 self._ies = {} 525 self._ies_instances = {} 526 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} 527 self._printed_messages = set() 528 self._first_webpage_request = True 529 self._post_hooks = [] 530 self._progress_hooks = [] 531 self._postprocessor_hooks = [] 532 self._download_retcode = 0 533 self._num_downloads = 0 534 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] 535 self._err_file = sys.stderr 536 self.params = params 537 self.cache = Cache(self) 538 539 windows_enable_vt_mode() 540 self._allow_colors = { 541 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file), 542 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file), 543 } 544 545 if sys.version_info < (3, 6): 546 self.report_warning( 547 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2]) 548 549 if self.params.get('allow_unplayable_formats'): 550 self.report_warning( 551 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. ' 552 'This is a developer option intended for debugging. \n' 553 ' If you experience any issues while using this option, ' 554 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') 555 556 def check_deprecated(param, option, suggestion): 557 if self.params.get(param) is not None: 558 self.report_warning('%s is deprecated. 
Use %s instead' % (option, suggestion)) 559 return True 560 return False 561 562 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): 563 if self.params.get('geo_verification_proxy') is None: 564 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] 565 566 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') 567 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') 568 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') 569 570 for msg in self.params.get('_warnings', []): 571 self.report_warning(msg) 572 for msg in self.params.get('_deprecation_warnings', []): 573 self.deprecation_warning(msg) 574 575 if 'list-formats' in self.params.get('compat_opts', []): 576 self.params['listformats_table'] = False 577 578 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: 579 # nooverwrites was unnecessarily changed to overwrites 580 # in 0c3d0f51778b153f65c21906031c2e091fcfb641 581 # This ensures compatibility with both keys 582 self.params['overwrites'] = not self.params['nooverwrites'] 583 elif self.params.get('overwrites') is None: 584 self.params.pop('overwrites', None) 585 else: 586 self.params['nooverwrites'] = not self.params['overwrites'] 587 588 if params.get('bidi_workaround', False): 589 try: 590 import pty 591 master, slave = pty.openpty() 592 width = compat_get_terminal_size().columns 593 if width is None: 594 width_args = [] 595 else: 596 width_args = ['-w', str(width)] 597 sp_kwargs = dict( 598 stdin=subprocess.PIPE, 599 stdout=slave, 600 stderr=self._err_file) 601 try: 602 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) 603 except OSError: 604 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) 605 self._output_channel = os.fdopen(master, 'rb') 606 except OSError as ose: 607 if ose.errno == errno.ENOENT: 608 self.report_warning( 609 'Could not find 
fribidi executable, ignoring --bidi-workaround. ' 610 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') 611 else: 612 raise 613 614 if (sys.platform != 'win32' 615 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] 616 and not params.get('restrictfilenames', False)): 617 # Unicode filesystem API will throw errors (#1474, #13027) 618 self.report_warning( 619 'Assuming --restrict-filenames since file system encoding ' 620 'cannot encode all characters. ' 621 'Set the LC_ALL environment variable to fix this.') 622 self.params['restrictfilenames'] = True 623 624 self.outtmpl_dict = self.parse_outtmpl() 625 626 # Creating format selector here allows us to catch syntax errors before the extraction 627 self.format_selector = ( 628 self.params.get('format') if self.params.get('format') in (None, '-') 629 else self.params['format'] if callable(self.params['format']) 630 else self.build_format_selector(self.params['format'])) 631 632 self._setup_opener() 633 634 if auto_init: 635 if auto_init != 'no_verbose_header': 636 self.print_debug_header() 637 self.add_default_info_extractors() 638 639 hooks = { 640 'post_hooks': self.add_post_hook, 641 'progress_hooks': self.add_progress_hook, 642 'postprocessor_hooks': self.add_postprocessor_hook, 643 } 644 for opt, fn in hooks.items(): 645 for ph in self.params.get(opt, []): 646 fn(ph) 647 648 for pp_def_raw in self.params.get('postprocessors', []): 649 pp_def = dict(pp_def_raw) 650 when = pp_def.pop('when', 'post_process') 651 self.add_post_processor( 652 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)), 653 when=when) 654 655 register_socks_protocols() 656 657 def preload_download_archive(fn): 658 """Preload the archive, if any is specified""" 659 if fn is None: 660 return False 661 self.write_debug(f'Loading archive file {fn!r}') 662 try: 663 with locked_file(fn, 'r', encoding='utf-8') as archive_file: 664 for line in archive_file: 665 
self.archive.add(line.strip()) 666 except IOError as ioe: 667 if ioe.errno != errno.ENOENT: 668 raise 669 return False 670 return True 671 672 self.archive = set() 673 preload_download_archive(self.params.get('download_archive')) 674 675 def warn_if_short_id(self, argv): 676 # short YouTube ID starting with dash? 677 idxs = [ 678 i for i, a in enumerate(argv) 679 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] 680 if idxs: 681 correct_argv = ( 682 ['yt-dlp'] 683 + [a for i, a in enumerate(argv) if i not in idxs] 684 + ['--'] + [argv[i] for i in idxs] 685 ) 686 self.report_warning( 687 'Long argument string detected. ' 688 'Use -- to separate parameters and URLs, like this:\n%s' % 689 args_to_str(correct_argv)) 690 691 def add_info_extractor(self, ie): 692 """Add an InfoExtractor object to the end of the list.""" 693 ie_key = ie.ie_key() 694 self._ies[ie_key] = ie 695 if not isinstance(ie, type): 696 self._ies_instances[ie_key] = ie 697 ie.set_downloader(self) 698 699 def _get_info_extractor_class(self, ie_key): 700 ie = self._ies.get(ie_key) 701 if ie is None: 702 ie = get_info_extractor(ie_key) 703 self.add_info_extractor(ie) 704 return ie 705 706 def get_info_extractor(self, ie_key): 707 """ 708 Get an instance of an IE with name ie_key, it will try to get one from 709 the _ies list, if there's no instance it will create a new one and add 710 it to the extractor list. 
711 """ 712 ie = self._ies_instances.get(ie_key) 713 if ie is None: 714 ie = get_info_extractor(ie_key)() 715 self.add_info_extractor(ie) 716 return ie 717 718 def add_default_info_extractors(self): 719 """ 720 Add the InfoExtractors returned by gen_extractors to the end of the list 721 """ 722 for ie in gen_extractor_classes(): 723 self.add_info_extractor(ie) 724 725 def add_post_processor(self, pp, when='post_process'): 726 """Add a PostProcessor object to the end of the chain.""" 727 self._pps[when].append(pp) 728 pp.set_downloader(self) 729 730 def add_post_hook(self, ph): 731 """Add the post hook""" 732 self._post_hooks.append(ph) 733 734 def add_progress_hook(self, ph): 735 """Add the download progress hook""" 736 self._progress_hooks.append(ph) 737 738 def add_postprocessor_hook(self, ph): 739 """Add the postprocessing progress hook""" 740 self._postprocessor_hooks.append(ph) 741 for pps in self._pps.values(): 742 for pp in pps: 743 pp.add_progress_hook(ph) 744 745 def _bidi_workaround(self, message): 746 if not hasattr(self, '_output_channel'): 747 return message 748 749 assert hasattr(self, '_output_process') 750 assert isinstance(message, compat_str) 751 line_count = message.count('\n') + 1 752 self._output_process.stdin.write((message + '\n').encode('utf-8')) 753 self._output_process.stdin.flush() 754 res = ''.join(self._output_channel.readline().decode('utf-8') 755 for _ in range(line_count)) 756 return res[:-len('\n')] 757 758 def _write_string(self, message, out=None, only_once=False): 759 if only_once: 760 if message in self._printed_messages: 761 return 762 self._printed_messages.add(message) 763 write_string(message, out=out, encoding=self.params.get('encoding')) 764 765 def to_stdout(self, message, skip_eol=False, quiet=False): 766 """Print message to stdout""" 767 if self.params.get('logger'): 768 self.params['logger'].debug(message) 769 elif not quiet or self.params.get('verbose'): 770 self._write_string( 771 '%s%s' % 
(self._bidi_workaround(message), ('' if skip_eol else '\n')), 772 self._err_file if quiet else self._screen_file) 773 774 def to_stderr(self, message, only_once=False): 775 """Print message to stderr""" 776 assert isinstance(message, compat_str) 777 if self.params.get('logger'): 778 self.params['logger'].error(message) 779 else: 780 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once) 781 782 def to_console_title(self, message): 783 if not self.params.get('consoletitle', False): 784 return 785 message = remove_terminal_sequences(message) 786 if compat_os_name == 'nt': 787 if ctypes.windll.kernel32.GetConsoleWindow(): 788 # c_wchar_p() might not be necessary if `message` is 789 # already of type unicode() 790 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) 791 elif 'TERM' in os.environ: 792 self._write_string('\033]0;%s\007' % message, self._screen_file) 793 794 def save_console_title(self): 795 if not self.params.get('consoletitle', False): 796 return 797 if self.params.get('simulate'): 798 return 799 if compat_os_name != 'nt' and 'TERM' in os.environ: 800 # Save the title on stack 801 self._write_string('\033[22;0t', self._screen_file) 802 803 def restore_console_title(self): 804 if not self.params.get('consoletitle', False): 805 return 806 if self.params.get('simulate'): 807 return 808 if compat_os_name != 'nt' and 'TERM' in os.environ: 809 # Restore the title from stack 810 self._write_string('\033[23;0t', self._screen_file) 811 812 def __enter__(self): 813 self.save_console_title() 814 return self 815 816 def __exit__(self, *args): 817 self.restore_console_title() 818 819 if self.params.get('cookiefile') is not None: 820 self.cookiejar.save(ignore_discard=True, ignore_expires=True) 821 822 def trouble(self, message=None, tb=None, is_error=True): 823 """Determine action to take when a download problem appears. 
824 825 Depending on if the downloader has been configured to ignore 826 download errors or not, this method may throw an exception or 827 not when errors are found, after printing the message. 828 829 @param tb If given, is additional traceback information 830 @param is_error Whether to raise error according to ignorerrors 831 """ 832 if message is not None: 833 self.to_stderr(message) 834 if self.params.get('verbose'): 835 if tb is None: 836 if sys.exc_info()[0]: # if .trouble has been called from an except block 837 tb = '' 838 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: 839 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) 840 tb += encode_compat_str(traceback.format_exc()) 841 else: 842 tb_data = traceback.format_list(traceback.extract_stack()) 843 tb = ''.join(tb_data) 844 if tb: 845 self.to_stderr(tb) 846 if not is_error: 847 return 848 if not self.params.get('ignoreerrors'): 849 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: 850 exc_info = sys.exc_info()[1].exc_info 851 else: 852 exc_info = sys.exc_info() 853 raise DownloadError(message, exc_info) 854 self._download_retcode = 1 855 856 def to_screen(self, message, skip_eol=False): 857 """Print message to stdout if not in quiet mode""" 858 self.to_stdout( 859 message, skip_eol, quiet=self.params.get('quiet', False)) 860 861 class Styles(Enum): 862 HEADERS = 'yellow' 863 EMPHASIS = 'light blue' 864 ID = 'green' 865 DELIM = 'blue' 866 ERROR = 'red' 867 WARNING = 'yellow' 868 SUPPRESS = 'light black' 869 870 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False): 871 if test_encoding: 872 original_text = text 873 encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii') 874 text = text.encode(encoding, 'ignore').decode(encoding) 875 if fallback is not None and text != original_text: 876 text = fallback 877 if isinstance(f, self.Styles): 878 f = 
f.value 879 return format_text(text, f) if allow_colors else text if fallback is None else fallback 880 881 def _format_screen(self, *args, **kwargs): 882 return self._format_text( 883 self._screen_file, self._allow_colors['screen'], *args, **kwargs) 884 885 def _format_err(self, *args, **kwargs): 886 return self._format_text( 887 self._err_file, self._allow_colors['err'], *args, **kwargs) 888 889 def report_warning(self, message, only_once=False): 890 ''' 891 Print the message to stderr, it will be prefixed with 'WARNING:' 892 If stderr is a tty file the 'WARNING:' will be colored 893 ''' 894 if self.params.get('logger') is not None: 895 self.params['logger'].warning(message) 896 else: 897 if self.params.get('no_warnings'): 898 return 899 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once) 900 901 def deprecation_warning(self, message): 902 if self.params.get('logger') is not None: 903 self.params['logger'].warning('DeprecationWarning: {message}') 904 else: 905 self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True) 906 907 def report_error(self, message, *args, **kwargs): 908 ''' 909 Do the same as trouble, but prefixes the message with 'ERROR:', colored 910 in red if stderr is a tty file. 
911 ''' 912 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) 913 914 def write_debug(self, message, only_once=False): 915 '''Log debug message or Print message to stderr''' 916 if not self.params.get('verbose', False): 917 return 918 message = '[debug] %s' % message 919 if self.params.get('logger'): 920 self.params['logger'].debug(message) 921 else: 922 self.to_stderr(message, only_once) 923 924 def report_file_already_downloaded(self, file_name): 925 """Report file has already been fully downloaded.""" 926 try: 927 self.to_screen('[download] %s has already been downloaded' % file_name) 928 except UnicodeEncodeError: 929 self.to_screen('[download] The file has already been downloaded') 930 931 def report_file_delete(self, file_name): 932 """Report that existing file will be deleted.""" 933 try: 934 self.to_screen('Deleting existing file %s' % file_name) 935 except UnicodeEncodeError: 936 self.to_screen('Deleting existing file') 937 938 def raise_no_formats(self, info, forced=False): 939 has_drm = info.get('__has_drm') 940 msg = 'This video is DRM protected' if has_drm else 'No video formats found!' 
941 expected = self.params.get('ignore_no_formats_error') 942 if forced or not expected: 943 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], 944 expected=has_drm or expected) 945 else: 946 self.report_warning(msg) 947 948 def parse_outtmpl(self): 949 outtmpl_dict = self.params.get('outtmpl', {}) 950 if not isinstance(outtmpl_dict, dict): 951 outtmpl_dict = {'default': outtmpl_dict} 952 # Remove spaces in the default template 953 if self.params.get('restrictfilenames'): 954 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-') 955 else: 956 sanitize = lambda x: x 957 outtmpl_dict.update({ 958 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() 959 if outtmpl_dict.get(k) is None}) 960 for key, val in outtmpl_dict.items(): 961 if isinstance(val, bytes): 962 self.report_warning( 963 'Parameter outtmpl is bytes, but should be a unicode string. ' 964 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') 965 return outtmpl_dict 966 967 def get_output_path(self, dir_type='', filename=None): 968 paths = self.params.get('paths', {}) 969 assert isinstance(paths, dict) 970 path = os.path.join( 971 expand_path(paths.get('home', '').strip()), 972 expand_path(paths.get(dir_type, '').strip()) if dir_type else '', 973 filename or '') 974 975 # Temporary fix for #4787 976 # 'Treat' all problem characters by passing filename through preferredencoding 977 # to workaround encoding issues with subprocess on python2 @ Windows 978 if sys.version_info < (3, 0) and sys.platform == 'win32': 979 path = encodeFilename(path, True).decode(preferredencoding()) 980 return sanitize_path(path, force=self.params.get('windowsfilenames')) 981 982 @staticmethod 983 def _outtmpl_expandpath(outtmpl): 984 # expand_path translates '%%' into '%' and '$$' into '$' 985 # correspondingly that is not what we want since we need to keep 986 # '%%' intact for template dict substitution step. 
Working around 987 # with boundary-alike separator hack. 988 sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) 989 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) 990 991 # outtmpl should be expand_path'ed before template dict substitution 992 # because meta fields may contain env variables we don't want to 993 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and 994 # title "Hello $PATH", we don't want `$PATH` to be expanded. 995 return expand_path(outtmpl).replace(sep, '') 996 997 @staticmethod 998 def escape_outtmpl(outtmpl): 999 ''' Escape any remaining strings like %s, %abc% etc. ''' 1000 return re.sub( 1001 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), 1002 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), 1003 outtmpl) 1004 1005 @classmethod 1006 def validate_outtmpl(cls, outtmpl): 1007 ''' @return None or Exception object ''' 1008 outtmpl = re.sub( 1009 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'), 1010 lambda mobj: f'{mobj.group(0)[:-1]}s', 1011 cls._outtmpl_expandpath(outtmpl)) 1012 try: 1013 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int) 1014 return None 1015 except ValueError as err: 1016 return err 1017 1018 @staticmethod 1019 def _copy_infodict(info_dict): 1020 info_dict = dict(info_dict) 1021 for key in ('__original_infodict', '__postprocessors'): 1022 info_dict.pop(key, None) 1023 return info_dict 1024 1025 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): 1026 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict 1027 @param sanitize Whether to sanitize the output as a filename. 
                        For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Dotted field path, e.g. "formats.0.url"; empty first key means the whole dict
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Alternate between consuming an operator and an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand may itself be a field name rather than a literal number
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # Substitution callback: resolve one %(...)X template field
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Walk the ','-separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                value, fmt = format_decimal_suffix(value, f'%{fmt[:-1]}f%s' if fmt[:-1] else '%d%s'), 's'
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # NUL-escape '%' in the key so escape_outtmpl leaves it alone
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

    def _prepare_filename(self, info_dict, tmpl_type='default'):
        # Evaluate the output template of the given type; returns None on template error
        try:
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)

            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', warn=False):
        """Generate the output filename."""

        filename = self._prepare_filename(info_dict, dir_type or 'default')
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                # NOTE(review): message has a typo ("when an outputting") — candidate fix
                self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a human-readable skip reason, or None to accept the entry
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Skip extraction entirely if the id derivable from the URL alone
            # is already in the archive
            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
        # Decorator: turn the common extraction exceptions into error reports,
        # and loop to retry the call when ReExtractInfo is raised
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    continue
                except GeoRestrictedError as e:
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                break
        return wrapper

    def _wait_for_video(self, ie_result):
        # Implements wait_for_video: sleep until the scheduled release time,
        # then raise ReExtractInfo so extraction is retried
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
            return

        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
        last_msg = ''

        def progress(msg):
            # Overwrite the previous progress line in place
            nonlocal last_msg
            self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
            last_msg = msg

        min_wait, max_wait = self.params.get('wait_for_video')
        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
            self.report_warning('Release time of video is not known')
        elif (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

        wait_till = time.time() + diff
        try:
            while True:
                diff = wait_till - time.time()
                if diff <= 0:
                    progress('')
                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
                time.sleep(1)
        except KeyboardInterrupt:
            progress('')
            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
        except BaseException as e:
            if not isinstance(e, ReExtractInfo):
                # Terminate the progress line before propagating the exception
                self.to_screen('')
            raise

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            self._wait_for_video(ie_result)
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        # Fill in url- and extractor-derived keys without overwriting existing ones
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
                'webpage_url_basename': url_basename(url),
                'webpage_url_domain': get_domain(url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        if extra_info is None:
            extra_info = {}
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            if ie_result.get('original_url'):
                extra_info.setdefault('original_url', ie_result['original_url'])

            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                # Flat extraction: do not resolve the URL, just report/record it
                info_copy = ie_result.copy()
                ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
                if ie and not ie_result.get('id'):
                    info_copy['id'] = ie.get_temp_id(ie_result['url'])
                self.add_default_extra_info(info_copy, ie, ie_result['url'])
                self.add_extra_info(info_copy, extra_info)
                info_copy, _ = self.pre_process(info_copy)
                self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
                if self.params.get('force_write_download_archive', False):
                    self.record_download_archive(info_copy)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            ie_result = self.process_video_result(ie_result, download=download)
            additional_urls = (ie_result or {}).get('additional_urls')
            if additional_urls:
                # TODO: Improve MetadataParserPP to allow setting a list
                if isinstance(additional_urls, compat_str):
                    additional_urls = [additional_urls]
                self.to_screen(
                    '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
                self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
                ie_result['additional_entries'] = [
                    self.extract_info(
                        url, download, extra_info=extra_info,
                        force_generic_extractor=self.params.get('force_generic_extractor'))
                    for url in additional_urls
                ]
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(
                ie_result['url'], download,
                ie_key=ie_result.get('ie_key'),
                extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1538 if new_result.get('_type') == 'url': 1539 new_result['_type'] = 'url_transparent' 1540 1541 return self.process_ie_result( 1542 new_result, download=download, extra_info=extra_info) 1543 elif result_type in ('playlist', 'multi_video'): 1544 # Protect from infinite recursion due to recursively nested playlists 1545 # (see https://github.com/ytdl-org/youtube-dl/issues/27833) 1546 webpage_url = ie_result['webpage_url'] 1547 if webpage_url in self._playlist_urls: 1548 self.to_screen( 1549 '[download] Skipping already downloaded playlist: %s' 1550 % ie_result.get('title') or ie_result.get('id')) 1551 return 1552 1553 self._playlist_level += 1 1554 self._playlist_urls.add(webpage_url) 1555 self._sanitize_thumbnails(ie_result) 1556 try: 1557 return self.__process_playlist(ie_result, download) 1558 finally: 1559 self._playlist_level -= 1 1560 if not self._playlist_level: 1561 self._playlist_urls.clear() 1562 elif result_type == 'compat_list': 1563 self.report_warning( 1564 'Extractor %s returned a compat_list result. ' 1565 'It needs to be updated.' 
                % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the playlist's extractor/webpage metadata into each entry
                self.add_extra_info(r, {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def _ensure_dir_exists(self, path):
        """Create the directory for `path` if needed, reporting failure via report_error"""
        return make_dir(path, self.report_error)

    def __process_playlist(self, ie_result, download):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel for playlist positions that were requested but not extracted
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Place each extracted entry at its 1-based requested index
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Parse "1,3,5-7"-style item specifications
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            # Wrap lazy access so extraction errors in an entry are handled
            # the same way as in __extract_info
            def get_entry(i):
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            entries.append(entry)
            try:
                # Probe the entry against the filters early so break_on_* can
                # stop collection; the reason is reported later, not here
                if entry is not None:
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
            ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
                if failures >= max_failures:
                    self.report_error(
                        'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                    break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result

    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        """Process one playlist entry with extraction-exception handling applied"""
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)

    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: try to parse as a filesize ("1.2M"),
                # retrying with a "B" suffix appended
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        # Fall back to the string-filter form (e.g. "[vcodec^=avc1]")
        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
                (?P<value>[a-zA-Z0-9._-]+)\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # A trailing "?" in the spec makes missing fields pass the filter
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter

    def _check_formats(self, formats):
        """Yield only the formats that pass a small test download (dl with test=True)."""
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
                continue
            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                # Always clean up the temp file, even when the test download failed
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _default_format_spec(self, info_dict, download=True):
        """Return the default format selector string, depending on merge ability and compat opts."""

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        # Prefer a single pre-merged format when merging is impossible
        # (no ffmpeg merger, live stream, or output to stdout)
        prefer_best = (
            not self.params.get('simulate')
            and download
            and (
                not can_merge()
                or info_dict.get('is_live', False)
                or self.outtmpl_dict['default'] == '-'))
        compat = (
            prefer_best
            or self.params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in self.params.get('compat_opts', []))

        return (
            'best/bestvideo+bestaudio' if prefer_best
            else 'bestvideo*+bestaudio/best' if not compat
            else 'bestvideo+bestaudio/best')

    def build_format_selector(self, format_spec):
        """Compile a format-spec string into a selector function.

        The returned callable takes a ctx dict ({'formats', 'incomplete_formats'})
        and yields the selected format dicts.
        """
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Selector node types for the parsed spec tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats') == 'selected'

        # Collect the raw text of a "[...]" filter from the token stream
        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type ==
tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            # Flush any pending joined token at end of stream
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of the selector grammar; the inside_*
            # flags tell which construct (merge "+", choice "/", group "()")
            # we are currently nested in, so terminators can be pushed back.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two formats (e.g. "bestvideo+bestaudio") into one merged
            # format dict with 'requested_formats' holding the components.
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        # Keep only the first stream of each kind unless
                        # multiple streams of that kind are allowed
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            # Collect a field from each component format, skipping falsy values
            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))) or None,
                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                })

            return new_dict

        def _check_formats(formats):
            # Only actually test-download when --check-formats selected
            if not check_formats:
                yield from formats
                return
            yield from self._check_formats(formats)

        def _build_selector_function(selector):
            # Turn a parsed selector tree into a generator function over ctx
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not best/worst: match by extension, or fall back to format_id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            # Apply any "[...]" filters attached to this selector node
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = dict(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with one-token pushback (restore_last_token),
            # needed by the recursive-descent parser above.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)

    def _calc_headers(self, info_dict):
        """Build the HTTP headers for a format: std_headers + per-format headers + cookies."""
        res = std_headers.copy()

        add_headers = info_dict.get('http_headers')
        if add_headers:
            res.update(add_headers)

        cookies = self._calc_cookies(info_dict)
        if cookies:
            res['Cookie'] = cookies

        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip =
info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip

        return res

    def _calc_cookies(self, info_dict):
        """Return the Cookie header value the cookiejar would send for this URL (may be None)."""
        pr = sanitized_Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')

    def _sort_thumbnails(self, thumbnails):
        """Sort thumbnails in place by preference, then size, then id/url (ascending)."""
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))

    def _sanitize_thumbnails(self, info_dict):
        """Normalize info_dict['thumbnails']: fill ids/resolution, sanitize URLs, sort,
        and optionally keep only reachable thumbnails when check_formats is True."""
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a lone 'thumbnail' field to a single-element thumbnails list
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if not thumbnails:
            return

        def check_thumbnails(thumbnails):
            # Yield only thumbnails whose URL answers a HEAD request
            for t in thumbnails:
                self.to_screen(f'[info] Testing thumbnail {t["id"]}')
                try:
                    self.urlopen(HEADRequest(t['url']))
                except network_exceptions as err:
                    self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                    continue
                yield t

        self._sort_thumbnails(thumbnails)
        for i, t in enumerate(thumbnails):
            if t.get('id') is None:
                t['id'] = '%d' % i
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            t['url'] = sanitize_url(t['url'])

        if self.params.get('check_formats') is True:
            # LazyList so the HEAD checks happen lazily; reverse=True keeps original order
            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
        else:
            info_dict['thumbnails'] = thumbnails

    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select the requested formats,
        and (optionally) download them. Returns the updated info_dict."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce non-numeric values of known numeric fields to int (or None)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # thumbnails are sorted ascending, so the last one is the best
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive upload_date/release_date from the corresponding timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile live_status with the is_live/was_live booleans (in both directions)
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                    break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
2343 for field in ('chapter', 'season', 'episode'): 2344 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): 2345 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) 2346 2347 for cc_kind in ('subtitles', 'automatic_captions'): 2348 cc = info_dict.get(cc_kind) 2349 if cc: 2350 for _, subtitle in cc.items(): 2351 for subtitle_format in subtitle: 2352 if subtitle_format.get('url'): 2353 subtitle_format['url'] = sanitize_url(subtitle_format['url']) 2354 if subtitle_format.get('ext') is None: 2355 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() 2356 2357 automatic_captions = info_dict.get('automatic_captions') 2358 subtitles = info_dict.get('subtitles') 2359 2360 info_dict['requested_subtitles'] = self.process_subtitles( 2361 info_dict['id'], subtitles, automatic_captions) 2362 2363 if info_dict.get('formats') is None: 2364 # There's only one format available 2365 formats = [info_dict] 2366 else: 2367 formats = info_dict['formats'] 2368 2369 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats) 2370 if not self.params.get('allow_unplayable_formats'): 2371 formats = [f for f in formats if not f.get('has_drm')] 2372 2373 if info_dict.get('is_live'): 2374 get_from_start = bool(self.params.get('live_from_start')) 2375 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] 2376 2377 if not formats: 2378 self.raise_no_formats(info_dict) 2379 2380 def is_wellformed(f): 2381 url = f.get('url') 2382 if not url: 2383 self.report_warning( 2384 '"url" field is missing or empty - skipping format, ' 2385 'there is an error in extractor') 2386 return False 2387 if isinstance(url, bytes): 2388 sanitize_string_field(f, 'url') 2389 return True 2390 2391 # Filter out malformed formats for better extraction robustness 2392 formats = list(filter(is_wellformed, formats)) 2393 2394 formats_dict = {} 2395 2396 # We check that all the formats have the format and format_id fields 
2397 for i, format in enumerate(formats): 2398 sanitize_string_field(format, 'format_id') 2399 sanitize_numeric_fields(format) 2400 format['url'] = sanitize_url(format['url']) 2401 if not format.get('format_id'): 2402 format['format_id'] = compat_str(i) 2403 else: 2404 # Sanitize format_id from characters used in format selector expression 2405 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) 2406 format_id = format['format_id'] 2407 if format_id not in formats_dict: 2408 formats_dict[format_id] = [] 2409 formats_dict[format_id].append(format) 2410 2411 # Make sure all formats have unique format_id 2412 common_exts = set(itertools.chain(*self._format_selection_exts.values())) 2413 for format_id, ambiguous_formats in formats_dict.items(): 2414 ambigious_id = len(ambiguous_formats) > 1 2415 for i, format in enumerate(ambiguous_formats): 2416 if ambigious_id: 2417 format['format_id'] = '%s-%d' % (format_id, i) 2418 if format.get('ext') is None: 2419 format['ext'] = determine_ext(format['url']).lower() 2420 # Ensure there is no conflict between id and ext in format selection 2421 # See https://github.com/yt-dlp/yt-dlp/issues/1282 2422 if format['format_id'] != format['ext'] and format['format_id'] in common_exts: 2423 format['format_id'] = 'f%s' % format['format_id'] 2424 2425 for i, format in enumerate(formats): 2426 if format.get('format') is None: 2427 format['format'] = '{id} - {res}{note}'.format( 2428 id=format['format_id'], 2429 res=self.format_resolution(format), 2430 note=format_field(format, 'format_note', ' (%s)'), 2431 ) 2432 if format.get('protocol') is None: 2433 format['protocol'] = determine_protocol(format) 2434 if format.get('resolution') is None: 2435 format['resolution'] = self.format_resolution(format, default=None) 2436 if format.get('dynamic_range') is None and format.get('vcodec') != 'none': 2437 format['dynamic_range'] = 'SDR' 2438 if (info_dict.get('duration') and format.get('tbr') 2439 and not format.get('filesize') 
and not format.get('filesize_approx')): 2440 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8) 2441 2442 # Add HTTP headers, so that external programs can use them from the 2443 # json output 2444 full_format_info = info_dict.copy() 2445 full_format_info.update(format) 2446 format['http_headers'] = self._calc_headers(full_format_info) 2447 # Remove private housekeeping stuff 2448 if '__x_forwarded_for_ip' in info_dict: 2449 del info_dict['__x_forwarded_for_ip'] 2450 2451 # TODO Central sorting goes here 2452 2453 if self.params.get('check_formats') is True: 2454 formats = LazyList(self._check_formats(formats[::-1]), reverse=True) 2455 2456 if not formats or formats[0] is not info_dict: 2457 # only set the 'formats' fields if the original info_dict list them 2458 # otherwise we end up with a circular reference, the first (and unique) 2459 # element in the 'formats' field in info_dict is info_dict itself, 2460 # which can't be exported to json 2461 info_dict['formats'] = formats 2462 2463 info_dict, _ = self.pre_process(info_dict) 2464 2465 # The pre-processors may have modified the formats 2466 formats = info_dict.get('formats', [info_dict]) 2467 2468 list_only = self.params.get('simulate') is None and ( 2469 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')) 2470 interactive_format_selection = not list_only and self.format_selector == '-' 2471 if self.params.get('list_thumbnails'): 2472 self.list_thumbnails(info_dict) 2473 if self.params.get('listsubtitles'): 2474 if 'automatic_captions' in info_dict: 2475 self.list_subtitles( 2476 info_dict['id'], automatic_captions, 'automatic captions') 2477 self.list_subtitles(info_dict['id'], subtitles, 'subtitles') 2478 if self.params.get('listformats') or interactive_format_selection: 2479 self.list_formats(info_dict) 2480 if list_only: 2481 # Without this printing, -F --print-json will not work 2482 self.__forced_printings(info_dict, 
self.prepare_filename(info_dict), incomplete=True) 2483 return 2484 2485 format_selector = self.format_selector 2486 if format_selector is None: 2487 req_format = self._default_format_spec(info_dict, download=download) 2488 self.write_debug('Default format spec: %s' % req_format) 2489 format_selector = self.build_format_selector(req_format) 2490 2491 while True: 2492 if interactive_format_selection: 2493 req_format = input( 2494 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS)) 2495 try: 2496 format_selector = self.build_format_selector(req_format) 2497 except SyntaxError as err: 2498 self.report_error(err, tb=False, is_error=False) 2499 continue 2500 2501 # While in format selection we may need to have an access to the original 2502 # format set in order to calculate some metrics or do some processing. 2503 # For now we need to be able to guess whether original formats provided 2504 # by extractor are incomplete or not (i.e. whether extractor provides only 2505 # video-only or audio-only formats) for proper formats selection for 2506 # extractors with such incomplete formats (see 2507 # https://github.com/ytdl-org/youtube-dl/pull/5556). 2508 # Since formats may be filtered during format selection and may not match 2509 # the original formats the results may be incorrect. Thus original formats 2510 # or pre-calculated metrics should be passed to format selection routines 2511 # as well. 2512 # We will pass a context object containing all necessary additional data 2513 # instead of just formats. 2514 # This fixes incorrect format selection issue (see 2515 # https://github.com/ytdl-org/youtube-dl/issues/10083). 
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format"""
        # Pool of candidate subtitles: manually-created subtitles take
        # precedence; automatic captions only fill languages not already present
        available_subs = {}
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        # Nothing to do unless at least one write option is enabled
        # and there is something to choose from
        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
                available_subs):
            return None

        all_sub_langs = available_subs.keys()
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            # A list is used so that the order of languages will be the same as
            # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
            requested_langs = []
            for lang_re in self.params.get('subtitleslangs'):
                if lang_re == 'all':
                    requested_langs.extend(all_sub_langs)
                    continue
                # A leading '-' means: remove languages matching this pattern
                discard = lang_re[0] == '-'
                if discard:
                    lang_re = lang_re[1:]
                # Each entry is treated as a regex anchored at the end
                current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
                if discard:
                    for lang in current_langs:
                        while lang in requested_langs:
                            requested_langs.remove(lang)
                else:
                    requested_langs.extend(current_langs)
            requested_langs = orderedSet(requested_langs)
        elif 'en' in available_subs:
            # Default to English when available
            requested_langs = ['en']
        else:
            # Otherwise fall back to an arbitrary available language
            requested_langs = [list(all_sub_langs)[0]]
        if requested_langs:
            self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

        # 'subtitlesformat' is a '/'-separated preference list, e.g. 'srt/best'
        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                continue
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                # No preferred extension matched: take the last format and warn
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs

    def __forced_printings(self, info_dict, filename, incomplete):
        """Print the fields requested via the force* params to stdout.

        incomplete -- if True, mandatory fields are printed only when present
        """
        def print_mandatory(field, actual_field=None):
            if actual_field is None:
                actual_field = field
            if (self.params.get('force%s' % field, False)
                    and (not incomplete or info_dict.get(actual_field) is not None)):
                self.to_stdout(info_dict[actual_field])

        def print_optional(field):
            if (self.params.get('force%s' % field, False)
                    and info_dict.get(field) is not None):
                self.to_stdout(info_dict[field])

        # Work on a copy so the synthesized 'filename'/'urls' fields
        # do not leak into the caller's info_dict
        info_dict = info_dict.copy()
        if filename is not None:
            info_dict['filename'] = filename
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif 'url' in info_dict:
            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

        if self.params.get('forceprint') or self.params.get('forcejson'):
            self.post_extract(info_dict)
        for tmpl in self.params.get('forceprint', []):
            # Bare field names are shorthands: 'field=' -> 'field = %(field)s',
            # 'field' -> '%(field)s'; anything else is a full output template
            mobj = re.match(r'\w+(=?)$', tmpl)
            if mobj and mobj.group(1):
                tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
            elif mobj:
                tmpl = '%({})s'.format(tmpl)
            self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))

        print_mandatory('title')
        print_mandatory('id')
        print_mandatory('url', 'urls')
        print_optional('thumbnail')
        print_optional('description')
        print_optional('filename')
        if self.params.get('forceduration') and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        print_mandatory('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2669 2670 def dl(self, name, info, subtitle=False, test=False): 2671 if not info.get('url'): 2672 self.raise_no_formats(info, True) 2673 2674 if test: 2675 verbose = self.params.get('verbose') 2676 params = { 2677 'test': True, 2678 'quiet': self.params.get('quiet') or not verbose, 2679 'verbose': verbose, 2680 'noprogress': not verbose, 2681 'nopart': True, 2682 'skip_unavailable_fragments': False, 2683 'keep_fragments': False, 2684 'overwrites': True, 2685 '_no_ytdl_file': True, 2686 } 2687 else: 2688 params = self.params 2689 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) 2690 if not test: 2691 for ph in self._progress_hooks: 2692 fd.add_progress_hook(ph) 2693 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']]) 2694 self.write_debug('Invoking downloader on "%s"' % urls) 2695 2696 # Note: Ideally info should be a deep-copied so that hooks cannot modify it. 2697 # But it may contain objects that are not deep-copyable 2698 new_info = self._copy_infodict(info) 2699 if new_info.get('http_headers') is None: 2700 new_info['http_headers'] = self._calc_headers(new_info) 2701 return fd.download(name, new_info, subtitle) 2702 2703 def process_info(self, info_dict): 2704 """Process a single resolved IE result.""" 2705 2706 assert info_dict.get('_type', 'video') == 'video' 2707 2708 max_downloads = self.params.get('max_downloads') 2709 if max_downloads is not None: 2710 if self._num_downloads >= int(max_downloads): 2711 raise MaxDownloadsReached() 2712 2713 if info_dict.get('is_live') and not self.params.get('live_from_start'): 2714 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') 2715 2716 # TODO: backward compatibility, to be removed 2717 info_dict['fulltitle'] = info_dict['title'] 2718 2719 if 'format' not in info_dict and 'ext' in info_dict: 2720 info_dict['format'] = info_dict['ext'] 2721 2722 if self._match_entry(info_dict) is not None: 2723 return 2724 2725 
self.post_extract(info_dict) 2726 self._num_downloads += 1 2727 2728 # info_dict['_filename'] needs to be set for backward compatibility 2729 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) 2730 temp_filename = self.prepare_filename(info_dict, 'temp') 2731 files_to_move = {} 2732 2733 # Forced printings 2734 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) 2735 2736 if self.params.get('simulate'): 2737 if self.params.get('force_write_download_archive', False): 2738 self.record_download_archive(info_dict) 2739 # Do nothing else if in simulate mode 2740 return 2741 2742 if full_filename is None: 2743 return 2744 if not self._ensure_dir_exists(encodeFilename(full_filename)): 2745 return 2746 if not self._ensure_dir_exists(encodeFilename(temp_filename)): 2747 return 2748 2749 if self._write_description('video', info_dict, 2750 self.prepare_filename(info_dict, 'description')) is None: 2751 return 2752 2753 sub_files = self._write_subtitles(info_dict, temp_filename) 2754 if sub_files is None: 2755 return 2756 files_to_move.update(dict(sub_files)) 2757 2758 thumb_files = self._write_thumbnails( 2759 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail')) 2760 if thumb_files is None: 2761 return 2762 files_to_move.update(dict(thumb_files)) 2763 2764 infofn = self.prepare_filename(info_dict, 'infojson') 2765 _infojson_written = self._write_info_json('video', info_dict, infofn) 2766 if _infojson_written: 2767 info_dict['infojson_filename'] = infofn 2768 # For backward compatibility, even though it was a private field 2769 info_dict['__infojson_filename'] = infofn 2770 elif _infojson_written is None: 2771 return 2772 2773 # Note: Annotations are deprecated 2774 annofn = None 2775 if self.params.get('writeannotations', False): 2776 annofn = self.prepare_filename(info_dict, 'annotation') 2777 if annofn: 2778 if not self._ensure_dir_exists(encodeFilename(annofn)): 2779 return 
2780 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): 2781 self.to_screen('[info] Video annotations are already present') 2782 elif not info_dict.get('annotations'): 2783 self.report_warning('There are no annotations to write.') 2784 else: 2785 try: 2786 self.to_screen('[info] Writing video annotations to: ' + annofn) 2787 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: 2788 annofile.write(info_dict['annotations']) 2789 except (KeyError, TypeError): 2790 self.report_warning('There are no annotations to write.') 2791 except (OSError, IOError): 2792 self.report_error('Cannot write annotations file: ' + annofn) 2793 return 2794 2795 # Write internet shortcut files 2796 def _write_link_file(link_type): 2797 if 'webpage_url' not in info_dict: 2798 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') 2799 return False 2800 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) 2801 if not self._ensure_dir_exists(encodeFilename(linkfn)): 2802 return False 2803 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): 2804 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') 2805 return True 2806 try: 2807 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') 2808 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', 2809 newline='\r\n' if link_type == 'url' else '\n') as linkfile: 2810 template_vars = {'url': iri_to_uri(info_dict['webpage_url'])} 2811 if link_type == 'desktop': 2812 template_vars['filename'] = linkfn[:-(len(link_type) + 1)] 2813 linkfile.write(LINK_TEMPLATES[link_type] % template_vars) 2814 except (OSError, IOError): 2815 self.report_error(f'Cannot write internet shortcut {linkfn}') 2816 return False 2817 return True 2818 2819 write_links = { 2820 'url': 
self.params.get('writeurllink'), 2821 'webloc': self.params.get('writewebloclink'), 2822 'desktop': self.params.get('writedesktoplink'), 2823 } 2824 if self.params.get('writelink'): 2825 link_type = ('webloc' if sys.platform == 'darwin' 2826 else 'desktop' if sys.platform.startswith('linux') 2827 else 'url') 2828 write_links[link_type] = True 2829 2830 if any(should_write and not _write_link_file(link_type) 2831 for link_type, should_write in write_links.items()): 2832 return 2833 2834 try: 2835 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) 2836 except PostProcessingError as err: 2837 self.report_error('Preprocessing: %s' % str(err)) 2838 return 2839 2840 must_record_download_archive = False 2841 if self.params.get('skip_download', False): 2842 info_dict['filepath'] = temp_filename 2843 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) 2844 info_dict['__files_to_move'] = files_to_move 2845 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict) 2846 else: 2847 # Download 2848 info_dict.setdefault('__postprocessors', []) 2849 try: 2850 2851 def existing_file(*filepaths): 2852 ext = info_dict.get('ext') 2853 final_ext = self.params.get('final_ext', ext) 2854 existing_files = [] 2855 for file in orderedSet(filepaths): 2856 if final_ext != ext: 2857 converted = replace_extension(file, final_ext, ext) 2858 if os.path.exists(encodeFilename(converted)): 2859 existing_files.append(converted) 2860 if os.path.exists(encodeFilename(file)): 2861 existing_files.append(file) 2862 2863 if not existing_files or self.params.get('overwrites', False): 2864 for file in orderedSet(existing_files): 2865 self.report_file_delete(file) 2866 os.remove(encodeFilename(file)) 2867 return None 2868 2869 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:] 2870 return existing_files[0] 2871 2872 success = True 2873 if info_dict.get('requested_formats') is not None: 2874 2875 def 
compatible_formats(formats): 2876 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them. 2877 video_formats = [format for format in formats if format.get('vcodec') != 'none'] 2878 audio_formats = [format for format in formats if format.get('acodec') != 'none'] 2879 if len(video_formats) > 2 or len(audio_formats) > 2: 2880 return False 2881 2882 # Check extension 2883 exts = set(format.get('ext') for format in formats) 2884 COMPATIBLE_EXTS = ( 2885 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')), 2886 set(('webm',)), 2887 ) 2888 for ext_sets in COMPATIBLE_EXTS: 2889 if ext_sets.issuperset(exts): 2890 return True 2891 # TODO: Check acodec/vcodec 2892 return False 2893 2894 requested_formats = info_dict['requested_formats'] 2895 old_ext = info_dict['ext'] 2896 if self.params.get('merge_output_format') is None: 2897 if not compatible_formats(requested_formats): 2898 info_dict['ext'] = 'mkv' 2899 self.report_warning( 2900 'Requested formats are incompatible for merge and will be merged into mkv') 2901 if (info_dict['ext'] == 'webm' 2902 and info_dict.get('thumbnails') 2903 # check with type instead of pp_key, __name__, or isinstance 2904 # since we dont want any custom PPs to trigger this 2905 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): 2906 info_dict['ext'] = 'mkv' 2907 self.report_warning( 2908 'webm doesn\'t support embedding a thumbnail, mkv will be used') 2909 new_ext = info_dict['ext'] 2910 2911 def correct_ext(filename, ext=new_ext): 2912 if filename == '-': 2913 return filename 2914 filename_real_ext = os.path.splitext(filename)[1][1:] 2915 filename_wo_ext = ( 2916 os.path.splitext(filename)[0] 2917 if filename_real_ext in (old_ext, new_ext) 2918 else filename) 2919 return '%s.%s' % (filename_wo_ext, ext) 2920 2921 # Ensure filename always has a correct extension for successful merge 2922 full_filename = correct_ext(full_filename) 2923 temp_filename = correct_ext(temp_filename) 
2924 dl_filename = existing_file(full_filename, temp_filename) 2925 info_dict['__real_download'] = False 2926 2927 downloaded = [] 2928 merger = FFmpegMergerPP(self) 2929 2930 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') 2931 if dl_filename is not None: 2932 self.report_file_already_downloaded(dl_filename) 2933 elif fd: 2934 for f in requested_formats if fd != FFmpegFD else []: 2935 f['filepath'] = fname = prepend_extension( 2936 correct_ext(temp_filename, info_dict['ext']), 2937 'f%s' % f['format_id'], info_dict['ext']) 2938 downloaded.append(fname) 2939 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) 2940 success, real_download = self.dl(temp_filename, info_dict) 2941 info_dict['__real_download'] = real_download 2942 else: 2943 if self.params.get('allow_unplayable_formats'): 2944 self.report_warning( 2945 'You have requested merging of multiple formats ' 2946 'while also allowing unplayable formats to be downloaded. ' 2947 'The formats won\'t be merged to prevent data corruption.') 2948 elif not merger.available: 2949 self.report_warning( 2950 'You have requested merging of multiple formats but ffmpeg is not installed. ' 2951 'The formats won\'t be merged.') 2952 2953 if temp_filename == '-': 2954 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params) 2955 else 'but the formats are incompatible for simultaneous download' if merger.available 2956 else 'but ffmpeg is not installed') 2957 self.report_warning( 2958 f'You have requested downloading multiple formats to stdout {reason}. 
' 2959 'The formats will be streamed one after the other') 2960 fname = temp_filename 2961 for f in requested_formats: 2962 new_info = dict(info_dict) 2963 del new_info['requested_formats'] 2964 new_info.update(f) 2965 if temp_filename != '-': 2966 fname = prepend_extension( 2967 correct_ext(temp_filename, new_info['ext']), 2968 'f%s' % f['format_id'], new_info['ext']) 2969 if not self._ensure_dir_exists(fname): 2970 return 2971 f['filepath'] = fname 2972 downloaded.append(fname) 2973 partial_success, real_download = self.dl(fname, new_info) 2974 info_dict['__real_download'] = info_dict['__real_download'] or real_download 2975 success = success and partial_success 2976 2977 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): 2978 info_dict['__postprocessors'].append(merger) 2979 info_dict['__files_to_merge'] = downloaded 2980 # Even if there were no downloads, it is being merged only now 2981 info_dict['__real_download'] = True 2982 else: 2983 for file in downloaded: 2984 files_to_move[file] = None 2985 else: 2986 # Just a single file 2987 dl_filename = existing_file(full_filename, temp_filename) 2988 if dl_filename is None or dl_filename == temp_filename: 2989 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part. 
2990 # So we should try to resume the download 2991 success, real_download = self.dl(temp_filename, info_dict) 2992 info_dict['__real_download'] = real_download 2993 else: 2994 self.report_file_already_downloaded(dl_filename) 2995 2996 dl_filename = dl_filename or temp_filename 2997 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) 2998 2999 except network_exceptions as err: 3000 self.report_error('unable to download video data: %s' % error_to_compat_str(err)) 3001 return 3002 except (OSError, IOError) as err: 3003 raise UnavailableVideoError(err) 3004 except (ContentTooShortError, ) as err: 3005 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) 3006 return 3007 3008 if success and full_filename != '-': 3009 3010 def fixup(): 3011 do_fixup = True 3012 fixup_policy = self.params.get('fixup') 3013 vid = info_dict['id'] 3014 3015 if fixup_policy in ('ignore', 'never'): 3016 return 3017 elif fixup_policy == 'warn': 3018 do_fixup = False 3019 elif fixup_policy != 'force': 3020 assert fixup_policy in ('detect_or_warn', None) 3021 if not info_dict.get('__real_download'): 3022 do_fixup = False 3023 3024 def ffmpeg_fixup(cndn, msg, cls): 3025 if not cndn: 3026 return 3027 if not do_fixup: 3028 self.report_warning(f'{vid}: {msg}') 3029 return 3030 pp = cls(self) 3031 if pp.available: 3032 info_dict['__postprocessors'].append(pp) 3033 else: 3034 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically') 3035 3036 stretched_ratio = info_dict.get('stretched_ratio') 3037 ffmpeg_fixup( 3038 stretched_ratio not in (1, None), 3039 f'Non-uniform pixel ratio {stretched_ratio}', 3040 FFmpegFixupStretchedPP) 3041 3042 ffmpeg_fixup( 3043 (info_dict.get('requested_formats') is None 3044 and info_dict.get('container') == 'm4a_dash' 3045 and info_dict.get('ext') == 'm4a'), 3046 'writing DASH m4a. 
Only some players support this container', 3047 FFmpegFixupM4aPP) 3048 3049 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None 3050 downloader = downloader.__name__ if downloader else None 3051 3052 if info_dict.get('requested_formats') is None: # Not necessary if doing merger 3053 ffmpeg_fixup(downloader == 'HlsFD', 3054 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', 3055 FFmpegFixupM3u8PP) 3056 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', 3057 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) 3058 3059 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP) 3060 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP) 3061 3062 fixup() 3063 try: 3064 info_dict = self.post_process(dl_filename, info_dict, files_to_move) 3065 except PostProcessingError as err: 3066 self.report_error('Postprocessing: %s' % str(err)) 3067 return 3068 try: 3069 for ph in self._post_hooks: 3070 ph(info_dict['filepath']) 3071 except Exception as err: 3072 self.report_error('post hooks: %s' % str(err)) 3073 return 3074 must_record_download_archive = True 3075 3076 if must_record_download_archive or self.params.get('force_write_download_archive', False): 3077 self.record_download_archive(info_dict) 3078 max_downloads = self.params.get('max_downloads') 3079 if max_downloads is not None and self._num_downloads >= int(max_downloads): 3080 raise MaxDownloadsReached() 3081 3082 def __download_wrapper(self, func): 3083 @functools.wraps(func) 3084 def wrapper(*args, **kwargs): 3085 try: 3086 res = func(*args, **kwargs) 3087 except UnavailableVideoError as e: 3088 self.report_error(e) 3089 except MaxDownloadsReached as e: 3090 self.to_screen(f'[info] {e}') 3091 raise 3092 except DownloadCancelled as e: 3093 self.to_screen(f'[info] {e}') 3094 if not self.params.get('break_per_url'): 3095 raise 3096 
else: 3097 if self.params.get('dump_single_json', False): 3098 self.post_extract(res) 3099 self.to_stdout(json.dumps(self.sanitize_info(res))) 3100 return wrapper 3101 3102 def download(self, url_list): 3103 """Download a given list of URLs.""" 3104 url_list = variadic(url_list) # Passing a single URL is a common mistake 3105 outtmpl = self.outtmpl_dict['default'] 3106 if (len(url_list) > 1 3107 and outtmpl != '-' 3108 and '%' not in outtmpl 3109 and self.params.get('max_downloads') != 1): 3110 raise SameFileError(outtmpl) 3111 3112 for url in url_list: 3113 self.__download_wrapper(self.extract_info)( 3114 url, force_generic_extractor=self.params.get('force_generic_extractor', False)) 3115 3116 return self._download_retcode 3117 3118 def download_with_info_file(self, info_filename): 3119 with contextlib.closing(fileinput.FileInput( 3120 [info_filename], mode='r', 3121 openhook=fileinput.hook_encoded('utf-8'))) as f: 3122 # FileInput doesn't have a read method, we can't call json.load 3123 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) 3124 try: 3125 self.__download_wrapper(self.process_ie_result)(info, download=True) 3126 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: 3127 if not isinstance(e, EntryNotInPlaylist): 3128 self.to_stderr('\r') 3129 webpage_url = info.get('webpage_url') 3130 if webpage_url is not None: 3131 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') 3132 return self.download([webpage_url]) 3133 else: 3134 raise 3135 return self._download_retcode 3136 3137 @staticmethod 3138 def sanitize_info(info_dict, remove_private_keys=False): 3139 ''' Sanitize the infodict for converting to json ''' 3140 if info_dict is None: 3141 return info_dict 3142 info_dict.setdefault('epoch', int(time.time())) 3143 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict 3144 keep_keys = ['_type'] # Always keep this to 
facilitate load-info-json 3145 if remove_private_keys: 3146 remove_keys |= { 3147 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', 3148 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', 3149 } 3150 empty_values = (None, {}, [], set(), tuple()) 3151 reject = lambda k, v: k not in keep_keys and ( 3152 k.startswith('_') or k in remove_keys or v in empty_values) 3153 else: 3154 reject = lambda k, v: k in remove_keys 3155 3156 def filter_fn(obj): 3157 if isinstance(obj, dict): 3158 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} 3159 elif isinstance(obj, (list, tuple, set, LazyList)): 3160 return list(map(filter_fn, obj)) 3161 elif obj is None or isinstance(obj, (str, int, float, bool)): 3162 return obj 3163 else: 3164 return repr(obj) 3165 3166 return filter_fn(info_dict) 3167 3168 @staticmethod 3169 def filter_requested_info(info_dict, actually_filter=True): 3170 ''' Alias of sanitize_info for backward compatibility ''' 3171 return YoutubeDL.sanitize_info(info_dict, actually_filter) 3172 3173 def run_pp(self, pp, infodict): 3174 files_to_delete = [] 3175 if '__files_to_move' not in infodict: 3176 infodict['__files_to_move'] = {} 3177 try: 3178 files_to_delete, infodict = pp.run(infodict) 3179 except PostProcessingError as e: 3180 # Must be True and not 'only_download' 3181 if self.params.get('ignoreerrors') is True: 3182 self.report_error(e) 3183 return infodict 3184 raise 3185 3186 if not files_to_delete: 3187 return infodict 3188 if self.params.get('keepvideo', False): 3189 for f in files_to_delete: 3190 infodict['__files_to_move'].setdefault(f, '') 3191 else: 3192 for old_filename in set(files_to_delete): 3193 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) 3194 try: 3195 os.remove(encodeFilename(old_filename)) 3196 except (IOError, OSError): 3197 self.report_warning('Unable to remove downloaded original file') 3198 if old_filename in infodict['__files_to_move']: 3199 
del infodict['__files_to_move'][old_filename] 3200 return infodict 3201 3202 @staticmethod 3203 def post_extract(info_dict): 3204 def actual_post_extract(info_dict): 3205 if info_dict.get('_type') in ('playlist', 'multi_video'): 3206 for video_dict in info_dict.get('entries', {}): 3207 actual_post_extract(video_dict or {}) 3208 return 3209 3210 post_extractor = info_dict.get('__post_extractor') or (lambda: {}) 3211 extra = post_extractor().items() 3212 info_dict.update(extra) 3213 info_dict.pop('__post_extractor', None) 3214 3215 original_infodict = info_dict.get('__original_infodict') or {} 3216 original_infodict.update(extra) 3217 original_infodict.pop('__post_extractor', None) 3218 3219 actual_post_extract(info_dict or {}) 3220 3221 def pre_process(self, ie_info, key='pre_process', files_to_move=None): 3222 info = dict(ie_info) 3223 info['__files_to_move'] = files_to_move or {} 3224 for pp in self._pps[key]: 3225 info = self.run_pp(pp, info) 3226 return info, info.pop('__files_to_move', None) 3227 3228 def post_process(self, filename, ie_info, files_to_move=None): 3229 """Run all the postprocessors on the given file.""" 3230 info = dict(ie_info) 3231 info['filepath'] = filename 3232 info['__files_to_move'] = files_to_move or {} 3233 3234 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']: 3235 info = self.run_pp(pp, info) 3236 info = self.run_pp(MoveFilesAfterDownloadPP(self), info) 3237 del info['__files_to_move'] 3238 for pp in self._pps['after_move']: 3239 info = self.run_pp(pp, info) 3240 return info 3241 3242 def _make_archive_id(self, info_dict): 3243 video_id = info_dict.get('id') 3244 if not video_id: 3245 return 3246 # Future-proof against any change in case 3247 # and backwards compatibility with prior versions 3248 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist 3249 if extractor is None: 3250 url = str_or_none(info_dict.get('url')) 3251 if not url: 3252 return 3253 # Try to find 
matching extractor for the URL and take its ie_key 3254 for ie_key, ie in self._ies.items(): 3255 if ie.suitable(url): 3256 extractor = ie_key 3257 break 3258 else: 3259 return 3260 return '%s %s' % (extractor.lower(), video_id) 3261 3262 def in_download_archive(self, info_dict): 3263 fn = self.params.get('download_archive') 3264 if fn is None: 3265 return False 3266 3267 vid_id = self._make_archive_id(info_dict) 3268 if not vid_id: 3269 return False # Incomplete video information 3270 3271 return vid_id in self.archive 3272 3273 def record_download_archive(self, info_dict): 3274 fn = self.params.get('download_archive') 3275 if fn is None: 3276 return 3277 vid_id = self._make_archive_id(info_dict) 3278 assert vid_id 3279 with locked_file(fn, 'a', encoding='utf-8') as archive_file: 3280 archive_file.write(vid_id + '\n') 3281 self.archive.add(vid_id) 3282 3283 @staticmethod 3284 def format_resolution(format, default='unknown'): 3285 if format.get('vcodec') == 'none' and format.get('acodec') != 'none': 3286 return 'audio only' 3287 if format.get('resolution') is not None: 3288 return format['resolution'] 3289 if format.get('width') and format.get('height'): 3290 return '%dx%d' % (format['width'], format['height']) 3291 elif format.get('height'): 3292 return '%sp' % format['height'] 3293 elif format.get('width'): 3294 return '%dx?' 
% format['width'] 3295 return default 3296 3297 def _format_note(self, fdict): 3298 res = '' 3299 if fdict.get('ext') in ['f4f', 'f4m']: 3300 res += '(unsupported)' 3301 if fdict.get('language'): 3302 if res: 3303 res += ' ' 3304 res += '[%s]' % fdict['language'] 3305 if fdict.get('format_note') is not None: 3306 if res: 3307 res += ' ' 3308 res += fdict['format_note'] 3309 if fdict.get('tbr') is not None: 3310 if res: 3311 res += ', ' 3312 res += '%4dk' % fdict['tbr'] 3313 if fdict.get('container') is not None: 3314 if res: 3315 res += ', ' 3316 res += '%s container' % fdict['container'] 3317 if (fdict.get('vcodec') is not None 3318 and fdict.get('vcodec') != 'none'): 3319 if res: 3320 res += ', ' 3321 res += fdict['vcodec'] 3322 if fdict.get('vbr') is not None: 3323 res += '@' 3324 elif fdict.get('vbr') is not None and fdict.get('abr') is not None: 3325 res += 'video@' 3326 if fdict.get('vbr') is not None: 3327 res += '%4dk' % fdict['vbr'] 3328 if fdict.get('fps') is not None: 3329 if res: 3330 res += ', ' 3331 res += '%sfps' % fdict['fps'] 3332 if fdict.get('acodec') is not None: 3333 if res: 3334 res += ', ' 3335 if fdict['acodec'] == 'none': 3336 res += 'video only' 3337 else: 3338 res += '%-5s' % fdict['acodec'] 3339 elif fdict.get('abr') is not None: 3340 if res: 3341 res += ', ' 3342 res += 'audio' 3343 if fdict.get('abr') is not None: 3344 res += '@%3dk' % fdict['abr'] 3345 if fdict.get('asr') is not None: 3346 res += ' (%5dHz)' % fdict['asr'] 3347 if fdict.get('filesize') is not None: 3348 if res: 3349 res += ', ' 3350 res += format_bytes(fdict['filesize']) 3351 elif fdict.get('filesize_approx') is not None: 3352 if res: 3353 res += ', ' 3354 res += '~' + format_bytes(fdict['filesize_approx']) 3355 return res 3356 3357 def _list_format_headers(self, *headers): 3358 if self.params.get('listformats_table', True) is not False: 3359 return [self._format_screen(header, self.Styles.HEADERS) for header in headers] 3360 return headers 3361 3362 def 
list_formats(self, info_dict): 3363 if not info_dict.get('formats') and not info_dict.get('url'): 3364 self.to_screen('%s has no formats' % info_dict['id']) 3365 return 3366 self.to_screen('[info] Available formats for %s:' % info_dict['id']) 3367 3368 formats = info_dict.get('formats', [info_dict]) 3369 new_format = self.params.get('listformats_table', True) is not False 3370 if new_format: 3371 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) 3372 table = [ 3373 [ 3374 self._format_screen(format_field(f, 'format_id'), self.Styles.ID), 3375 format_field(f, 'ext'), 3376 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), 3377 format_field(f, 'fps', '\t%d'), 3378 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), 3379 delim, 3380 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), 3381 format_field(f, 'tbr', '\t%dk'), 3382 shorten_protocol_name(f.get('protocol', '')), 3383 delim, 3384 format_field(f, 'vcodec', default='unknown').replace( 3385 'none', 3386 'images' if f.get('acodec') == 'none' 3387 else self._format_screen('audio only', self.Styles.SUPPRESS)), 3388 format_field(f, 'vbr', '\t%dk'), 3389 format_field(f, 'acodec', default='unknown').replace( 3390 'none', 3391 '' if f.get('vcodec') == 'none' 3392 else self._format_screen('video only', self.Styles.SUPPRESS)), 3393 format_field(f, 'abr', '\t%dk'), 3394 format_field(f, 'asr', '\t%dHz'), 3395 join_nonempty( 3396 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, 3397 format_field(f, 'language', '[%s]'), 3398 join_nonempty( 3399 format_field(f, 'format_note'), 3400 format_field(f, 'container', ignore=(None, f.get('ext'))), 3401 delim=', '), 3402 delim=' '), 3403 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] 3404 header_line = self._list_format_headers( 3405 'ID', 'EXT', 
'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', 3406 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') 3407 else: 3408 table = [ 3409 [ 3410 format_field(f, 'format_id'), 3411 format_field(f, 'ext'), 3412 self.format_resolution(f), 3413 self._format_note(f)] 3414 for f in formats 3415 if f.get('preference') is None or f['preference'] >= -1000] 3416 header_line = ['format code', 'extension', 'resolution', 'note'] 3417 3418 self.to_stdout(render_table( 3419 header_line, table, 3420 extra_gap=(0 if new_format else 1), 3421 hide_empty=new_format, 3422 delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))) 3423 3424 def list_thumbnails(self, info_dict): 3425 thumbnails = list(info_dict.get('thumbnails')) 3426 if not thumbnails: 3427 self.to_screen('[info] No thumbnails present for %s' % info_dict['id']) 3428 return 3429 3430 self.to_screen( 3431 '[info] Thumbnails for %s:' % info_dict['id']) 3432 self.to_stdout(render_table( 3433 self._list_format_headers('ID', 'Width', 'Height', 'URL'), 3434 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) 3435 3436 def list_subtitles(self, video_id, subtitles, name='subtitles'): 3437 if not subtitles: 3438 self.to_screen('%s has no %s' % (video_id, name)) 3439 return 3440 self.to_screen( 3441 'Available %s for %s:' % (name, video_id)) 3442 3443 def _row(lang, formats): 3444 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))) 3445 if len(set(names)) == 1: 3446 names = [] if names[0] == 'unknown' else names[:1] 3447 return [lang, ', '.join(names), ', '.join(exts)] 3448 3449 self.to_stdout(render_table( 3450 self._list_format_headers('Language', 'Name', 'Formats'), 3451 [_row(lang, formats) for lang, formats in subtitles.items()], 3452 hide_empty=True)) 3453 3454 def urlopen(self, req): 3455 """ Start an HTTP download """ 3456 if isinstance(req, compat_basestring): 3457 req 
    def print_debug_header(self):
        """Write the verbose debug banner: versions, encodings, libraries and proxy map.

        No-op unless the `verbose` option is set. Output goes through the
        configured logger when one is given, otherwise to the screen.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe a stream's encoding, flagging missing terminal-sequence support
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        logger = self.params.get('logger')
        if logger:
            # Route all debug lines through the user-supplied logger
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            # The encoding line itself is written without re-encoding
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        if source == 'source':
            # Running from a git checkout: best-effort report of the current commit
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                try:
                    # NOTE(review): sys.exc_clear() is a Python 2 relic and does not
                    # exist on Python 3; the AttributeError is swallowed below
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Include the PyPy version number when running under PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        # Imported lazily here to avoid import cycles / optional-dependency cost
        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            SECRETSTORAGE_AVAILABLE and 'secretstorage',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Collect the effective proxy mapping from all opener handlers that carry one
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)

    def _setup_opener(self):
        """Build the urllib opener (`self._opener`): cookies, proxies, custom handlers.

        Also sets `self._socket_timeout` and `self.cookiejar` as side effects.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 20 seconds
        self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

    def encode(self, s):
        """Encode `s` using the configured output encoding; bytes pass through unchanged.

        Re-raises UnicodeEncodeError with a hint appended to its `reason`.
        """
        if isinstance(s, bytes):
            return s  # Already encoded

        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
3630 raise 3631 3632 def get_encoding(self): 3633 encoding = self.params.get('encoding') 3634 if encoding is None: 3635 encoding = preferredencoding() 3636 return encoding 3637 3638 def _write_info_json(self, label, ie_result, infofn, overwrite=None): 3639 ''' Write infojson and returns True = written, False = skip, None = error ''' 3640 if overwrite is None: 3641 overwrite = self.params.get('overwrites', True) 3642 if not self.params.get('writeinfojson'): 3643 return False 3644 elif not infofn: 3645 self.write_debug(f'Skipping writing {label} infojson') 3646 return False 3647 elif not self._ensure_dir_exists(infofn): 3648 return None 3649 elif not overwrite and os.path.exists(infofn): 3650 self.to_screen(f'[info] {label.title()} metadata is already present') 3651 else: 3652 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') 3653 try: 3654 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) 3655 except (OSError, IOError): 3656 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') 3657 return None 3658 return True 3659 3660 def _write_description(self, label, ie_result, descfn): 3661 ''' Write description and returns True = written, False = skip, None = error ''' 3662 if not self.params.get('writedescription'): 3663 return False 3664 elif not descfn: 3665 self.write_debug(f'Skipping writing {label} description') 3666 return False 3667 elif not self._ensure_dir_exists(descfn): 3668 return None 3669 elif not self.params.get('overwrites', True) and os.path.exists(descfn): 3670 self.to_screen(f'[info] {label.title()} description is already present') 3671 elif ie_result.get('description') is None: 3672 self.report_warning(f'There\'s no {label} description to write') 3673 return False 3674 else: 3675 try: 3676 self.to_screen(f'[info] Writing {label} description to: {descfn}') 3677 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: 3678 
descfile.write(ie_result['description']) 3679 except (OSError, IOError): 3680 self.report_error(f'Cannot write {label} description file {descfn}') 3681 return None 3682 return True 3683 3684 def _write_subtitles(self, info_dict, filename): 3685 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' 3686 ret = [] 3687 subtitles = info_dict.get('requested_subtitles') 3688 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): 3689 # subtitles download errors are already managed as troubles in relevant IE 3690 # that way it will silently go on when used with unsupporting IE 3691 return ret 3692 3693 sub_filename_base = self.prepare_filename(info_dict, 'subtitle') 3694 if not sub_filename_base: 3695 self.to_screen('[info] Skipping writing video subtitles') 3696 return ret 3697 for sub_lang, sub_info in subtitles.items(): 3698 sub_format = sub_info['ext'] 3699 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) 3700 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext')) 3701 if not self.params.get('overwrites', True) and os.path.exists(sub_filename): 3702 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present') 3703 sub_info['filepath'] = sub_filename 3704 ret.append((sub_filename, sub_filename_final)) 3705 continue 3706 3707 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}') 3708 if sub_info.get('data') is not None: 3709 try: 3710 # Use newline='' to prevent conversion of newline characters 3711 # See https://github.com/ytdl-org/youtube-dl/issues/10268 3712 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: 3713 subfile.write(sub_info['data']) 3714 sub_info['filepath'] = sub_filename 3715 ret.append((sub_filename, sub_filename_final)) 3716 continue 3717 except (OSError, IOError): 3718 self.report_error(f'Cannot write video 
subtitles file {sub_filename}') 3719 return None 3720 3721 try: 3722 sub_copy = sub_info.copy() 3723 sub_copy.setdefault('http_headers', info_dict.get('http_headers')) 3724 self.dl(sub_filename, sub_copy, subtitle=True) 3725 sub_info['filepath'] = sub_filename 3726 ret.append((sub_filename, sub_filename_final)) 3727 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: 3728 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') 3729 continue 3730 return ret 3731 3732 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): 3733 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) ''' 3734 write_all = self.params.get('write_all_thumbnails', False) 3735 thumbnails, ret = [], [] 3736 if write_all or self.params.get('writethumbnail', False): 3737 thumbnails = info_dict.get('thumbnails') or [] 3738 multiple = write_all and len(thumbnails) > 1 3739 3740 if thumb_filename_base is None: 3741 thumb_filename_base = filename 3742 if thumbnails and not thumb_filename_base: 3743 self.write_debug(f'Skipping writing {label} thumbnail') 3744 return ret 3745 3746 for idx, t in list(enumerate(thumbnails))[::-1]: 3747 thumb_ext = (f'{t["id"]}.' 
if multiple else '') + determine_ext(t['url'], 'jpg') 3748 thumb_display_id = f'{label} thumbnail {t["id"]}' 3749 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) 3750 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) 3751 3752 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename): 3753 ret.append((thumb_filename, thumb_filename_final)) 3754 t['filepath'] = thumb_filename 3755 self.to_screen('[info] %s is already present' % ( 3756 thumb_display_id if multiple else f'{label} thumbnail').capitalize()) 3757 else: 3758 self.to_screen(f'[info] Downloading {thumb_display_id} ...') 3759 try: 3760 uf = self.urlopen(t['url']) 3761 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') 3762 with open(encodeFilename(thumb_filename), 'wb') as thumbf: 3763 shutil.copyfileobj(uf, thumbf) 3764 ret.append((thumb_filename, thumb_filename_final)) 3765 t['filepath'] = thumb_filename 3766 except network_exceptions as err: 3767 thumbnails.pop(idx) 3768 self.report_warning(f'Unable to download {thumb_display_id}: {err}') 3769 if ret and not write_all: 3770 break 3771 return ret 3772