1#!/usr/bin/env python3
2# coding: utf-8
3
4from __future__ import absolute_import, unicode_literals
5
6import collections
7import contextlib
8import datetime
9import errno
10import fileinput
11import functools
12import io
13import itertools
14import json
15import locale
16import operator
17import os
18import platform
19import re
20import shutil
21import subprocess
22import sys
23import tempfile
24import time
25import tokenize
26import traceback
27import random
28import unicodedata
29
30from enum import Enum
31from string import ascii_letters
32
33from .compat import (
34    compat_basestring,
35    compat_get_terminal_size,
36    compat_kwargs,
37    compat_numeric_types,
38    compat_os_name,
39    compat_pycrypto_AES,
40    compat_shlex_quote,
41    compat_str,
42    compat_tokenize_tokenize,
43    compat_urllib_error,
44    compat_urllib_request,
45    compat_urllib_request_DataHandler,
46    windows_enable_vt_mode,
47)
48from .cookies import load_cookies
49from .utils import (
50    age_restricted,
51    args_to_str,
52    ContentTooShortError,
53    date_from_str,
54    DateRange,
55    DEFAULT_OUTTMPL,
56    determine_ext,
57    determine_protocol,
58    DownloadCancelled,
59    DownloadError,
60    encode_compat_str,
61    encodeFilename,
62    EntryNotInPlaylist,
63    error_to_compat_str,
64    ExistingVideoReached,
65    expand_path,
66    ExtractorError,
67    float_or_none,
68    format_bytes,
69    format_field,
70    format_decimal_suffix,
71    formatSeconds,
72    GeoRestrictedError,
73    get_domain,
74    HEADRequest,
75    int_or_none,
76    iri_to_uri,
77    ISO3166Utils,
78    join_nonempty,
79    LazyList,
80    LINK_TEMPLATES,
81    locked_file,
82    make_dir,
83    make_HTTPS_handler,
84    MaxDownloadsReached,
85    network_exceptions,
86    number_of_digits,
87    orderedSet,
88    OUTTMPL_TYPES,
89    PagedList,
90    parse_filesize,
91    PerRequestProxyHandler,
92    platform_name,
93    Popen,
94    PostProcessingError,
95    preferredencoding,
96    prepend_extension,
97    ReExtractInfo,
98    register_socks_protocols,
99    RejectedVideoReached,
100    remove_terminal_sequences,
101    render_table,
102    replace_extension,
103    SameFileError,
104    sanitize_filename,
105    sanitize_path,
106    sanitize_url,
107    sanitized_Request,
108    std_headers,
109    STR_FORMAT_RE_TMPL,
110    STR_FORMAT_TYPES,
111    str_or_none,
112    strftime_or_none,
113    subtitles_filename,
114    supports_terminal_sequences,
115    timetuple_from_msec,
116    to_high_limit_path,
117    traverse_obj,
118    try_get,
119    UnavailableVideoError,
120    url_basename,
121    variadic,
122    version_tuple,
123    write_json_file,
124    write_string,
125    YoutubeDLCookieProcessor,
126    YoutubeDLHandler,
127    YoutubeDLRedirectHandler,
128)
129from .cache import Cache
130from .minicurses import format_text
131from .extractor import (
132    gen_extractor_classes,
133    get_info_extractor,
134    _LAZY_LOADER,
135    _PLUGIN_CLASSES as plugin_extractors
136)
137from .extractor.openload import PhantomJSwrapper
138from .downloader import (
139    FFmpegFD,
140    get_suitable_downloader,
141    shorten_protocol_name
142)
143from .downloader.rtmp import rtmpdump_version
144from .postprocessor import (
145    get_postprocessor,
146    EmbedThumbnailPP,
147    FFmpegFixupDuplicateMoovPP,
148    FFmpegFixupDurationPP,
149    FFmpegFixupM3u8PP,
150    FFmpegFixupM4aPP,
151    FFmpegFixupStretchedPP,
152    FFmpegFixupTimestampPP,
153    FFmpegMergerPP,
154    FFmpegPostProcessor,
155    MoveFilesAfterDownloadPP,
156    _PLUGIN_CLASSES as plugin_postprocessors
157)
158from .update import detect_variant
159from .version import __version__, RELEASE_GIT_HEAD
160
if compat_os_name == 'nt':
    # Windows-only dependency; presumably used for native console/API calls
    # later in the file -- usage is not visible in this chunk.
    import ctypes
163
164
165class YoutubeDL(object):
166    """YoutubeDL class.
167
    YoutubeDL objects are the ones responsible for downloading the
169    actual video file and writing it to disk if the user has requested
170    it, among some other tasks. In most cases there should be one per
171    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
173    has to pass the URL to one of them.
174
175    For this, YoutubeDL objects have a method that allows
176    InfoExtractors to be registered in a given order. When it is passed
177    a URL, the YoutubeDL object handles it to the first InfoExtractor it
178    finds that reports being able to handle it. The InfoExtractor extracts
179    all the information about the video or videos the URL refers to, and
180    YoutubeDL process the extracted information, possibly using a File
181    Downloader to download the video.
182
183    YoutubeDL objects accept a lot of parameters. In order not to saturate
184    the object constructor with arguments, it receives a dictionary of
185    options instead. These options are available through the params
186    attribute for the InfoExtractors to use. The YoutubeDL also
187    registers itself as the downloader in charge for the InfoExtractors
188    that are added to it, so this is a "mutual registration".
189
190    Available options:
191
192    username:          Username for authentication purposes.
193    password:          Password for authentication purposes.
194    videopassword:     Password for accessing a video.
195    ap_mso:            Adobe Pass multiple-system operator identifier.
196    ap_username:       Multiple-system operator account username.
197    ap_password:       Multiple-system operator account password.
198    usenetrc:          Use netrc for authentication instead.
199    verbose:           Print additional info to stdout.
200    quiet:             Do not print messages to stdout.
201    no_warnings:       Do not print out anything for warnings.
202    forceprint:        A list of templates to force print
203    forceurl:          Force printing final URL. (Deprecated)
204    forcetitle:        Force printing title. (Deprecated)
205    forceid:           Force printing ID. (Deprecated)
206    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
207    forcedescription:  Force printing description. (Deprecated)
208    forcefilename:     Force printing final filename. (Deprecated)
209    forceduration:     Force printing duration. (Deprecated)
210    forcejson:         Force printing info_dict as JSON.
211    dump_single_json:  Force printing the info_dict of the whole playlist
212                       (or video) as a single JSON line.
213    force_write_download_archive: Force writing download archive regardless
214                       of 'skip_download' or 'simulate'.
215    simulate:          Do not download the video files. If unset (or None),
216                       simulate only if listsubtitles, listformats or list_thumbnails is used
217    format:            Video format code. see "FORMAT SELECTION" for more details.
218                       You can also pass a function. The function takes 'ctx' as
219                       argument and returns the formats to download.
220                       See "build_format_selector" for an implementation
221    allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
223                       extracting metadata even if the video is not actually
224                       available for download (experimental)
225    format_sort:       A list of fields by which to sort the video formats.
226                       See "Sorting Formats" for more details.
227    format_sort_force: Force the given format_sort. see "Sorting Formats"
228                       for more details.
229    allow_multiple_video_streams:   Allow multiple video streams to be merged
230                       into a single file
231    allow_multiple_audio_streams:   Allow multiple audio streams to be merged
232                       into a single file
233    check_formats      Whether to test if the formats are downloadable.
234                       Can be True (check all), False (check none),
235                       'selected' (check selected formats),
236                       or None (check only if requested by extractor)
237    paths:             Dictionary of output paths. The allowed keys are 'home'
238                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
239    outtmpl:           Dictionary of templates for output names. Allowed keys
240                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
241                       For compatibility with youtube-dl, a single string can also be used
242    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
243    restrictfilenames: Do not allow "&" and spaces in file names
244    trim_file_name:    Limit length of filename (extension excluded)
245    windowsfilenames:  Force the filenames to be windows compatible
246    ignoreerrors:      Do not stop on download/postprocessing errors.
247                       Can be 'only_download' to ignore only download errors.
248                       Default is 'only_download' for CLI, but False for API
249    skip_playlist_after_errors: Number of allowed failures until the rest of
250                       the playlist is skipped
251    force_generic_extractor: Force downloader to use the generic extractor
252    overwrites:        Overwrite all video and metadata files if True,
253                       overwrite only non-video files if None
254                       and don't overwrite any file if False
255                       For compatibility with youtube-dl,
256                       "nooverwrites" may also be used instead
257    playliststart:     Playlist item to start at.
258    playlistend:       Playlist item to end at.
259    playlist_items:    Specific indices of playlist to download.
260    playlistreverse:   Download playlist items in reverse order.
261    playlistrandom:    Download playlist items in random order.
262    matchtitle:        Download only matching titles.
263    rejecttitle:       Reject downloads for matching titles.
264    logger:            Log messages to a logging.Logger instance.
265    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
267    writedescription:  Write the video description to a .description file
268    writeinfojson:     Write the video description to a .info.json file
269    clean_infojson:    Remove private fields from the infojson
270    getcomments:       Extract video comments. This will not be written to disk
271                       unless writeinfojson is also given
272    writeannotations:  Write the video annotations to a .annotations.xml file
273    writethumbnail:    Write the thumbnail image to a file
274    allow_playlist_files: Whether to write playlists' description, infojson etc
275                       also to disk when using the 'write*' options
276    write_all_thumbnails:  Write all thumbnail formats to files
277    writelink:         Write an internet shortcut file, depending on the
278                       current platform (.url/.webloc/.desktop)
279    writeurllink:      Write a Windows internet shortcut file (.url)
280    writewebloclink:   Write a macOS internet shortcut file (.webloc)
281    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
282    writesubtitles:    Write the video subtitles to a file
283    writeautomaticsub: Write the automatically generated subtitles to a file
284    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
285                       Downloads all the subtitles of the video
286                       (requires writesubtitles or writeautomaticsub)
287    listsubtitles:     Lists all available subtitles for the video
288    subtitlesformat:   The format code for subtitles
289    subtitleslangs:    List of languages of the subtitles to download (can be regex).
290                       The list may contain "all" to refer to all the available
291                       subtitles. The language can be prefixed with a "-" to
292                       exclude it from the requested languages. Eg: ['all', '-live_chat']
293    keepvideo:         Keep the video file after post-processing
294    daterange:         A DateRange object, download only if the upload_date is in the range.
295    skip_download:     Skip the actual download of the video file
296    cachedir:          Location of the cache files in the filesystem.
297                       False to disable filesystem cache.
298    noplaylist:        Download single video instead of a playlist if in doubt.
299    age_limit:         An integer representing the user's age in years.
300                       Unsuitable videos for the given age are skipped.
301    min_views:         An integer representing the minimum view count the video
302                       must have in order to not be skipped.
303                       Videos without view count information are always
304                       downloaded. None for no limit.
305    max_views:         An integer representing the maximum view count.
306                       Videos that are more popular than that are not
307                       downloaded.
308                       Videos without view count information are always
309                       downloaded. None for no limit.
310    download_archive:  File name of a file where all downloads are recorded.
311                       Videos already present in the file are not downloaded
312                       again.
313    break_on_existing: Stop the download process after attempting to download a
314                       file that is in the archive.
315    break_on_reject:   Stop the download process when encountering a video that
316                       has been filtered out.
317    break_per_url:     Whether break_on_reject and break_on_existing
318                       should act on each input URL as opposed to for the entire queue
319    cookiefile:        File name where cookies should be read from and dumped to
320    cookiesfrombrowser:  A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
322                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
323    nocheckcertificate:  Do not verify SSL certificates
324    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
325                       At the moment, this is only supported by YouTube.
326    proxy:             URL of the proxy server to use
327    geo_verification_proxy:  URL of the proxy to use for IP address verification
328                       on geo-restricted sites.
329    socket_timeout:    Time to wait for unresponsive hosts, in seconds
330    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
332    debug_printtraffic:Print out sent and received HTTP traffic
333    include_ads:       Download ads as well (deprecated)
334    default_search:    Prepend this string if an input url is not valid.
335                       'auto' for elaborate guessing
336    encoding:          Use this encoding instead of the system-specified.
337    extract_flat:      Do not resolve URLs, return the immediate result.
338                       Pass in 'in_playlist' to only show this behavior for
339                       playlist items.
340    wait_for_video:    If given, wait for scheduled streams to become available.
341                       The value should be a tuple containing the range
342                       (min_secs, max_secs) to wait between retries
343    postprocessors:    A list of dictionaries, each with an entry
344                       * key:  The name of the postprocessor. See
345                               yt_dlp/postprocessor/__init__.py for a list.
346                       * when: When to run the postprocessor. Can be one of
347                               pre_process|before_dl|post_process|after_move.
348                               Assumed to be 'post_process' if not given
349    post_hooks:        Deprecated - Register a custom postprocessor instead
350                       A list of functions that get called as the final step
351                       for each video file, after all postprocessors have been
352                       called. The filename will be passed as the only argument.
353    progress_hooks:    A list of functions that get called on download
354                       progress, with a dictionary with the entries
355                       * status: One of "downloading", "error", or "finished".
356                                 Check this first and ignore unknown values.
357                       * info_dict: The extracted info_dict
358
359                       If status is one of "downloading", or "finished", the
360                       following properties may also be present:
361                       * filename: The final filename (always present)
362                       * tmpfilename: The filename we're currently writing to
363                       * downloaded_bytes: Bytes on disk
364                       * total_bytes: Size of the whole file, None if unknown
365                       * total_bytes_estimate: Guess of the eventual file size,
366                                               None if unavailable.
367                       * elapsed: The number of seconds since download started.
368                       * eta: The estimated time in seconds, None if unknown
369                       * speed: The download speed in bytes/second, None if
370                                unknown
371                       * fragment_index: The counter of the currently
372                                         downloaded video fragment.
373                       * fragment_count: The number of fragments (= individual
374                                         files that will be merged)
375
376                       Progress hooks are guaranteed to be called at least once
377                       (with status "finished") if the download is successful.
378    postprocessor_hooks:  A list of functions that get called on postprocessing
379                       progress, with a dictionary with the entries
380                       * status: One of "started", "processing", or "finished".
381                                 Check this first and ignore unknown values.
382                       * postprocessor: Name of the postprocessor
383                       * info_dict: The extracted info_dict
384
385                       Progress hooks are guaranteed to be called at least twice
386                       (with status "started" and "finished") if the processing is successful.
387    merge_output_format: Extension to use when merging formats.
388    final_ext:         Expected final extension; used to detect when the file was
389                       already downloaded and converted
390    fixup:             Automatically correct known faults of the file.
391                       One of:
392                       - "never": do nothing
393                       - "warn": only emit a warning
394                       - "detect_or_warn": check whether we can do anything
395                                           about it, warn otherwise (default)
396    source_address:    Client-side IP address to bind to.
397    call_home:         Boolean, true iff we are allowed to contact the
398                       yt-dlp servers for debugging. (BROKEN)
399    sleep_interval_requests: Number of seconds to sleep between requests
400                       during extraction
401    sleep_interval:    Number of seconds to sleep before each download when
402                       used alone or a lower bound of a range for randomized
403                       sleep before each download (minimum possible number
404                       of seconds to sleep) when used along with
405                       max_sleep_interval.
406    max_sleep_interval:Upper bound of a range for randomized sleep before each
407                       download (maximum possible number of seconds to sleep).
408                       Must only be used along with sleep_interval.
409                       Actual sleep time will be a random float from range
410                       [sleep_interval; max_sleep_interval].
411    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
412    listformats:       Print an overview of available video formats and exit.
413    list_thumbnails:   Print a table of all thumbnails and exit.
414    match_filter:      A function that gets called with the info_dict of
415                       every video.
416                       If it returns a message, the video is ignored.
417                       If it returns None, the video is downloaded.
418                       match_filter_func in utils.py is one example for this.
419    no_color:          Do not emit color codes in output.
420    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
421                       HTTP header
422    geo_bypass_country:
423                       Two-letter ISO 3166-2 country code that will be used for
424                       explicit geographic restriction bypassing via faking
425                       X-Forwarded-For HTTP header
426    geo_bypass_ip_block:
427                       IP range in CIDR notation that will be used similarly to
428                       geo_bypass_country
429
430    The following options determine which downloader is picked:
431    external_downloader: A dictionary of protocol keys and the executable of the
432                       external downloader to use for it. The allowed protocols
433                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
434                       Set the value to 'native' to use the native downloader
435    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
436                       or {'m3u8': 'ffmpeg'} instead.
437                       Use the native HLS downloader instead of ffmpeg/avconv
438                       if True, otherwise use ffmpeg/avconv if False, otherwise
439                       use downloader suggested by extractor if None.
440    compat_opts:       Compatibility options. See "Differences in default behavior".
441                       The following options do not work when used through the API:
442                       filename, abort-on-error, multistreams, no-live-chat, format-sort
443                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
444                       Refer __init__.py for their implementation
445    progress_template: Dictionary of templates for progress outputs.
446                       Allowed keys are 'download', 'postprocess',
447                       'download-title' (console title) and 'postprocess-title'.
448                       The template is mapped on a dictionary with keys 'progress' and 'info'
449
450    The following parameters are not used by YoutubeDL itself, they are used by
451    the downloader (see yt_dlp/downloader/common.py):
452    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
453    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
454    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
455    external_downloader_args, concurrent_fragment_downloads.
456
457    The following options are used by the post processors:
458    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
459                       otherwise prefer ffmpeg. (avconv support is deprecated)
460    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
461                       to the binary or its containing directory.
462    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
463                       and a list of additional command-line arguments for the
464                       postprocessor/executable. The dict can also have "PP+EXE" keys
465                       which are used when the given exe is used by the given PP.
466                       Use 'default' as the name for arguments to passed to all PP
467                       For compatibility with youtube-dl, a single list of args
468                       can also be used
469
470    The following options are used by the extractors:
471    extractor_retries: Number of times to retry for known errors
472    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
473    hls_split_discontinuity: Split HLS playlists to different formats at
474                       discontinuities such as ad breaks (default: False)
475    extractor_args:    A dictionary of arguments to be passed to the extractors.
476                       See "EXTRACTOR ARGUMENTS" for details.
477                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
478    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
479                       If True (default), DASH manifests and related
480                       data will be downloaded and processed by extractor.
481                       You can reduce network I/O by disabling it if you don't
482                       care about DASH. (only for youtube)
483    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
484                       If True (default), HLS manifests and related
485                       data will be downloaded and processed by extractor.
486                       You can reduce network I/O by disabling it if you don't
487                       care about HLS. (only for youtube)
488    """
489
490    _NUMERIC_FIELDS = set((
491        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
492        'timestamp', 'release_timestamp',
493        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
494        'average_rating', 'comment_count', 'age_limit',
495        'start_time', 'end_time',
496        'chapter_number', 'season_number', 'episode_number',
497        'track_number', 'disc_number', 'release_year',
498    ))
499
500    _format_selection_exts = {
501        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
502        'video': {'mp4', 'flv', 'webm', '3gp'},
503        'storyboards': {'mhtml'},
504    }
505
506    params = None
507    _ies = {}
508    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
509    _printed_messages = set()
510    _first_webpage_request = True
511    _download_retcode = None
512    _num_downloads = None
513    _playlist_level = 0
514    _playlist_urls = set()
515    _screen_file = None
516
517    def __init__(self, params=None, auto_init=True):
518        """Create a FileDownloader object with the given options.
519        @param auto_init    Whether to load the default extractors and print header (if verbose).
520                            Set to 'no_verbose_header' to not print the header
521        """
522        if params is None:
523            params = {}
524        self._ies = {}
525        self._ies_instances = {}
526        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
527        self._printed_messages = set()
528        self._first_webpage_request = True
529        self._post_hooks = []
530        self._progress_hooks = []
531        self._postprocessor_hooks = []
532        self._download_retcode = 0
533        self._num_downloads = 0
534        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
535        self._err_file = sys.stderr
536        self.params = params
537        self.cache = Cache(self)
538
539        windows_enable_vt_mode()
540        self._allow_colors = {
541            'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
542            'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
543        }
544
545        if sys.version_info < (3, 6):
546            self.report_warning(
547                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
548
549        if self.params.get('allow_unplayable_formats'):
550            self.report_warning(
551                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
552                'This is a developer option intended for debugging. \n'
553                '         If you experience any issues while using this option, '
554                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
555
556        def check_deprecated(param, option, suggestion):
557            if self.params.get(param) is not None:
558                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
559                return True
560            return False
561
562        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
563            if self.params.get('geo_verification_proxy') is None:
564                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
565
566        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
567        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
568        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
569
570        for msg in self.params.get('_warnings', []):
571            self.report_warning(msg)
572        for msg in self.params.get('_deprecation_warnings', []):
573            self.deprecation_warning(msg)
574
575        if 'list-formats' in self.params.get('compat_opts', []):
576            self.params['listformats_table'] = False
577
578        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
579            # nooverwrites was unnecessarily changed to overwrites
580            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
581            # This ensures compatibility with both keys
582            self.params['overwrites'] = not self.params['nooverwrites']
583        elif self.params.get('overwrites') is None:
584            self.params.pop('overwrites', None)
585        else:
586            self.params['nooverwrites'] = not self.params['overwrites']
587
588        if params.get('bidi_workaround', False):
589            try:
590                import pty
591                master, slave = pty.openpty()
592                width = compat_get_terminal_size().columns
593                if width is None:
594                    width_args = []
595                else:
596                    width_args = ['-w', str(width)]
597                sp_kwargs = dict(
598                    stdin=subprocess.PIPE,
599                    stdout=slave,
600                    stderr=self._err_file)
601                try:
602                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
603                except OSError:
604                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
605                self._output_channel = os.fdopen(master, 'rb')
606            except OSError as ose:
607                if ose.errno == errno.ENOENT:
608                    self.report_warning(
609                        'Could not find fribidi executable, ignoring --bidi-workaround. '
610                        'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
611                else:
612                    raise
613
614        if (sys.platform != 'win32'
615                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
616                and not params.get('restrictfilenames', False)):
617            # Unicode filesystem API will throw errors (#1474, #13027)
618            self.report_warning(
619                'Assuming --restrict-filenames since file system encoding '
620                'cannot encode all characters. '
621                'Set the LC_ALL environment variable to fix this.')
622            self.params['restrictfilenames'] = True
623
624        self.outtmpl_dict = self.parse_outtmpl()
625
626        # Creating format selector here allows us to catch syntax errors before the extraction
627        self.format_selector = (
628            self.params.get('format') if self.params.get('format') in (None, '-')
629            else self.params['format'] if callable(self.params['format'])
630            else self.build_format_selector(self.params['format']))
631
632        self._setup_opener()
633
634        if auto_init:
635            if auto_init != 'no_verbose_header':
636                self.print_debug_header()
637            self.add_default_info_extractors()
638
639        hooks = {
640            'post_hooks': self.add_post_hook,
641            'progress_hooks': self.add_progress_hook,
642            'postprocessor_hooks': self.add_postprocessor_hook,
643        }
644        for opt, fn in hooks.items():
645            for ph in self.params.get(opt, []):
646                fn(ph)
647
648        for pp_def_raw in self.params.get('postprocessors', []):
649            pp_def = dict(pp_def_raw)
650            when = pp_def.pop('when', 'post_process')
651            self.add_post_processor(
652                get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
653                when=when)
654
655        register_socks_protocols()
656
657        def preload_download_archive(fn):
658            """Preload the archive, if any is specified"""
659            if fn is None:
660                return False
661            self.write_debug(f'Loading archive file {fn!r}')
662            try:
663                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
664                    for line in archive_file:
665                        self.archive.add(line.strip())
666            except IOError as ioe:
667                if ioe.errno != errno.ENOENT:
668                    raise
669                return False
670            return True
671
672        self.archive = set()
673        preload_download_archive(self.params.get('download_archive'))
674
675    def warn_if_short_id(self, argv):
676        # short YouTube ID starting with dash?
677        idxs = [
678            i for i, a in enumerate(argv)
679            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
680        if idxs:
681            correct_argv = (
682                ['yt-dlp']
683                + [a for i, a in enumerate(argv) if i not in idxs]
684                + ['--'] + [argv[i] for i in idxs]
685            )
686            self.report_warning(
687                'Long argument string detected. '
688                'Use -- to separate parameters and URLs, like this:\n%s' %
689                args_to_str(correct_argv))
690
691    def add_info_extractor(self, ie):
692        """Add an InfoExtractor object to the end of the list."""
693        ie_key = ie.ie_key()
694        self._ies[ie_key] = ie
695        if not isinstance(ie, type):
696            self._ies_instances[ie_key] = ie
697            ie.set_downloader(self)
698
699    def _get_info_extractor_class(self, ie_key):
700        ie = self._ies.get(ie_key)
701        if ie is None:
702            ie = get_info_extractor(ie_key)
703            self.add_info_extractor(ie)
704        return ie
705
706    def get_info_extractor(self, ie_key):
707        """
708        Get an instance of an IE with name ie_key, it will try to get one from
709        the _ies list, if there's no instance it will create a new one and add
710        it to the extractor list.
711        """
712        ie = self._ies_instances.get(ie_key)
713        if ie is None:
714            ie = get_info_extractor(ie_key)()
715            self.add_info_extractor(ie)
716        return ie
717
718    def add_default_info_extractors(self):
719        """
720        Add the InfoExtractors returned by gen_extractors to the end of the list
721        """
722        for ie in gen_extractor_classes():
723            self.add_info_extractor(ie)
724
725    def add_post_processor(self, pp, when='post_process'):
726        """Add a PostProcessor object to the end of the chain."""
727        self._pps[when].append(pp)
728        pp.set_downloader(self)
729
730    def add_post_hook(self, ph):
731        """Add the post hook"""
732        self._post_hooks.append(ph)
733
734    def add_progress_hook(self, ph):
735        """Add the download progress hook"""
736        self._progress_hooks.append(ph)
737
738    def add_postprocessor_hook(self, ph):
739        """Add the postprocessing progress hook"""
740        self._postprocessor_hooks.append(ph)
741        for pps in self._pps.values():
742            for pp in pps:
743                pp.add_progress_hook(ph)
744
745    def _bidi_workaround(self, message):
746        if not hasattr(self, '_output_channel'):
747            return message
748
749        assert hasattr(self, '_output_process')
750        assert isinstance(message, compat_str)
751        line_count = message.count('\n') + 1
752        self._output_process.stdin.write((message + '\n').encode('utf-8'))
753        self._output_process.stdin.flush()
754        res = ''.join(self._output_channel.readline().decode('utf-8')
755                      for _ in range(line_count))
756        return res[:-len('\n')]
757
758    def _write_string(self, message, out=None, only_once=False):
759        if only_once:
760            if message in self._printed_messages:
761                return
762            self._printed_messages.add(message)
763        write_string(message, out=out, encoding=self.params.get('encoding'))
764
765    def to_stdout(self, message, skip_eol=False, quiet=False):
766        """Print message to stdout"""
767        if self.params.get('logger'):
768            self.params['logger'].debug(message)
769        elif not quiet or self.params.get('verbose'):
770            self._write_string(
771                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
772                self._err_file if quiet else self._screen_file)
773
774    def to_stderr(self, message, only_once=False):
775        """Print message to stderr"""
776        assert isinstance(message, compat_str)
777        if self.params.get('logger'):
778            self.params['logger'].error(message)
779        else:
780            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
781
782    def to_console_title(self, message):
783        if not self.params.get('consoletitle', False):
784            return
785        message = remove_terminal_sequences(message)
786        if compat_os_name == 'nt':
787            if ctypes.windll.kernel32.GetConsoleWindow():
788                # c_wchar_p() might not be necessary if `message` is
789                # already of type unicode()
790                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
791        elif 'TERM' in os.environ:
792            self._write_string('\033]0;%s\007' % message, self._screen_file)
793
794    def save_console_title(self):
795        if not self.params.get('consoletitle', False):
796            return
797        if self.params.get('simulate'):
798            return
799        if compat_os_name != 'nt' and 'TERM' in os.environ:
800            # Save the title on stack
801            self._write_string('\033[22;0t', self._screen_file)
802
803    def restore_console_title(self):
804        if not self.params.get('consoletitle', False):
805            return
806        if self.params.get('simulate'):
807            return
808        if compat_os_name != 'nt' and 'TERM' in os.environ:
809            # Restore the title from stack
810            self._write_string('\033[23;0t', self._screen_file)
811
812    def __enter__(self):
813        self.save_console_title()
814        return self
815
816    def __exit__(self, *args):
817        self.restore_console_title()
818
819        if self.params.get('cookiefile') is not None:
820            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
821
    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignorerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the traceback wrapped inside the current exception
                    # (e.g. an extractor error carrying its own exc_info)
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack instead
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            # Raise DownloadError with the most specific exc_info available
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
855
856    def to_screen(self, message, skip_eol=False):
857        """Print message to stdout if not in quiet mode"""
858        self.to_stdout(
859            message, skip_eol, quiet=self.params.get('quiet', False))
860
    class Styles(Enum):
        """Semantic text styles; values are passed to format_text as the color/style name."""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
        SUPPRESS = 'light black'
869
870    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
871        if test_encoding:
872            original_text = text
873            encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
874            text = text.encode(encoding, 'ignore').decode(encoding)
875            if fallback is not None and text != original_text:
876                text = fallback
877        if isinstance(f, self.Styles):
878            f = f.value
879        return format_text(text, f) if allow_colors else text if fallback is None else fallback
880
881    def _format_screen(self, *args, **kwargs):
882        return self._format_text(
883            self._screen_file, self._allow_colors['screen'], *args, **kwargs)
884
885    def _format_err(self, *args, **kwargs):
886        return self._format_text(
887            self._err_file, self._allow_colors['err'], *args, **kwargs)
888
889    def report_warning(self, message, only_once=False):
890        '''
891        Print the message to stderr, it will be prefixed with 'WARNING:'
892        If stderr is a tty file the 'WARNING:' will be colored
893        '''
894        if self.params.get('logger') is not None:
895            self.params['logger'].warning(message)
896        else:
897            if self.params.get('no_warnings'):
898                return
899            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
900
901    def deprecation_warning(self, message):
902        if self.params.get('logger') is not None:
903            self.params['logger'].warning('DeprecationWarning: {message}')
904        else:
905            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
906
907    def report_error(self, message, *args, **kwargs):
908        '''
909        Do the same as trouble, but prefixes the message with 'ERROR:', colored
910        in red if stderr is a tty file.
911        '''
912        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
913
914    def write_debug(self, message, only_once=False):
915        '''Log debug message or Print message to stderr'''
916        if not self.params.get('verbose', False):
917            return
918        message = '[debug] %s' % message
919        if self.params.get('logger'):
920            self.params['logger'].debug(message)
921        else:
922            self.to_stderr(message, only_once)
923
924    def report_file_already_downloaded(self, file_name):
925        """Report file has already been fully downloaded."""
926        try:
927            self.to_screen('[download] %s has already been downloaded' % file_name)
928        except UnicodeEncodeError:
929            self.to_screen('[download] The file has already been downloaded')
930
931    def report_file_delete(self, file_name):
932        """Report that existing file will be deleted."""
933        try:
934            self.to_screen('Deleting existing file %s' % file_name)
935        except UnicodeEncodeError:
936            self.to_screen('Deleting existing file')
937
938    def raise_no_formats(self, info, forced=False):
939        has_drm = info.get('__has_drm')
940        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
941        expected = self.params.get('ignore_no_formats_error')
942        if forced or not expected:
943            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
944                                 expected=has_drm or expected)
945        else:
946            self.report_warning(msg)
947
948    def parse_outtmpl(self):
949        outtmpl_dict = self.params.get('outtmpl', {})
950        if not isinstance(outtmpl_dict, dict):
951            outtmpl_dict = {'default': outtmpl_dict}
952        # Remove spaces in the default template
953        if self.params.get('restrictfilenames'):
954            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
955        else:
956            sanitize = lambda x: x
957        outtmpl_dict.update({
958            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
959            if outtmpl_dict.get(k) is None})
960        for key, val in outtmpl_dict.items():
961            if isinstance(val, bytes):
962                self.report_warning(
963                    'Parameter outtmpl is bytes, but should be a unicode string. '
964                    'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
965        return outtmpl_dict
966
    def get_output_path(self, dir_type='', filename=None):
        # Join the configured 'home' path, the per-type sub-path (e.g. for
        # thumbnails or infojson), and the filename; then sanitize the result.
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))
981
982    @staticmethod
983    def _outtmpl_expandpath(outtmpl):
984        # expand_path translates '%%' into '%' and '$$' into '$'
985        # correspondingly that is not what we want since we need to keep
986        # '%%' intact for template dict substitution step. Working around
987        # with boundary-alike separator hack.
988        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
989        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
990
991        # outtmpl should be expand_path'ed before template dict substitution
992        # because meta fields may contain env variables we don't want to
993        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
994        # title "Hello $PATH", we don't want `$PATH` to be expanded.
995        return expand_path(outtmpl).replace(sep, '')
996
997    @staticmethod
998    def escape_outtmpl(outtmpl):
999        ''' Escape any remaining strings like %s, %abc% etc. '''
1000        return re.sub(
1001            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1002            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1003            outtmpl)
1004
1005    @classmethod
1006    def validate_outtmpl(cls, outtmpl):
1007        ''' @return None or Exception object '''
1008        outtmpl = re.sub(
1009            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1010            lambda mobj: f'{mobj.group(0)[:-1]}s',
1011            cls._outtmpl_expandpath(outtmpl))
1012        try:
1013            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1014            return None
1015        except ValueError as err:
1016            return err
1017
1018    @staticmethod
1019    def _copy_infodict(info_dict):
1020        info_dict = dict(info_dict)
1021        for key in ('__original_infodict', '__postprocessors'):
1022            info_dict.pop(key, None)
1023        return info_dict
1024
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Mapping of mangled template key -> resolved value; filled by create_key
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Dotted-path lookup into info_dict; a leading '.' is ignored
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Resolve one parsed field expression: traversal, negation,
            # arithmetic, and optional strftime formatting
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Not a literal number: treat the operand as a field name
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            # json serializer fallback for non-JSON-native values
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # re.sub callback: resolve one %(...)X occurrence, stash the value
            # in TMPL_DICT under a mangled key, and emit a plain %-template
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Walk the ','-separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                value, fmt = format_decimal_suffix(value, f'%{fmt[:-1]}f%s' if fmt[:-1] else '%d%s'), 's'
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # Mangle '%' in the key with a NUL marker so the final
            # %-substitution cannot misinterpret it
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1195
1196    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1197        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1198        return self.escape_outtmpl(outtmpl) % info_dict
1199
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        # Evaluate the output template of the given type (falling back to
        # 'default') against info_dict.  Returns None after reporting when
        # the template is invalid.
        try:
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)

            # Some template types (e.g. infojson) force a particular extension
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1219
1220    def prepare_filename(self, info_dict, dir_type='', warn=False):
1221        """Generate the output filename."""
1222
1223        filename = self._prepare_filename(info_dict, dir_type or 'default')
1224        if not filename and dir_type not in ('', 'temp'):
1225            return ''
1226
1227        if warn:
1228            if not self.params.get('paths'):
1229                pass
1230            elif filename == '-':
1231                self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1232            elif os.path.isabs(filename):
1233                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1234        if filename == '-' or not filename:
1235            return filename
1236
1237        return self.get_output_path(dir_type, filename)
1238
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a human-readable rejection reason, or None to accept
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            # User-supplied callable filter; may not accept the 'incomplete' kwarg
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        # Archive hits and filter rejections use different break options/errors
        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1295
1296    @staticmethod
1297    def add_extra_info(info_dict, extra_info):
1298        '''Set the keys from extra_info in info dict if they are missing'''
1299        for key, value in extra_info.items():
1300            info_dict.setdefault(key, value)
1301
    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
            must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            # A specific extractor was requested: only consider that one
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Try to derive a video id from the URL alone, so the download
            # archive can be consulted before running the (expensive) extraction
            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                # Archive hit: skip extraction entirely (function returns None)
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            # for/else: reached only when no extractor accepted the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1347
    def __handle_extraction_exceptions(func):
        # Decorator for YoutubeDL methods: reports extraction errors instead of
        # letting them propagate (honouring 'ignoreerrors'), and transparently
        # retries the wrapped call whenever ReExtractInfo is raised (e.g. after
        # waiting for a scheduled premiere in _wait_for_video).
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    # Deliberate control-flow exceptions; always propagate
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        # '\r' resets the current status line before warning
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    # Loop around and call func again
                    continue
                except GeoRestrictedError as e:
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                break
        return wrapper
1379
    def _wait_for_video(self, ie_result):
        """Wait until a scheduled/upcoming video should be available, then raise
        ReExtractInfo so the decorated caller re-runs the extraction.

        No-op unless the 'wait_for_video' option is set and ie_result is a
        single video that has no formats/url yet (i.e. not yet available).
        """
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
            return

        # Render a duration given in seconds as HH:MM:SS
        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
        last_msg = ''

        def progress(msg):
            # Redraw the status line in place; pad with spaces so a shorter
            # message fully erases the previous, longer one
            nonlocal last_msg
            self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
            last_msg = msg

        min_wait, max_wait = self.params.get('wait_for_video')
        # Seconds until the announced release time, if the extractor provided one
        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            # No release time known: pick a wait within the configured bounds
            diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
            self.report_warning('Release time of video is not known')
        elif (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        # Clamp the wait into [min_wait, max_wait] (either bound may be unset)
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

        wait_till = time.time() + diff
        try:
            while True:
                diff = wait_till - time.time()
                if diff <= 0:
                    progress('')
                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
                time.sleep(1)
        except KeyboardInterrupt:
            # Ctrl+C retries the extraction immediately instead of aborting
            progress('')
            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
        except BaseException as e:
            # Clear the in-place status line before the exception surfaces
            if not isinstance(e, ReExtractInfo):
                self.to_screen('')
            raise
1420
1421    @__handle_extraction_exceptions
1422    def __extract_info(self, url, ie, download, extra_info, process):
1423        ie_result = ie.extract(url)
1424        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1425            return
1426        if isinstance(ie_result, list):
1427            # Backwards compatibility: old IE result format
1428            ie_result = {
1429                '_type': 'compat_list',
1430                'entries': ie_result,
1431            }
1432        if extra_info.get('original_url'):
1433            ie_result.setdefault('original_url', extra_info['original_url'])
1434        self.add_default_extra_info(ie_result, ie, url)
1435        if process:
1436            self._wait_for_video(ie_result)
1437            return self.process_ie_result(ie_result, download, extra_info)
1438        else:
1439            return ie_result
1440
1441    def add_default_extra_info(self, ie_result, ie, url):
1442        if url is not None:
1443            self.add_extra_info(ie_result, {
1444                'webpage_url': url,
1445                'original_url': url,
1446                'webpage_url_basename': url_basename(url),
1447                'webpage_url_domain': get_domain(url),
1448            })
1449        if ie is not None:
1450            self.add_extra_info(ie_result, {
1451                'extractor': ie.IE_NAME,
1452                'extractor_key': ie.ie_key(),
1453            })
1454
1455    def process_ie_result(self, ie_result, download=True, extra_info=None):
1456        """
1457        Take the result of the ie(may be modified) and resolve all unresolved
1458        references (URLs, playlist items).
1459
1460        It will also download the videos if 'download'.
1461        Returns the resolved ie_result.
1462        """
1463        if extra_info is None:
1464            extra_info = {}
1465        result_type = ie_result.get('_type', 'video')
1466
1467        if result_type in ('url', 'url_transparent'):
1468            ie_result['url'] = sanitize_url(ie_result['url'])
1469            if ie_result.get('original_url'):
1470                extra_info.setdefault('original_url', ie_result['original_url'])
1471
1472            extract_flat = self.params.get('extract_flat', False)
1473            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1474                    or extract_flat is True):
1475                info_copy = ie_result.copy()
1476                ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1477                if ie and not ie_result.get('id'):
1478                    info_copy['id'] = ie.get_temp_id(ie_result['url'])
1479                self.add_default_extra_info(info_copy, ie, ie_result['url'])
1480                self.add_extra_info(info_copy, extra_info)
1481                info_copy, _ = self.pre_process(info_copy)
1482                self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1483                if self.params.get('force_write_download_archive', False):
1484                    self.record_download_archive(info_copy)
1485                return ie_result
1486
1487        if result_type == 'video':
1488            self.add_extra_info(ie_result, extra_info)
1489            ie_result = self.process_video_result(ie_result, download=download)
1490            additional_urls = (ie_result or {}).get('additional_urls')
1491            if additional_urls:
1492                # TODO: Improve MetadataParserPP to allow setting a list
1493                if isinstance(additional_urls, compat_str):
1494                    additional_urls = [additional_urls]
1495                self.to_screen(
1496                    '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1497                self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1498                ie_result['additional_entries'] = [
1499                    self.extract_info(
1500                        url, download, extra_info=extra_info,
1501                        force_generic_extractor=self.params.get('force_generic_extractor'))
1502                    for url in additional_urls
1503                ]
1504            return ie_result
1505        elif result_type == 'url':
1506            # We have to add extra_info to the results because it may be
1507            # contained in a playlist
1508            return self.extract_info(
1509                ie_result['url'], download,
1510                ie_key=ie_result.get('ie_key'),
1511                extra_info=extra_info)
1512        elif result_type == 'url_transparent':
1513            # Use the information from the embedding page
1514            info = self.extract_info(
1515                ie_result['url'], ie_key=ie_result.get('ie_key'),
1516                extra_info=extra_info, download=False, process=False)
1517
1518            # extract_info may return None when ignoreerrors is enabled and
1519            # extraction failed with an error, don't crash and return early
1520            # in this case
1521            if not info:
1522                return info
1523
1524            force_properties = dict(
1525                (k, v) for k, v in ie_result.items() if v is not None)
1526            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1527                if f in force_properties:
1528                    del force_properties[f]
1529            new_result = info.copy()
1530            new_result.update(force_properties)
1531
1532            # Extracted info may not be a video result (i.e.
1533            # info.get('_type', 'video') != video) but rather an url or
1534            # url_transparent. In such cases outer metadata (from ie_result)
1535            # should be propagated to inner one (info). For this to happen
1536            # _type of info should be overridden with url_transparent. This
1537            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1538            if new_result.get('_type') == 'url':
1539                new_result['_type'] = 'url_transparent'
1540
1541            return self.process_ie_result(
1542                new_result, download=download, extra_info=extra_info)
1543        elif result_type in ('playlist', 'multi_video'):
1544            # Protect from infinite recursion due to recursively nested playlists
1545            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1546            webpage_url = ie_result['webpage_url']
1547            if webpage_url in self._playlist_urls:
1548                self.to_screen(
1549                    '[download] Skipping already downloaded playlist: %s'
1550                    % ie_result.get('title') or ie_result.get('id'))
1551                return
1552
1553            self._playlist_level += 1
1554            self._playlist_urls.add(webpage_url)
1555            self._sanitize_thumbnails(ie_result)
1556            try:
1557                return self.__process_playlist(ie_result, download)
1558            finally:
1559                self._playlist_level -= 1
1560                if not self._playlist_level:
1561                    self._playlist_urls.clear()
1562        elif result_type == 'compat_list':
1563            self.report_warning(
1564                'Extractor %s returned a compat_list result. '
1565                'It needs to be updated.' % ie_result.get('extractor'))
1566
1567            def _fixup(r):
1568                self.add_extra_info(r, {
1569                    'extractor': ie_result['extractor'],
1570                    'webpage_url': ie_result['webpage_url'],
1571                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
1572                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
1573                    'extractor_key': ie_result['extractor_key'],
1574                })
1575                return r
1576            ie_result['entries'] = [
1577                self.process_ie_result(_fixup(r), download, extra_info)
1578                for r in ie_result['entries']
1579            ]
1580            return ie_result
1581        else:
1582            raise Exception('Invalid result type: %s' % result_type)
1583
    def _ensure_dir_exists(self, path):
        # Create the directory containing `path` if needed, routing failures
        # to self.report_error; callers treat a falsy return as failure
        return make_dir(path, self.report_error)
1586
    def __process_playlist(self, ie_result, download):
        """Resolve and (if *download*) download every entry of a playlist result.

        Honours playliststart/playlistend/playlist_items, writes playlist-level
        metadata files, and returns ie_result with 'entries' replaced by the
        processed results.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking index positions that were requested but not extracted
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Spread the extracted entries over their 1-based indices,
                # leaving MissingEntry holes for anything not provided
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Parse "1,3,5-7"-style item specifications into indices
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For lazy sources the total count is unknown until iteration finishes
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Lazy sources may raise while extracting an entry; route such
                # failures through the standard extraction exception handling
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        # Without an explicit item list, walk indices from playliststart onward
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            entries.append(entry)
            try:
                if entry is not None:
                    # Evaluate filters early (silently) so break_on_existing /
                    # break_on_reject can stop the listing right away
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Build a synthetic info dict for rendering playlist-level filenames
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            # A None return from the _write_* helpers aborts the whole playlist
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Legacy behaviour: index ignores the saved pre-reorder value
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist context propagated into each entry's info dict
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1758
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Thin wrapper so each playlist entry gets the standard extraction
        # error handling/retry behaviour (returns falsy on reported failure)
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1763
1764    def _build_format_filter(self, filter_spec):
1765        " Returns a function to filter the formats according to the filter_spec "
1766
1767        OPERATORS = {
1768            '<': operator.lt,
1769            '<=': operator.le,
1770            '>': operator.gt,
1771            '>=': operator.ge,
1772            '=': operator.eq,
1773            '!=': operator.ne,
1774        }
1775        operator_rex = re.compile(r'''(?x)\s*
1776            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1777            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1778            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1779            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1780        m = operator_rex.fullmatch(filter_spec)
1781        if m:
1782            try:
1783                comparison_value = int(m.group('value'))
1784            except ValueError:
1785                comparison_value = parse_filesize(m.group('value'))
1786                if comparison_value is None:
1787                    comparison_value = parse_filesize(m.group('value') + 'B')
1788                if comparison_value is None:
1789                    raise ValueError(
1790                        'Invalid value %r in format specification %r' % (
1791                            m.group('value'), filter_spec))
1792            op = OPERATORS[m.group('op')]
1793
1794        if not m:
1795            STR_OPERATORS = {
1796                '=': operator.eq,
1797                '^=': lambda attr, value: attr.startswith(value),
1798                '$=': lambda attr, value: attr.endswith(value),
1799                '*=': lambda attr, value: value in attr,
1800            }
1801            str_operator_rex = re.compile(r'''(?x)\s*
1802                (?P<key>[a-zA-Z0-9._-]+)\s*
1803                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1804                (?P<value>[a-zA-Z0-9._-]+)\s*
1805                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1806            m = str_operator_rex.fullmatch(filter_spec)
1807            if m:
1808                comparison_value = m.group('value')
1809                str_op = STR_OPERATORS[m.group('op')]
1810                if m.group('negation'):
1811                    op = lambda attr, value: not str_op(attr, value)
1812                else:
1813                    op = str_op
1814
1815        if not m:
1816            raise SyntaxError('Invalid filter specification %r' % filter_spec)
1817
1818        def _filter(f):
1819            actual_value = f.get(m.group('key'))
1820            if actual_value is None:
1821                return m.group('none_inclusive')
1822            return op(actual_value, comparison_value)
1823        return _filter
1824
1825    def _check_formats(self, formats):
1826        for f in formats:
1827            self.to_screen('[info] Testing format %s' % f['format_id'])
1828            path = self.get_output_path('temp')
1829            if not self._ensure_dir_exists(f'{path}/'):
1830                continue
1831            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1832            temp_file.close()
1833            try:
1834                success, _ = self.dl(temp_file.name, f, test=True)
1835            except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1836                success = False
1837            finally:
1838                if os.path.exists(temp_file.name):
1839                    try:
1840                        os.remove(temp_file.name)
1841                    except OSError:
1842                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1843            if success:
1844                yield f
1845            else:
1846                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1847
1848    def _default_format_spec(self, info_dict, download=True):
1849
1850        def can_merge():
1851            merger = FFmpegMergerPP(self)
1852            return merger.available and merger.can_merge()
1853
1854        prefer_best = (
1855            not self.params.get('simulate')
1856            and download
1857            and (
1858                not can_merge()
1859                or info_dict.get('is_live', False)
1860                or self.outtmpl_dict['default'] == '-'))
1861        compat = (
1862            prefer_best
1863            or self.params.get('allow_multiple_audio_streams', False)
1864            or 'format-spec' in self.params.get('compat_opts', []))
1865
1866        return (
1867            'best/bestvideo+bestaudio' if prefer_best
1868            else 'bestvideo*+bestaudio/best' if not compat
1869            else 'bestvideo+bestaudio/best')
1870
1871    def build_format_selector(self, format_spec):
1872        def syntax_error(note, start):
1873            message = (
1874                'Invalid format specification: '
1875                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1876            return SyntaxError(message)
1877
1878        PICKFIRST = 'PICKFIRST'
1879        MERGE = 'MERGE'
1880        SINGLE = 'SINGLE'
1881        GROUP = 'GROUP'
1882        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1883
1884        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1885                                  'video': self.params.get('allow_multiple_video_streams', False)}
1886
1887        check_formats = self.params.get('check_formats') == 'selected'
1888
1889        def _parse_filter(tokens):
1890            filter_parts = []
1891            for type, string, start, _, _ in tokens:
1892                if type == tokenize.OP and string == ']':
1893                    return ''.join(filter_parts)
1894                else:
1895                    filter_parts.append(string)
1896
1897        def _remove_unused_ops(tokens):
1898            # Remove operators that we don't use and join them with the surrounding strings
1899            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1900            ALLOWED_OPS = ('/', '+', ',', '(', ')')
1901            last_string, last_start, last_end, last_line = None, None, None, None
1902            for type, string, start, end, line in tokens:
1903                if type == tokenize.OP and string == '[':
1904                    if last_string:
1905                        yield tokenize.NAME, last_string, last_start, last_end, last_line
1906                        last_string = None
1907                    yield type, string, start, end, line
1908                    # everything inside brackets will be handled by _parse_filter
1909                    for type, string, start, end, line in tokens:
1910                        yield type, string, start, end, line
1911                        if type == tokenize.OP and string == ']':
1912                            break
1913                elif type == tokenize.OP and string in ALLOWED_OPS:
1914                    if last_string:
1915                        yield tokenize.NAME, last_string, last_start, last_end, last_line
1916                        last_string = None
1917                    yield type, string, start, end, line
1918                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1919                    if not last_string:
1920                        last_string = string
1921                        last_start = start
1922                        last_end = end
1923                    else:
1924                        last_string += string
1925            if last_string:
1926                yield tokenize.NAME, last_string, last_start, last_end, last_line
1927
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Recursively parse a token stream into a list of FormatSelector tuples.

            The inside_* flags mark which construct this recursion level is
            nested in (merge: 'a+b', choice: 'a/b', group: '(...)') so that the
            construct's closing delimiter terminates the level; the delimiter is
            pushed back via restore_last_token() for the caller to consume.
            """
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # A bare name/number is a single-format selector, e.g. 'best' or '22'
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # delimiter belongs to the enclosing merge expression
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # ',' terminates the fallback chain; let the caller see it
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # '[...]' filter; with no preceding selector it filters 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors
1985
1986        def _merge(formats_pair):
1987            format_1, format_2 = formats_pair
1988
1989            formats_info = []
1990            formats_info.extend(format_1.get('requested_formats', (format_1,)))
1991            formats_info.extend(format_2.get('requested_formats', (format_2,)))
1992
1993            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1994                get_no_more = {'video': False, 'audio': False}
1995                for (i, fmt_info) in enumerate(formats_info):
1996                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1997                        formats_info.pop(i)
1998                        continue
1999                    for aud_vid in ['audio', 'video']:
2000                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2001                            if get_no_more[aud_vid]:
2002                                formats_info.pop(i)
2003                                break
2004                            get_no_more[aud_vid] = True
2005
2006            if len(formats_info) == 1:
2007                return formats_info[0]
2008
2009            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2010            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2011
2012            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2013            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2014
2015            output_ext = self.params.get('merge_output_format')
2016            if not output_ext:
2017                if the_only_video:
2018                    output_ext = the_only_video['ext']
2019                elif the_only_audio and not video_fmts:
2020                    output_ext = the_only_audio['ext']
2021                else:
2022                    output_ext = 'mkv'
2023
2024            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2025
2026            new_dict = {
2027                'requested_formats': formats_info,
2028                'format': '+'.join(filtered('format')),
2029                'format_id': '+'.join(filtered('format_id')),
2030                'ext': output_ext,
2031                'protocol': '+'.join(map(determine_protocol, formats_info)),
2032                'language': '+'.join(orderedSet(filtered('language'))) or None,
2033                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2034                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2035                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2036            }
2037
2038            if the_only_video:
2039                new_dict.update({
2040                    'width': the_only_video.get('width'),
2041                    'height': the_only_video.get('height'),
2042                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2043                    'fps': the_only_video.get('fps'),
2044                    'dynamic_range': the_only_video.get('dynamic_range'),
2045                    'vcodec': the_only_video.get('vcodec'),
2046                    'vbr': the_only_video.get('vbr'),
2047                    'stretched_ratio': the_only_video.get('stretched_ratio'),
2048                })
2049
2050            if the_only_audio:
2051                new_dict.update({
2052                    'acodec': the_only_audio.get('acodec'),
2053                    'abr': the_only_audio.get('abr'),
2054                    'asr': the_only_audio.get('asr'),
2055                })
2056
2057            return new_dict
2058
2059        def _check_formats(formats):
2060            if not check_formats:
2061                yield from formats
2062                return
2063            yield from self._check_formats(formats)
2064
        def _build_selector_function(selector):
            """Compile a parsed selector (or list of selectors) into a function
            that takes a ctx dict ({'formats', 'incomplete_formats'}) and
            yields the matching format dicts."""
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Comma: yield the results of every sub-selector in order
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Fallback chain: first alternative producing any formats wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Merge every combination of the two operands' results
                    for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        # Fold all formats into a single merged format, worst-first
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # Matches e.g. best, worst, bv, ba*, wv.2 etc.
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # Regardless of the variant, require at least one real stream
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst spec: select by extension or by format_id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            # Apply any '[...]' filters on top of the base selector
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = dict(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2166
        # Tokenize the format spec with Python's own tokenizer; a TokenError
        # means the spec has unbalanced brackets or parentheses.
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2172
2173        class TokenIterator(object):
2174            def __init__(self, tokens):
2175                self.tokens = tokens
2176                self.counter = 0
2177
2178            def __iter__(self):
2179                return self
2180
2181            def __next__(self):
2182                if self.counter >= len(self.tokens):
2183                    raise StopIteration()
2184                value = self.tokens[self.counter]
2185                self.counter += 1
2186                return value
2187
2188            next = __next__
2189
2190            def restore_last_token(self):
2191                self.counter -= 1
2192
        # Parse the token stream into selectors, then compile them into a
        # callable that picks formats out of a context dict.
        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
2195
2196    def _calc_headers(self, info_dict):
2197        res = std_headers.copy()
2198
2199        add_headers = info_dict.get('http_headers')
2200        if add_headers:
2201            res.update(add_headers)
2202
2203        cookies = self._calc_cookies(info_dict)
2204        if cookies:
2205            res['Cookie'] = cookies
2206
2207        if 'X-Forwarded-For' not in res:
2208            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2209            if x_forwarded_for_ip:
2210                res['X-Forwarded-For'] = x_forwarded_for_ip
2211
2212        return res
2213
2214    def _calc_cookies(self, info_dict):
2215        pr = sanitized_Request(info_dict['url'])
2216        self.cookiejar.add_cookie_header(pr)
2217        return pr.get_header('Cookie')
2218
2219    def _sort_thumbnails(self, thumbnails):
2220        thumbnails.sort(key=lambda t: (
2221            t.get('preference') if t.get('preference') is not None else -1,
2222            t.get('width') if t.get('width') is not None else -1,
2223            t.get('height') if t.get('height') is not None else -1,
2224            t.get('id') if t.get('id') is not None else '',
2225            t.get('url')))
2226
    def _sanitize_thumbnails(self, info_dict):
        """Normalize info_dict's thumbnails in place: ensure a 'thumbnails'
        list exists, sort it, assign ids/resolutions, sanitize URLs and,
        when check_formats is True, lazily drop unreachable thumbnails."""
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a lone 'thumbnail' field to a one-element list
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if not thumbnails:
            return

        def check_thumbnails(thumbnails):
            # Lazily yield only thumbnails whose URL answers a HEAD request
            for t in thumbnails:
                self.to_screen(f'[info] Testing thumbnail {t["id"]}')
                try:
                    self.urlopen(HEADRequest(t['url']))
                except network_exceptions as err:
                    self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                    continue
                yield t

        self._sort_thumbnails(thumbnails)
        for i, t in enumerate(thumbnails):
            if t.get('id') is None:
                t['id'] = '%d' % i
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            t['url'] = sanitize_url(t['url'])

        if self.params.get('check_formats') is True:
            # Test best thumbnails first but keep worst-first order in the result
            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
        else:
            info_dict['thumbnails'] = thumbnails
2258
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single-video info_dict, select the requested subtitles
        and formats and, when download is True, hand each selected format to
        process_info(). Returns the (mutated) info_dict.

        Raises ExtractorError when mandatory fields are missing or (unless
        ignore_no_formats_error is set) when no format matches the selector.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a mistyped field that we coerce
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str (extractor-bug workaround)
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce all known numeric fields to int (extractor-bug workaround)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep 'thumbnail' and 'thumbnails' consistent: the singular field
        # defaults to the last (best) entry of the list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the date fields from their corresponding timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile live_status with the is_live/was_live booleans, in either direction
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Drop DRM-protected formats unless explicitly allowed
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if info_dict.get('is_live'):
            # Keep only the variants matching the live_from_start preference
            get_from_start = bool(self.params.get('live_from_start'))
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            # A format without a URL is unusable; warn and drop it
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in derived per-format fields (format string, protocol, resolution, ...)
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                # tbr is in KBit/s; duration * tbr * 1024/8 estimates bytes
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        # Handle the listing-only options (-F, --list-thumbnails, --list-subs)
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # Loop so that in interactive mode an invalid/unmatched spec can be re-entered
        while True:
            if interactive_format_selection:
                req_format = input(
                    self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format)
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            # While in format selection we may need to have an access to the original
            # format set in order to calculate some metrics or do some processing.
            # For now we need to be able to guess whether original formats provided
            # by extractor are incomplete or not (i.e. whether extractor provides only
            # video-only or audio-only formats) for proper formats selection for
            # extractors with such incomplete formats (see
            # https://github.com/ytdl-org/youtube-dl/pull/5556).
            # Since formats may be filtered during format selection and may not match
            # the original formats the results may be incorrect. Thus original formats
            # or pre-calculated metrics should be passed to format selection routines
            # as well.
            # We will pass a context object containing all necessary additional data
            # instead of just formats.
            # This fixes incorrect format selection issue (see
            # https://github.com/ytdl-org/youtube-dl/issues/10083).
            incomplete_formats = (
                # All formats are video-only or
                all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                # all formats are audio-only
                or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

            ctx = {
                'formats': formats,
                'incomplete_formats': incomplete_formats,
            }

            formats_to_download = list(format_selector(ctx))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2556
2557    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2558        """Select the requested subtitles and their format"""
2559        available_subs = {}
2560        if normal_subtitles and self.params.get('writesubtitles'):
2561            available_subs.update(normal_subtitles)
2562        if automatic_captions and self.params.get('writeautomaticsub'):
2563            for lang, cap_info in automatic_captions.items():
2564                if lang not in available_subs:
2565                    available_subs[lang] = cap_info
2566
2567        if (not self.params.get('writesubtitles') and not
2568                self.params.get('writeautomaticsub') or not
2569                available_subs):
2570            return None
2571
2572        all_sub_langs = available_subs.keys()
2573        if self.params.get('allsubtitles', False):
2574            requested_langs = all_sub_langs
2575        elif self.params.get('subtitleslangs', False):
2576            # A list is used so that the order of languages will be the same as
2577            # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2578            requested_langs = []
2579            for lang_re in self.params.get('subtitleslangs'):
2580                if lang_re == 'all':
2581                    requested_langs.extend(all_sub_langs)
2582                    continue
2583                discard = lang_re[0] == '-'
2584                if discard:
2585                    lang_re = lang_re[1:]
2586                current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2587                if discard:
2588                    for lang in current_langs:
2589                        while lang in requested_langs:
2590                            requested_langs.remove(lang)
2591                else:
2592                    requested_langs.extend(current_langs)
2593            requested_langs = orderedSet(requested_langs)
2594        elif 'en' in available_subs:
2595            requested_langs = ['en']
2596        else:
2597            requested_langs = [list(all_sub_langs)[0]]
2598        if requested_langs:
2599            self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2600
2601        formats_query = self.params.get('subtitlesformat', 'best')
2602        formats_preference = formats_query.split('/') if formats_query else []
2603        subs = {}
2604        for lang in requested_langs:
2605            formats = available_subs.get(lang)
2606            if formats is None:
2607                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2608                continue
2609            for ext in formats_preference:
2610                if ext == 'best':
2611                    f = formats[-1]
2612                    break
2613                matches = list(filter(lambda f: f['ext'] == ext, formats))
2614                if matches:
2615                    f = matches[-1]
2616                    break
2617            else:
2618                f = formats[-1]
2619                self.report_warning(
2620                    'No subtitle format found matching "%s" for language %s, '
2621                    'using %s' % (formats_query, lang, f['ext']))
2622            subs[lang] = f
2623        return subs
2624
2625    def __forced_printings(self, info_dict, filename, incomplete):
2626        def print_mandatory(field, actual_field=None):
2627            if actual_field is None:
2628                actual_field = field
2629            if (self.params.get('force%s' % field, False)
2630                    and (not incomplete or info_dict.get(actual_field) is not None)):
2631                self.to_stdout(info_dict[actual_field])
2632
2633        def print_optional(field):
2634            if (self.params.get('force%s' % field, False)
2635                    and info_dict.get(field) is not None):
2636                self.to_stdout(info_dict[field])
2637
2638        info_dict = info_dict.copy()
2639        if filename is not None:
2640            info_dict['filename'] = filename
2641        if info_dict.get('requested_formats') is not None:
2642            # For RTMP URLs, also include the playpath
2643            info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2644        elif 'url' in info_dict:
2645            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2646
2647        if self.params.get('forceprint') or self.params.get('forcejson'):
2648            self.post_extract(info_dict)
2649        for tmpl in self.params.get('forceprint', []):
2650            mobj = re.match(r'\w+(=?)$', tmpl)
2651            if mobj and mobj.group(1):
2652                tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2653            elif mobj:
2654                tmpl = '%({})s'.format(tmpl)
2655            self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2656
2657        print_mandatory('title')
2658        print_mandatory('id')
2659        print_mandatory('url', 'urls')
2660        print_optional('thumbnail')
2661        print_optional('description')
2662        print_optional('filename')
2663        if self.params.get('forceduration') and info_dict.get('duration') is not None:
2664            self.to_stdout(formatSeconds(info_dict['duration']))
2665        print_mandatory('format')
2666
2667        if self.params.get('forcejson'):
2668            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2669
2670    def dl(self, name, info, subtitle=False, test=False):
2671        if not info.get('url'):
2672            self.raise_no_formats(info, True)
2673
2674        if test:
2675            verbose = self.params.get('verbose')
2676            params = {
2677                'test': True,
2678                'quiet': self.params.get('quiet') or not verbose,
2679                'verbose': verbose,
2680                'noprogress': not verbose,
2681                'nopart': True,
2682                'skip_unavailable_fragments': False,
2683                'keep_fragments': False,
2684                'overwrites': True,
2685                '_no_ytdl_file': True,
2686            }
2687        else:
2688            params = self.params
2689        fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2690        if not test:
2691            for ph in self._progress_hooks:
2692                fd.add_progress_hook(ph)
2693            urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2694            self.write_debug('Invoking downloader on "%s"' % urls)
2695
2696        # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2697        # But it may contain objects that are not deep-copyable
2698        new_info = self._copy_infodict(info)
2699        if new_info.get('http_headers') is None:
2700            new_info['http_headers'] = self._calc_headers(new_info)
2701        return fd.download(name, new_info, subtitle)
2702
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced printing, writing of side files (description, subtitles,
        thumbnails, info-json, annotations, link files), pre-processing, the
        actual download (including multi-format merging), ffmpeg fixups,
        post-processing and download-archive recording.
        Raises MaxDownloadsReached when --max-downloads is hit.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Bail out before doing any work if --max-downloads is already reached
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Timestamp the title of an in-progress live stream
        if info_dict.get('is_live') and not self.params.get('live_from_start'):
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        # Skip entries rejected by --match-filter and similar options
        if self._match_entry(info_dict) is not None:
            return

        self.post_extract(info_dict)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        if self.params.get('simulate'):
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_dict)
            # Do nothing else if in simulate mode
            return

        if full_filename is None:
            return
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        # Each _write_* helper returns None on a fatal (already-reported) error
        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:
            return

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
            return
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
            return
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['infojson_filename'] = infofn
            # For backward compatibility, even though it was a private field
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:
            return

        # Note: Annotations are deprecated
        annofn = None
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
        if annofn:
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        # Write internet shortcut files
        def _write_link_file(link_type):
            # Returns False on fatal error, True otherwise (including "already present")
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return False
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            if not self._ensure_dir_exists(encodeFilename(linkfn)):
                return False
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                return True
            try:
                self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                             newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                    template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
                    if link_type == 'desktop':
                        template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                    linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
            except (OSError, IOError):
                self.report_error(f'Cannot write internet shortcut {linkfn}')
                return False
            return True

        write_links = {
            'url': self.params.get('writeurllink'),
            'webloc': self.params.get('writewebloclink'),
            'desktop': self.params.get('writedesktoplink'),
        }
        if self.params.get('writelink'):
            # --write-link picks the platform-native shortcut type
            link_type = ('webloc' if sys.platform == 'darwin'
                         else 'desktop' if sys.platform.startswith('linux')
                         else 'url')
            write_links[link_type] = True

        if any(should_write and not _write_link_file(link_type)
               for link_type, should_write in write_links.items()):
            return

        try:
            info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        except PostProcessingError as err:
            self.report_error('Preprocessing: %s' % str(err))
            return

        must_record_download_archive = False
        if self.params.get('skip_download', False):
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
        else:
            # Download
            info_dict.setdefault('__postprocessors', [])
            try:

                def existing_file(*filepaths):
                    # Return a previously-downloaded file to reuse, or None after
                    # deleting any existing files when overwriting is enabled
                    ext = info_dict.get('ext')
                    final_ext = self.params.get('final_ext', ext)
                    existing_files = []
                    for file in orderedSet(filepaths):
                        if final_ext != ext:
                            converted = replace_extension(file, final_ext, ext)
                            if os.path.exists(encodeFilename(converted)):
                                existing_files.append(converted)
                        if os.path.exists(encodeFilename(file)):
                            existing_files.append(file)

                    if not existing_files or self.params.get('overwrites', False):
                        for file in orderedSet(existing_files):
                            self.report_file_delete(file)
                            os.remove(encodeFilename(file))
                        return None

                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                    return existing_files[0]

                success = True
                if info_dict.get('requested_formats') is not None:

                    def compatible_formats(formats):
                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
                        if len(video_formats) > 2 or len(audio_formats) > 2:
                            return False

                        # Check extension
                        exts = set(format.get('ext') for format in formats)
                        COMPATIBLE_EXTS = (
                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                            set(('webm',)),
                        )
                        for ext_sets in COMPATIBLE_EXTS:
                            if ext_sets.issuperset(exts):
                                return True
                        # TODO: Check acodec/vcodec
                        return False

                    requested_formats = info_dict['requested_formats']
                    old_ext = info_dict['ext']
                    if self.params.get('merge_output_format') is None:
                        if not compatible_formats(requested_formats):
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'Requested formats are incompatible for merge and will be merged into mkv')
                        if (info_dict['ext'] == 'webm'
                                and info_dict.get('thumbnails')
                                # check with type instead of pp_key, __name__, or isinstance
                                # since we dont want any custom PPs to trigger this
                                and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'webm doesn\'t support embedding a thumbnail, mkv will be used')
                    new_ext = info_dict['ext']

                    def correct_ext(filename, ext=new_ext):
                        # Swap the filename's extension for `ext`, except when streaming to stdout
                        if filename == '-':
                            return filename
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext in (old_ext, new_ext)
                            else filename)
                        return '%s.%s' % (filename_wo_ext, ext)

                    # Ensure filename always has a correct extension for successful merge
                    full_filename = correct_ext(full_filename)
                    temp_filename = correct_ext(temp_filename)
                    dl_filename = existing_file(full_filename, temp_filename)
                    info_dict['__real_download'] = False

                    downloaded = []
                    merger = FFmpegMergerPP(self)

                    fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                    if dl_filename is not None:
                        self.report_file_already_downloaded(dl_filename)
                    elif fd:
                        # One downloader handles all the requested formats in a single invocation
                        for f in requested_formats if fd != FFmpegFD else []:
                            f['filepath'] = fname = prepend_extension(
                                correct_ext(temp_filename, info_dict['ext']),
                                'f%s' % f['format_id'], info_dict['ext'])
                            downloaded.append(fname)
                        info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        # Download each requested format separately (merged later, if possible)
                        if self.params.get('allow_unplayable_formats'):
                            self.report_warning(
                                'You have requested merging of multiple formats '
                                'while also allowing unplayable formats to be downloaded. '
                                'The formats won\'t be merged to prevent data corruption.')
                        elif not merger.available:
                            self.report_warning(
                                'You have requested merging of multiple formats but ffmpeg is not installed. '
                                'The formats won\'t be merged.')

                        if temp_filename == '-':
                            reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                      else 'but the formats are incompatible for simultaneous download' if merger.available
                                      else 'but ffmpeg is not installed')
                            self.report_warning(
                                f'You have requested downloading multiple formats to stdout {reason}. '
                                'The formats will be streamed one after the other')
                            fname = temp_filename
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            if temp_filename != '-':
                                fname = prepend_extension(
                                    correct_ext(temp_filename, new_info['ext']),
                                    'f%s' % f['format_id'], new_info['ext'])
                                if not self._ensure_dir_exists(fname):
                                    return
                                f['filepath'] = fname
                                downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success

                    if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
                else:
                    # Just a single file
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None or dl_filename == temp_filename:
                        # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                        # So we should try to resume the download
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        self.report_file_already_downloaded(dl_filename)

                dl_filename = dl_filename or temp_filename
                info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

            except network_exceptions as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and full_filename != '-':

                def fixup():
                    # Queue ffmpeg-based fixup postprocessors according to the --fixup policy
                    do_fixup = True
                    fixup_policy = self.params.get('fixup')
                    vid = info_dict['id']

                    if fixup_policy in ('ignore', 'never'):
                        return
                    elif fixup_policy == 'warn':
                        do_fixup = False
                    elif fixup_policy != 'force':
                        assert fixup_policy in ('detect_or_warn', None)
                        if not info_dict.get('__real_download'):
                            do_fixup = False

                    def ffmpeg_fixup(cndn, msg, cls):
                        # Append postprocessor `cls` when condition holds; warn-only otherwise
                        if not cndn:
                            return
                        if not do_fixup:
                            self.report_warning(f'{vid}: {msg}')
                            return
                        pp = cls(self)
                        if pp.available:
                            info_dict['__postprocessors'].append(pp)
                        else:
                            self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                    stretched_ratio = info_dict.get('stretched_ratio')
                    ffmpeg_fixup(
                        stretched_ratio not in (1, None),
                        f'Non-uniform pixel ratio {stretched_ratio}',
                        FFmpegFixupStretchedPP)

                    ffmpeg_fixup(
                        (info_dict.get('requested_formats') is None
                         and info_dict.get('container') == 'm4a_dash'
                         and info_dict.get('ext') == 'm4a'),
                        'writing DASH m4a. Only some players support this container',
                        FFmpegFixupM4aPP)

                    downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                    downloader = downloader.__name__ if downloader else None

                    if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
                        ffmpeg_fixup(downloader == 'HlsFD',
                                     'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                     FFmpegFixupM3u8PP)
                        ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                     'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)

                fixup()
                try:
                    info_dict = self.post_process(dl_filename, info_dict, files_to_move)
                except PostProcessingError as err:
                    self.report_error('Postprocessing: %s' % str(err))
                    return
                try:
                    for ph in self._post_hooks:
                        ph(info_dict['filepath'])
                except Exception as err:
                    self.report_error('post hooks: %s' % str(err))
                    return
                must_record_download_archive = True

        if must_record_download_archive or self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        # Re-check --max-downloads after this download has been counted
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None and self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()
3081
3082    def __download_wrapper(self, func):
3083        @functools.wraps(func)
3084        def wrapper(*args, **kwargs):
3085            try:
3086                res = func(*args, **kwargs)
3087            except UnavailableVideoError as e:
3088                self.report_error(e)
3089            except MaxDownloadsReached as e:
3090                self.to_screen(f'[info] {e}')
3091                raise
3092            except DownloadCancelled as e:
3093                self.to_screen(f'[info] {e}')
3094                if not self.params.get('break_per_url'):
3095                    raise
3096            else:
3097                if self.params.get('dump_single_json', False):
3098                    self.post_extract(res)
3099                    self.to_stdout(json.dumps(self.sanitize_info(res)))
3100        return wrapper
3101
3102    def download(self, url_list):
3103        """Download a given list of URLs."""
3104        url_list = variadic(url_list)  # Passing a single URL is a common mistake
3105        outtmpl = self.outtmpl_dict['default']
3106        if (len(url_list) > 1
3107                and outtmpl != '-'
3108                and '%' not in outtmpl
3109                and self.params.get('max_downloads') != 1):
3110            raise SameFileError(outtmpl)
3111
3112        for url in url_list:
3113            self.__download_wrapper(self.extract_info)(
3114                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3115
3116        return self._download_retcode
3117
3118    def download_with_info_file(self, info_filename):
3119        with contextlib.closing(fileinput.FileInput(
3120                [info_filename], mode='r',
3121                openhook=fileinput.hook_encoded('utf-8'))) as f:
3122            # FileInput doesn't have a read method, we can't call json.load
3123            info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3124        try:
3125            self.__download_wrapper(self.process_ie_result)(info, download=True)
3126        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3127            if not isinstance(e, EntryNotInPlaylist):
3128                self.to_stderr('\r')
3129            webpage_url = info.get('webpage_url')
3130            if webpage_url is not None:
3131                self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3132                return self.download([webpage_url])
3133            else:
3134                raise
3135        return self._download_retcode
3136
3137    @staticmethod
3138    def sanitize_info(info_dict, remove_private_keys=False):
3139        ''' Sanitize the infodict for converting to json '''
3140        if info_dict is None:
3141            return info_dict
3142        info_dict.setdefault('epoch', int(time.time()))
3143        remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3144        keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3145        if remove_private_keys:
3146            remove_keys |= {
3147                'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3148                'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3149            }
3150            empty_values = (None, {}, [], set(), tuple())
3151            reject = lambda k, v: k not in keep_keys and (
3152                k.startswith('_') or k in remove_keys or v in empty_values)
3153        else:
3154            reject = lambda k, v: k in remove_keys
3155
3156        def filter_fn(obj):
3157            if isinstance(obj, dict):
3158                return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3159            elif isinstance(obj, (list, tuple, set, LazyList)):
3160                return list(map(filter_fn, obj))
3161            elif obj is None or isinstance(obj, (str, int, float, bool)):
3162                return obj
3163            else:
3164                return repr(obj)
3165
3166        return filter_fn(info_dict)
3167
3168    @staticmethod
3169    def filter_requested_info(info_dict, actually_filter=True):
3170        ''' Alias of sanitize_info for backward compatibility '''
3171        return YoutubeDL.sanitize_info(info_dict, actually_filter)
3172
3173    def run_pp(self, pp, infodict):
3174        files_to_delete = []
3175        if '__files_to_move' not in infodict:
3176            infodict['__files_to_move'] = {}
3177        try:
3178            files_to_delete, infodict = pp.run(infodict)
3179        except PostProcessingError as e:
3180            # Must be True and not 'only_download'
3181            if self.params.get('ignoreerrors') is True:
3182                self.report_error(e)
3183                return infodict
3184            raise
3185
3186        if not files_to_delete:
3187            return infodict
3188        if self.params.get('keepvideo', False):
3189            for f in files_to_delete:
3190                infodict['__files_to_move'].setdefault(f, '')
3191        else:
3192            for old_filename in set(files_to_delete):
3193                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3194                try:
3195                    os.remove(encodeFilename(old_filename))
3196                except (IOError, OSError):
3197                    self.report_warning('Unable to remove downloaded original file')
3198                if old_filename in infodict['__files_to_move']:
3199                    del infodict['__files_to_move'][old_filename]
3200        return infodict
3201
3202    @staticmethod
3203    def post_extract(info_dict):
3204        def actual_post_extract(info_dict):
3205            if info_dict.get('_type') in ('playlist', 'multi_video'):
3206                for video_dict in info_dict.get('entries', {}):
3207                    actual_post_extract(video_dict or {})
3208                return
3209
3210            post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3211            extra = post_extractor().items()
3212            info_dict.update(extra)
3213            info_dict.pop('__post_extractor', None)
3214
3215            original_infodict = info_dict.get('__original_infodict') or {}
3216            original_infodict.update(extra)
3217            original_infodict.pop('__post_extractor', None)
3218
3219        actual_post_extract(info_dict or {})
3220
3221    def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3222        info = dict(ie_info)
3223        info['__files_to_move'] = files_to_move or {}
3224        for pp in self._pps[key]:
3225            info = self.run_pp(pp, info)
3226        return info, info.pop('__files_to_move', None)
3227
3228    def post_process(self, filename, ie_info, files_to_move=None):
3229        """Run all the postprocessors on the given file."""
3230        info = dict(ie_info)
3231        info['filepath'] = filename
3232        info['__files_to_move'] = files_to_move or {}
3233
3234        for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3235            info = self.run_pp(pp, info)
3236        info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3237        del info['__files_to_move']
3238        for pp in self._pps['after_move']:
3239            info = self.run_pp(pp, info)
3240        return info
3241
3242    def _make_archive_id(self, info_dict):
3243        video_id = info_dict.get('id')
3244        if not video_id:
3245            return
3246        # Future-proof against any change in case
3247        # and backwards compatibility with prior versions
3248        extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3249        if extractor is None:
3250            url = str_or_none(info_dict.get('url'))
3251            if not url:
3252                return
3253            # Try to find matching extractor for the URL and take its ie_key
3254            for ie_key, ie in self._ies.items():
3255                if ie.suitable(url):
3256                    extractor = ie_key
3257                    break
3258            else:
3259                return
3260        return '%s %s' % (extractor.lower(), video_id)
3261
3262    def in_download_archive(self, info_dict):
3263        fn = self.params.get('download_archive')
3264        if fn is None:
3265            return False
3266
3267        vid_id = self._make_archive_id(info_dict)
3268        if not vid_id:
3269            return False  # Incomplete video information
3270
3271        return vid_id in self.archive
3272
3273    def record_download_archive(self, info_dict):
3274        fn = self.params.get('download_archive')
3275        if fn is None:
3276            return
3277        vid_id = self._make_archive_id(info_dict)
3278        assert vid_id
3279        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3280            archive_file.write(vid_id + '\n')
3281        self.archive.add(vid_id)
3282
3283    @staticmethod
3284    def format_resolution(format, default='unknown'):
3285        if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3286            return 'audio only'
3287        if format.get('resolution') is not None:
3288            return format['resolution']
3289        if format.get('width') and format.get('height'):
3290            return '%dx%d' % (format['width'], format['height'])
3291        elif format.get('height'):
3292            return '%sp' % format['height']
3293        elif format.get('width'):
3294            return '%dx?' % format['width']
3295        return default
3296
    def _format_note(self, fdict):
        """Build a short human-readable note for a format dict (language,
        bitrates, codecs, filesize, ...) used by the legacy format list.

        Each section appends to `res`, prefixing a separator only when some
        text already exists — so the order of the blocks below is significant.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported)'
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s]' % fdict['language']
        if fdict.get('format_note') is not None:
            if res:
                res += ' '
            res += fdict['format_note']
        if fdict.get('tbr') is not None:
            if res:
                res += ', '
            res += '%4dk' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' glues the codec to the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No codec known but both bitrates present: label the vbr explicitly
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the size as approximate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
3356
3357    def _list_format_headers(self, *headers):
3358        if self.params.get('listformats_table', True) is not False:
3359            return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3360        return headers
3361
    def list_formats(self, info_dict):
        """Print a table of the available formats for a video.

        Uses the modern multi-column layout unless the user disabled
        'listformats_table', in which case the legacy youtube-dl style
        (format code / extension / resolution / note) is printed instead.
        """
        if not info_dict.get('formats') and not info_dict.get('url'):
            self.to_screen('%s has no formats' % info_dict['id'])
            return
        self.to_screen('[info] Available formats for %s:' % info_dict['id'])

        # A bare single-format info_dict (no 'formats' list) is listed as itself
        formats = info_dict.get('formats', [info_dict])
        new_format = self.params.get('listformats_table', True) is not False
        if new_format:
            # Box-drawing vertical bar; falls back to '|' when the output
            # encoding cannot represent it
            delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
            # Formats with preference < -1000 are deliberately hidden from the table
            table = [
                [
                    self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
                    format_field(f, 'ext'),
                    format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                    format_field(f, 'fps', '\t%d'),
                    format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                    delim,
                    format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
                    format_field(f, 'tbr', '\t%dk'),
                    shorten_protocol_name(f.get('protocol', '')),
                    delim,
                    format_field(f, 'vcodec', default='unknown').replace(
                        'none',
                        'images' if f.get('acodec') == 'none'
                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
                    format_field(f, 'vbr', '\t%dk'),
                    format_field(f, 'acodec', default='unknown').replace(
                        'none',
                        '' if f.get('vcodec') == 'none'
                        else self._format_screen('video only', self.Styles.SUPPRESS)),
                    format_field(f, 'abr', '\t%dk'),
                    format_field(f, 'asr', '\t%dHz'),
                    join_nonempty(
                        self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                        format_field(f, 'language', '[%s]'),
                        join_nonempty(
                            format_field(f, 'format_note'),
                            format_field(f, 'container', ignore=(None, f.get('ext'))),
                            delim=', '),
                        delim=' '),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = self._list_format_headers(
                'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
                delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
        else:
            # Legacy (youtube-dl compatible) listing
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_stdout(render_table(
            header_line, table,
            extra_gap=(0 if new_format else 1),
            hide_empty=new_format,
            delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3423
3424    def list_thumbnails(self, info_dict):
3425        thumbnails = list(info_dict.get('thumbnails'))
3426        if not thumbnails:
3427            self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3428            return
3429
3430        self.to_screen(
3431            '[info] Thumbnails for %s:' % info_dict['id'])
3432        self.to_stdout(render_table(
3433            self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3434            [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3435
3436    def list_subtitles(self, video_id, subtitles, name='subtitles'):
3437        if not subtitles:
3438            self.to_screen('%s has no %s' % (video_id, name))
3439            return
3440        self.to_screen(
3441            'Available %s for %s:' % (name, video_id))
3442
3443        def _row(lang, formats):
3444            exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3445            if len(set(names)) == 1:
3446                names = [] if names[0] == 'unknown' else names[:1]
3447            return [lang, ', '.join(names), ', '.join(exts)]
3448
3449        self.to_stdout(render_table(
3450            self._list_format_headers('Language', 'Name', 'Formats'),
3451            [_row(lang, formats) for lang, formats in subtitles.items()],
3452            hide_empty=True))
3453
3454    def urlopen(self, req):
3455        """ Start an HTTP download """
3456        if isinstance(req, compat_basestring):
3457            req = sanitized_Request(req)
3458        return self._opener.open(req, timeout=self._socket_timeout)
3459
3460    def print_debug_header(self):
3461        if not self.params.get('verbose'):
3462            return
3463
3464        def get_encoding(stream):
3465            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3466            if not supports_terminal_sequences(stream):
3467                from .compat import WINDOWS_VT_MODE
3468                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3469            return ret
3470
3471        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3472            locale.getpreferredencoding(),
3473            sys.getfilesystemencoding(),
3474            get_encoding(self._screen_file), get_encoding(self._err_file),
3475            self.get_encoding())
3476
3477        logger = self.params.get('logger')
3478        if logger:
3479            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3480            write_debug(encoding_str)
3481        else:
3482            write_string(f'[debug] {encoding_str}\n', encoding=None)
3483            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3484
3485        source = detect_variant()
3486        write_debug(join_nonempty(
3487            'yt-dlp version', __version__,
3488            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3489            '' if source == 'unknown' else f'({source})',
3490            delim=' '))
3491        if not _LAZY_LOADER:
3492            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3493                write_debug('Lazy loading extractors is forcibly disabled')
3494            else:
3495                write_debug('Lazy loading extractors is disabled')
3496        if plugin_extractors or plugin_postprocessors:
3497            write_debug('Plugins: %s' % [
3498                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3499                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3500        if self.params.get('compat_opts'):
3501            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3502
3503        if source == 'source':
3504            try:
3505                sp = Popen(
3506                    ['git', 'rev-parse', '--short', 'HEAD'],
3507                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3508                    cwd=os.path.dirname(os.path.abspath(__file__)))
3509                out, err = sp.communicate_or_kill()
3510                out = out.decode().strip()
3511                if re.match('[0-9a-f]+', out):
3512                    write_debug('Git HEAD: %s' % out)
3513            except Exception:
3514                try:
3515                    sys.exc_clear()
3516                except Exception:
3517                    pass
3518
3519        def python_implementation():
3520            impl_name = platform.python_implementation()
3521            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3522                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3523            return impl_name
3524
3525        write_debug('Python version %s (%s %s) - %s' % (
3526            platform.python_version(),
3527            python_implementation(),
3528            platform.architecture()[0],
3529            platform_name()))
3530
3531        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3532        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3533        if ffmpeg_features:
3534            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3535
3536        exe_versions['rtmpdump'] = rtmpdump_version()
3537        exe_versions['phantomjs'] = PhantomJSwrapper._version()
3538        exe_str = ', '.join(
3539            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3540        ) or 'none'
3541        write_debug('exe versions: %s' % exe_str)
3542
3543        from .downloader.websocket import has_websockets
3544        from .postprocessor.embedthumbnail import has_mutagen
3545        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3546
3547        lib_str = join_nonempty(
3548            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3549            SECRETSTORAGE_AVAILABLE and 'secretstorage',
3550            has_mutagen and 'mutagen',
3551            SQLITE_AVAILABLE and 'sqlite',
3552            has_websockets and 'websockets',
3553            delim=', ') or 'none'
3554        write_debug('Optional libraries: %s' % lib_str)
3555
3556        proxy_map = {}
3557        for handler in self._opener.handlers:
3558            if hasattr(handler, 'proxies'):
3559                proxy_map.update(handler.proxies)
3560        write_debug(f'Proxy map: {proxy_map}')
3561
3562        # Not implemented
3563        if False and self.params.get('call_home'):
3564            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3565            write_debug('Public IP address: %s' % ipaddr)
3566            latest_version = self.urlopen(
3567                'https://yt-dl.org/latest/version').read().decode('utf-8')
3568            if version_tuple(latest_version) > version_tuple(__version__):
3569                self.report_warning(
3570                    'You are using an outdated version (newest version: %s)! '
3571                    'See https://yt-dl.org/update if you need help updating.' %
3572                    latest_version)
3573
    def _setup_opener(self):
        """Build and store the urllib opener (self._opener) used for all HTTP
        requests, wiring up cookies, proxies, HTTPS, redirects, data: URLs
        and a disabled file: handler. Also sets self._socket_timeout and
        self.cookiejar."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 20 seconds
        self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string means "no proxy at all"
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3621
3622    def encode(self, s):
3623        if isinstance(s, bytes):
3624            return s  # Already encoded
3625
3626        try:
3627            return s.encode(self.get_encoding())
3628        except UnicodeEncodeError as err:
3629            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3630            raise
3631
3632    def get_encoding(self):
3633        encoding = self.params.get('encoding')
3634        if encoding is None:
3635            encoding = preferredencoding()
3636        return encoding
3637
3638    def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3639        ''' Write infojson and returns True = written, False = skip, None = error '''
3640        if overwrite is None:
3641            overwrite = self.params.get('overwrites', True)
3642        if not self.params.get('writeinfojson'):
3643            return False
3644        elif not infofn:
3645            self.write_debug(f'Skipping writing {label} infojson')
3646            return False
3647        elif not self._ensure_dir_exists(infofn):
3648            return None
3649        elif not overwrite and os.path.exists(infofn):
3650            self.to_screen(f'[info] {label.title()} metadata is already present')
3651        else:
3652            self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3653            try:
3654                write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3655            except (OSError, IOError):
3656                self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3657                return None
3658        return True
3659
3660    def _write_description(self, label, ie_result, descfn):
3661        ''' Write description and returns True = written, False = skip, None = error '''
3662        if not self.params.get('writedescription'):
3663            return False
3664        elif not descfn:
3665            self.write_debug(f'Skipping writing {label} description')
3666            return False
3667        elif not self._ensure_dir_exists(descfn):
3668            return None
3669        elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3670            self.to_screen(f'[info] {label.title()} description is already present')
3671        elif ie_result.get('description') is None:
3672            self.report_warning(f'There\'s no {label} description to write')
3673            return False
3674        else:
3675            try:
3676                self.to_screen(f'[info] Writing {label} description to: {descfn}')
3677                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3678                    descfile.write(ie_result['description'])
3679            except (OSError, IOError):
3680                self.report_error(f'Cannot write {label} description file {descfn}')
3681                return None
3682        return True
3683
    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret

        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            # Existing files are reused unless overwrites are enabled
            if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            # Subtitle content already in memory: write it out directly
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except (OSError, IOError):
                    # A local write error aborts the whole subtitle pass
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            # Otherwise download the subtitle from its URL
            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                # Download failures only skip this language, not the whole pass
                self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
                continue
        return ret
3731
    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        # Thumbnail ids are only added to the filename when several are written
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        # Iterate in reverse (highest preference first); the index list is
        # materialized up-front so pop(idx) below is safe during iteration
        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(t['url'])
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # Failed thumbnails are dropped from the list entirely
                    thumbnails.pop(idx)
                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
            # Unless --write-all-thumbnails, stop after the first success
            if ret and not write_all:
                break
        return ret
3772