1#!/usr/bin/env python
2# coding: utf-8
3
4from __future__ import absolute_import, unicode_literals
5
6import collections
7import contextlib
8import copy
9import datetime
10import errno
11import fileinput
12import io
13import itertools
14import json
15import locale
16import operator
17import os
18import platform
19import re
20import shutil
21import subprocess
22import socket
23import sys
24import time
25import tokenize
26import traceback
27import random
28
29from string import ascii_letters
30
31from .compat import (
32    compat_basestring,
33    compat_cookiejar,
34    compat_get_terminal_size,
35    compat_http_client,
36    compat_kwargs,
37    compat_numeric_types,
38    compat_os_name,
39    compat_str,
40    compat_tokenize_tokenize,
41    compat_urllib_error,
42    compat_urllib_request,
43    compat_urllib_request_DataHandler,
44)
45from .utils import (
46    age_restricted,
47    args_to_str,
48    ContentTooShortError,
49    date_from_str,
50    DateRange,
51    DEFAULT_OUTTMPL,
52    determine_ext,
53    determine_protocol,
54    DownloadError,
55    encode_compat_str,
56    encodeFilename,
57    error_to_compat_str,
58    expand_path,
59    ExtractorError,
60    format_bytes,
61    formatSeconds,
62    GeoRestrictedError,
63    int_or_none,
64    ISO3166Utils,
65    locked_file,
66    make_HTTPS_handler,
67    MaxDownloadsReached,
68    orderedSet,
69    PagedList,
70    parse_filesize,
71    PerRequestProxyHandler,
72    platform_name,
73    PostProcessingError,
74    preferredencoding,
75    prepend_extension,
76    register_socks_protocols,
77    render_table,
78    replace_extension,
79    SameFileError,
80    sanitize_filename,
81    sanitize_path,
82    sanitize_url,
83    sanitized_Request,
84    std_headers,
85    str_or_none,
86    subtitles_filename,
87    UnavailableVideoError,
88    url_basename,
89    version_tuple,
90    write_json_file,
91    write_string,
92    YoutubeDLCookieJar,
93    YoutubeDLCookieProcessor,
94    YoutubeDLHandler,
95    YoutubeDLRedirectHandler,
96)
97from .cache import Cache
98from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
99from .extractor.openload import PhantomJSwrapper
100from .downloader import get_suitable_downloader
101from .downloader.rtmp import rtmpdump_version
102from .postprocessor import (
103    FFmpegFixupM3u8PP,
104    FFmpegFixupM4aPP,
105    FFmpegFixupStretchedPP,
106    FFmpegMergerPP,
107    FFmpegPostProcessor,
108    get_postprocessor,
109)
110from .version import __version__
111
112if compat_os_name == 'nt':
113    import ctypes
114
115
116class YoutubeDL(object):
117    """YoutubeDL class.
118
    YoutubeDL objects are the ones responsible for downloading the
120    actual video file and writing it to disk if the user has requested
121    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors handle),
    it has to pass the URL to one of them.
125
126    For this, YoutubeDL objects have a method that allows
127    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it over to the first InfoExtractor it
129    finds that reports being able to handle it. The InfoExtractor extracts
130    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
132    Downloader to download the video.
133
134    YoutubeDL objects accept a lot of parameters. In order not to saturate
135    the object constructor with arguments, it receives a dictionary of
136    options instead. These options are available through the params
137    attribute for the InfoExtractors to use. The YoutubeDL also
138    registers itself as the downloader in charge for the InfoExtractors
139    that are added to it, so this is a "mutual registration".
140
141    Available options:
142
143    username:          Username for authentication purposes.
144    password:          Password for authentication purposes.
145    videopassword:     Password for accessing a video.
146    ap_mso:            Adobe Pass multiple-system operator identifier.
147    ap_username:       Multiple-system operator account username.
148    ap_password:       Multiple-system operator account password.
149    usenetrc:          Use netrc for authentication instead.
150    verbose:           Print additional info to stdout.
151    quiet:             Do not print messages to stdout.
152    no_warnings:       Do not print out anything for warnings.
153    forceurl:          Force printing final URL.
154    forcetitle:        Force printing title.
155    forceid:           Force printing ID.
156    forcethumbnail:    Force printing thumbnail URL.
157    forcedescription:  Force printing description.
158    forcefilename:     Force printing final filename.
159    forceduration:     Force printing duration.
160    forcejson:         Force printing info_dict as JSON.
161    dump_single_json:  Force printing the info_dict of the whole playlist
162                       (or video) as a single JSON line.
163    simulate:          Do not download the video files.
164    format:            Video format code. See options.py for more information.
165    outtmpl:           Template for output names.
166    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
167    restrictfilenames: Do not allow "&" and spaces in file names
168    ignoreerrors:      Do not stop on download errors.
169    force_generic_extractor: Force downloader to use the generic extractor
170    nooverwrites:      Prevent overwriting files.
171    playliststart:     Playlist item to start at.
172    playlistend:       Playlist item to end at.
173    playlist_items:    Specific indices of playlist to download.
174    playlistreverse:   Download playlist items in reverse order.
175    playlistrandom:    Download playlist items in random order.
176    matchtitle:        Download only matching titles.
177    rejecttitle:       Reject downloads for matching titles.
178    logger:            Log messages to a logging.Logger instance.
179    logtostderr:       Log messages to stderr instead of stdout.
180    writedescription:  Write the video description to a .description file
181    writeinfojson:     Write the video description to a .info.json file
182    writeannotations:  Write the video annotations to a .annotations.xml file
183    writethumbnail:    Write the thumbnail image to a file
184    write_all_thumbnails:  Write all thumbnail formats to files
185    writesubtitles:    Write the video subtitles to a file
186    writeautomaticsub: Write the automatically generated subtitles to a file
187    allsubtitles:      Downloads all the subtitles of the video
188                       (requires writesubtitles or writeautomaticsub)
189    listsubtitles:     Lists all available subtitles for the video
190    subtitlesformat:   The format code for subtitles
191    subtitleslangs:    List of languages of the subtitles to download
192    keepvideo:         Keep the video file after post-processing
193    daterange:         A DateRange object, download only if the upload_date is in the range.
194    skip_download:     Skip the actual download of the video file
195    cachedir:          Location of the cache files in the filesystem.
196                       False to disable filesystem cache.
197    noplaylist:        Download single video instead of a playlist if in doubt.
198    age_limit:         An integer representing the user's age in years.
199                       Unsuitable videos for the given age are skipped.
200    min_views:         An integer representing the minimum view count the video
201                       must have in order to not be skipped.
202                       Videos without view count information are always
203                       downloaded. None for no limit.
204    max_views:         An integer representing the maximum view count.
205                       Videos that are more popular than that are not
206                       downloaded.
207                       Videos without view count information are always
208                       downloaded. None for no limit.
209    download_archive:  File name of a file where all downloads are recorded.
210                       Videos already present in the file are not downloaded
211                       again.
212    cookiefile:        File name where cookies should be read from and dumped to.
213    nocheckcertificate:Do not verify SSL certificates
214    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
215                       At the moment, this is only supported by YouTube.
216    proxy:             URL of the proxy server to use
217    geo_verification_proxy:  URL of the proxy to use for IP address verification
218                       on geo-restricted sites.
219    socket_timeout:    Time to wait for unresponsive hosts, in seconds
220    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
222    debug_printtraffic:Print out sent and received HTTP traffic
223    include_ads:       Download ads as well
224    default_search:    Prepend this string if an input url is not valid.
225                       'auto' for elaborate guessing
226    encoding:          Use this encoding instead of the system-specified.
227    extract_flat:      Do not resolve URLs, return the immediate result.
228                       Pass in 'in_playlist' to only show this behavior for
229                       playlist items.
230    postprocessors:    A list of dictionaries, each with an entry
231                       * key:  The name of the postprocessor. See
232                               youtube_dl/postprocessor/__init__.py for a list.
233                       as well as any further keyword arguments for the
234                       postprocessor.
235    progress_hooks:    A list of functions that get called on download
236                       progress, with a dictionary with the entries
237                       * status: One of "downloading", "error", or "finished".
238                                 Check this first and ignore unknown values.
239
240                       If status is one of "downloading", or "finished", the
241                       following properties may also be present:
242                       * filename: The final filename (always present)
243                       * tmpfilename: The filename we're currently writing to
244                       * downloaded_bytes: Bytes on disk
245                       * total_bytes: Size of the whole file, None if unknown
246                       * total_bytes_estimate: Guess of the eventual file size,
247                                               None if unavailable.
248                       * elapsed: The number of seconds since download started.
249                       * eta: The estimated time in seconds, None if unknown
250                       * speed: The download speed in bytes/second, None if
251                                unknown
252                       * fragment_index: The counter of the currently
253                                         downloaded video fragment.
254                       * fragment_count: The number of fragments (= individual
255                                         files that will be merged)
256
257                       Progress hooks are guaranteed to be called at least once
258                       (with status "finished") if the download is successful.
259    merge_output_format: Extension to use when merging formats.
260    fixup:             Automatically correct known faults of the file.
261                       One of:
262                       - "never": do nothing
263                       - "warn": only emit a warning
264                       - "detect_or_warn": check whether we can do anything
265                                           about it, warn otherwise (default)
266    source_address:    Client-side IP address to bind to.
267    call_home:         Boolean, true iff we are allowed to contact the
268                       youtube-dl servers for debugging.
269    sleep_interval:    Number of seconds to sleep before each download when
270                       used alone or a lower bound of a range for randomized
271                       sleep before each download (minimum possible number
272                       of seconds to sleep) when used along with
273                       max_sleep_interval.
274    max_sleep_interval:Upper bound of a range for randomized sleep before each
275                       download (maximum possible number of seconds to sleep).
276                       Must only be used along with sleep_interval.
277                       Actual sleep time will be a random float from range
278                       [sleep_interval; max_sleep_interval].
279    listformats:       Print an overview of available video formats and exit.
280    list_thumbnails:   Print a table of all thumbnails and exit.
281    match_filter:      A function that gets called with the info_dict of
282                       every video.
283                       If it returns a message, the video is ignored.
284                       If it returns None, the video is downloaded.
285                       match_filter_func in utils.py is one example for this.
286    no_color:          Do not emit color codes in output.
287    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
288                       HTTP header
289    geo_bypass_country:
290                       Two-letter ISO 3166-2 country code that will be used for
291                       explicit geographic restriction bypassing via faking
292                       X-Forwarded-For HTTP header
293    geo_bypass_ip_block:
294                       IP range in CIDR notation that will be used similarly to
295                       geo_bypass_country
296
297    The following options determine which downloader is picked:
298    external_downloader: Executable of the external downloader to call.
299                       None or unset for standard (built-in) downloader.
300    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
301                       if True, otherwise use ffmpeg/avconv if False, otherwise
302                       use downloader suggested by extractor if None.
303
304    The following parameters are not used by YoutubeDL itself, they are used by
305    the downloader (see youtube_dl/downloader/common.py):
306    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
307    noresizebuffer, retries, continuedl, noprogress, consoletitle,
308    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
309    http_chunk_size.
310
311    The following options are used by the post processors:
312    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
313                       otherwise prefer ffmpeg.
314    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
315                       to the binary or its containing directory.
316    postprocessor_args: A list of additional command-line arguments for the
317                        postprocessor.
318
319    The following options are used by the Youtube extractor:
320    youtube_include_dash_manifest: If True (default), DASH manifests and related
321                        data will be downloaded and processed by extractor.
322                        You can reduce network I/O by disabling it if you don't
323                        care about DASH.
324    """
325
    # Fields whose values are numeric.  prepare_filename() uses this set to
    # rewrite integer/float conversions (e.g. %(field)05d) in the output
    # template to %(field)s when the field is missing, so that the string
    # NA placeholder does not break the %-substitution step.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults; __init__ reassigns the list/file attributes per
    # instance.  NOTE(review): _playlist_level and _playlist_urls are not
    # reassigned in the visible part of __init__, so they appear to be
    # shared across instances -- confirm against the rest of the file.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
345
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:     dict of options (see the class docstring for the list);
                    defaults to an empty dict.
        auto_init:  when True, print the debug header and register the
                    default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when 'logtostderr'
        # is set (the boolean indexes the two-element list).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        def check_deprecated(param, option, suggestion):
            # Warn if a deprecated option was supplied; returns True when the
            # deprecated parameter is actually present.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            # Fall back to the deprecated value only when the replacement
            # option was not set explicitly.
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            # Route screen output through an external bidi filter (bidiv, or
            # fribidi as fallback) via a pseudo-terminal so right-to-left text
            # renders correctly on terminals without bidirectional support.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is not installed -- try fribidi instead.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; each entry
        # is a dict whose 'key' selects the PP class and whose remaining
        # items become constructor keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
439
440    def warn_if_short_id(self, argv):
441        # short YouTube ID starting with dash?
442        idxs = [
443            i for i, a in enumerate(argv)
444            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
445        if idxs:
446            correct_argv = (
447                ['youtube-dl']
448                + [a for i, a in enumerate(argv) if i not in idxs]
449                + ['--'] + [argv[i] for i in idxs]
450            )
451            self.report_warning(
452                'Long argument string detected. '
453                'Use -- to separate parameters and URLs, like this:\n%s\n' %
454                args_to_str(correct_argv))
455
456    def add_info_extractor(self, ie):
457        """Add an InfoExtractor object to the end of the list."""
458        self._ies.append(ie)
459        if not isinstance(ie, type):
460            self._ies_instances[ie.ie_key()] = ie
461            ie.set_downloader(self)
462
463    def get_info_extractor(self, ie_key):
464        """
465        Get an instance of an IE with name ie_key, it will try to get one from
466        the _ies list, if there's no instance it will create a new one and add
467        it to the extractor list.
468        """
469        ie = self._ies_instances.get(ie_key)
470        if ie is None:
471            ie = get_info_extractor(ie_key)()
472            self.add_info_extractor(ie)
473        return ie
474
475    def add_default_info_extractors(self):
476        """
477        Add the InfoExtractors returned by gen_extractors to the end of the list
478        """
479        for ie in gen_extractor_classes():
480            self.add_info_extractor(ie)
481
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        # Give the PP a back-reference to this downloader ("mutual
        # registration", as described in the class docstring).
        pp.set_downloader(self)
486
    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        # See the 'progress_hooks' entry in the class docstring for the
        # status dict each hook is called with.
        self._progress_hooks.append(ph)
490
491    def _bidi_workaround(self, message):
492        if not hasattr(self, '_output_channel'):
493            return message
494
495        assert hasattr(self, '_output_process')
496        assert isinstance(message, compat_str)
497        line_count = message.count('\n') + 1
498        self._output_process.stdin.write((message + '\n').encode('utf-8'))
499        self._output_process.stdin.flush()
500        res = ''.join(self._output_channel.readline().decode('utf-8')
501                      for _ in range(line_count))
502        return res[:-len('\n')]
503
504    def to_screen(self, message, skip_eol=False):
505        """Print message to stdout if not in quiet mode."""
506        return self.to_stdout(message, skip_eol, check_quiet=True)
507
508    def _write_string(self, s, out=None):
509        write_string(s, out=out, encoding=self.params.get('encoding'))
510
511    def to_stdout(self, message, skip_eol=False, check_quiet=False):
512        """Print message to stdout if not in quiet mode."""
513        if self.params.get('logger'):
514            self.params['logger'].debug(message)
515        elif not check_quiet or not self.params.get('quiet', False):
516            message = self._bidi_workaround(message)
517            terminator = ['\n', ''][skip_eol]
518            output = message + terminator
519
520            self._write_string(output, self._screen_file)
521
522    def to_stderr(self, message):
523        """Print message to stderr."""
524        assert isinstance(message, compat_str)
525        if self.params.get('logger'):
526            self.params['logger'].error(message)
527        else:
528            message = self._bidi_workaround(message)
529            output = message + '\n'
530            self._write_string(output, self._err_file)
531
532    def to_console_title(self, message):
533        if not self.params.get('consoletitle', False):
534            return
535        if compat_os_name == 'nt':
536            if ctypes.windll.kernel32.GetConsoleWindow():
537                # c_wchar_p() might not be necessary if `message` is
538                # already of type unicode()
539                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
540        elif 'TERM' in os.environ:
541            self._write_string('\033]0;%s\007' % message, self._screen_file)
542
543    def save_console_title(self):
544        if not self.params.get('consoletitle', False):
545            return
546        if self.params.get('simulate', False):
547            return
548        if compat_os_name != 'nt' and 'TERM' in os.environ:
549            # Save the title on stack
550            self._write_string('\033[22;0t', self._screen_file)
551
552    def restore_console_title(self):
553        if not self.params.get('consoletitle', False):
554            return
555        if self.params.get('simulate', False):
556            return
557        if compat_os_name != 'nt' and 'TERM' in os.environ:
558            # Restore the title from stack
559            self._write_string('\033[23;0t', self._screen_file)
560
    def __enter__(self):
        # Context-manager entry: save the terminal title so that __exit__
        # can restore it later.
        self.save_console_title()
        return self
564
565    def __exit__(self, *args):
566        self.restore_console_title()
567
568        if self.params.get('cookiefile') is not None:
569            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
570
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Some exceptions wrap the original failure in an
                    # .exc_info attribute; print that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped original
            # exc_info when the active exception carries one.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure in the eventual exit code instead.
        self._download_retcode = 1
600
601    def report_warning(self, message):
602        '''
603        Print the message to stderr, it will be prefixed with 'WARNING:'
604        If stderr is a tty file the 'WARNING:' will be colored
605        '''
606        if self.params.get('logger') is not None:
607            self.params['logger'].warning(message)
608        else:
609            if self.params.get('no_warnings'):
610                return
611            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
612                _msg_header = '\033[0;33mWARNING:\033[0m'
613            else:
614                _msg_header = 'WARNING:'
615            warning_message = '%s %s' % (_msg_header, message)
616            self.to_stderr(warning_message)
617
618    def report_error(self, message, tb=None):
619        '''
620        Do the same as trouble, but prefixes the message with 'ERROR:', colored
621        in red if stderr is a tty file.
622        '''
623        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
624            _msg_header = '\033[0;31mERROR:\033[0m'
625        else:
626            _msg_header = 'ERROR:'
627        error_message = '%s %s' % (_msg_header, message)
628        self.trouble(error_message, tb)
629
630    def report_file_already_downloaded(self, file_name):
631        """Report file has already been fully downloaded."""
632        try:
633            self.to_screen('[download] %s has already been downloaded' % file_name)
634        except UnicodeEncodeError:
635            self.to_screen('[download] The file has already been downloaded')
636
637    def prepare_filename(self, info_dict):
638        """Generate the output filename."""
639        try:
640            template_dict = dict(info_dict)
641
642            template_dict['epoch'] = int(time.time())
643            autonumber_size = self.params.get('autonumber_size')
644            if autonumber_size is None:
645                autonumber_size = 5
646            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
647            if template_dict.get('resolution') is None:
648                if template_dict.get('width') and template_dict.get('height'):
649                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
650                elif template_dict.get('height'):
651                    template_dict['resolution'] = '%sp' % template_dict['height']
652                elif template_dict.get('width'):
653                    template_dict['resolution'] = '%dx?' % template_dict['width']
654
655            sanitize = lambda k, v: sanitize_filename(
656                compat_str(v),
657                restricted=self.params.get('restrictfilenames'),
658                is_id=(k == 'id' or k.endswith('_id')))
659            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
660                                 for k, v in template_dict.items()
661                                 if v is not None and not isinstance(v, (list, tuple, dict)))
662            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
663
664            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
665
666            # For fields playlist_index and autonumber convert all occurrences
667            # of %(field)s to %(field)0Nd for backward compatibility
668            field_size_compat_map = {
669                'playlist_index': len(str(template_dict['n_entries'])),
670                'autonumber': autonumber_size,
671            }
672            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
673            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
674            if mobj:
675                outtmpl = re.sub(
676                    FIELD_SIZE_COMPAT_RE,
677                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
678                    outtmpl)
679
680            # Missing numeric fields used together with integer presentation types
681            # in format specification will break the argument substitution since
682            # string NA placeholder is returned for missing fields. We will patch
683            # output template for missing fields to meet string presentation type.
684            for numeric_field in self._NUMERIC_FIELDS:
685                if numeric_field not in template_dict:
686                    # As of [1] format syntax is:
687                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
688                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
689                    FORMAT_RE = r'''(?x)
690                        (?<!%)
691                        %
692                        \({0}\)  # mapping key
693                        (?:[#0\-+ ]+)?  # conversion flags (optional)
694                        (?:\d+)?  # minimum field width (optional)
695                        (?:\.\d+)?  # precision (optional)
696                        [hlL]?  # length modifier (optional)
697                        [diouxXeEfFgGcrs%]  # conversion type
698                    '''
699                    outtmpl = re.sub(
700                        FORMAT_RE.format(numeric_field),
701                        r'%({0})s'.format(numeric_field), outtmpl)
702
703            # expand_path translates '%%' into '%' and '$$' into '$'
704            # correspondingly that is not what we want since we need to keep
705            # '%%' intact for template dict substitution step. Working around
706            # with boundary-alike separator hack.
707            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
708            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
709
710            # outtmpl should be expand_path'ed before template dict substitution
711            # because meta fields may contain env variables we don't want to
712            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
713            # title "Hello $PATH", we don't want `$PATH` to be expanded.
714            filename = expand_path(outtmpl).replace(sep, '') % template_dict
715
716            # Temporary fix for #4787
717            # 'Treat' all problem characters by passing filename through preferredencoding
718            # to workaround encoding issues with subprocess on python2 @ Windows
719            if sys.version_info < (3, 0) and sys.platform == 'win32':
720                filename = encodeFilename(filename, True).decode(preferredencoding())
721            return sanitize_path(filename)
722        except ValueError as err:
723            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
724            return None
725
726    def _match_entry(self, info_dict, incomplete):
727        """ Returns None iff the file should be downloaded """
728
729        video_title = info_dict.get('title', info_dict.get('id', 'video'))
730        if 'title' in info_dict:
731            # This can happen when we're just evaluating the playlist
732            title = info_dict['title']
733            matchtitle = self.params.get('matchtitle', False)
734            if matchtitle:
735                if not re.search(matchtitle, title, re.IGNORECASE):
736                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
737            rejecttitle = self.params.get('rejecttitle', False)
738            if rejecttitle:
739                if re.search(rejecttitle, title, re.IGNORECASE):
740                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
741        date = info_dict.get('upload_date')
742        if date is not None:
743            dateRange = self.params.get('daterange', DateRange())
744            if date not in dateRange:
745                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
746        view_count = info_dict.get('view_count')
747        if view_count is not None:
748            min_views = self.params.get('min_views')
749            if min_views is not None and view_count < min_views:
750                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
751            max_views = self.params.get('max_views')
752            if max_views is not None and view_count > max_views:
753                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
754        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
755            return 'Skipping "%s" because it is age restricted' % video_title
756        if self.in_download_archive(info_dict):
757            return '%s has already been recorded in archive' % video_title
758
759        if not incomplete:
760            match_filter = self.params.get('match_filter')
761            if match_filter is not None:
762                ret = match_filter(info_dict)
763                if ret is not None:
764                    return ret
765
766        return None
767
768    @staticmethod
769    def add_extra_info(info_dict, extra_info):
770        '''Set the keys from extra_info in info dict if they are missing'''
771        for key, value in extra_info.items():
772            info_dict.setdefault(key, value)
773
774    def extract_info(self, url, download=True, ie_key=None, extra_info={},
775                     process=True, force_generic_extractor=False):
776        """
777        Return a list with a dictionary for each video extracted.
778
779        Arguments:
780        url -- URL to extract
781
782        Keyword arguments:
783        download -- whether to download videos during extraction
784        ie_key -- extractor key hint
785        extra_info -- dictionary containing the extra values to add to each result
786        process -- whether to resolve all unresolved references (URLs, playlist items),
787            must be True for download to work.
788        force_generic_extractor -- force using the generic extractor
789        """
790
791        if not ie_key and force_generic_extractor:
792            ie_key = 'Generic'
793
794        if ie_key:
795            ies = [self.get_info_extractor(ie_key)]
796        else:
797            ies = self._ies
798
799        for ie in ies:
800            if not ie.suitable(url):
801                continue
802
803            ie = self.get_info_extractor(ie.ie_key())
804            if not ie.working():
805                self.report_warning('The program functionality for this site has been marked as broken, '
806                                    'and will probably not work.')
807
808            return self.__extract_info(url, ie, download, extra_info, process)
809        else:
810            self.report_error('no suitable InfoExtractor for URL %s' % url)
811
812    def __handle_extraction_exceptions(func):
813        def wrapper(self, *args, **kwargs):
814            try:
815                return func(self, *args, **kwargs)
816            except GeoRestrictedError as e:
817                msg = e.msg
818                if e.countries:
819                    msg += '\nThis video is available in %s.' % ', '.join(
820                        map(ISO3166Utils.short2full, e.countries))
821                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
822                self.report_error(msg)
823            except ExtractorError as e:  # An error we somewhat expected
824                self.report_error(compat_str(e), e.format_traceback())
825            except MaxDownloadsReached:
826                raise
827            except Exception as e:
828                if self.params.get('ignoreerrors', False):
829                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
830                else:
831                    raise
832        return wrapper
833
834    @__handle_extraction_exceptions
835    def __extract_info(self, url, ie, download, extra_info, process):
836        ie_result = ie.extract(url)
837        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
838            return
839        if isinstance(ie_result, list):
840            # Backwards compatibility: old IE result format
841            ie_result = {
842                '_type': 'compat_list',
843                'entries': ie_result,
844            }
845        self.add_default_extra_info(ie_result, ie, url)
846        if process:
847            return self.process_ie_result(ie_result, download, extra_info)
848        else:
849            return ie_result
850
851    def add_default_extra_info(self, ie_result, ie, url):
852        self.add_extra_info(ie_result, {
853            'extractor': ie.IE_NAME,
854            'webpage_url': url,
855            'webpage_url_basename': url_basename(url),
856            'extractor_key': ie.ie_key(),
857        })
858
859    def process_ie_result(self, ie_result, download=True, extra_info={}):
860        """
861        Take the result of the ie(may be modified) and resolve all unresolved
862        references (URLs, playlist items).
863
864        It will also download the videos if 'download'.
865        Returns the resolved ie_result.
866        """
867        result_type = ie_result.get('_type', 'video')
868
869        if result_type in ('url', 'url_transparent'):
870            ie_result['url'] = sanitize_url(ie_result['url'])
871            extract_flat = self.params.get('extract_flat', False)
872            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
873                    or extract_flat is True):
874                self.__forced_printings(
875                    ie_result, self.prepare_filename(ie_result),
876                    incomplete=True)
877                return ie_result
878
879        if result_type == 'video':
880            self.add_extra_info(ie_result, extra_info)
881            return self.process_video_result(ie_result, download=download)
882        elif result_type == 'url':
883            # We have to add extra_info to the results because it may be
884            # contained in a playlist
885            return self.extract_info(ie_result['url'],
886                                     download,
887                                     ie_key=ie_result.get('ie_key'),
888                                     extra_info=extra_info)
889        elif result_type == 'url_transparent':
890            # Use the information from the embedding page
891            info = self.extract_info(
892                ie_result['url'], ie_key=ie_result.get('ie_key'),
893                extra_info=extra_info, download=False, process=False)
894
895            # extract_info may return None when ignoreerrors is enabled and
896            # extraction failed with an error, don't crash and return early
897            # in this case
898            if not info:
899                return info
900
901            force_properties = dict(
902                (k, v) for k, v in ie_result.items() if v is not None)
903            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
904                if f in force_properties:
905                    del force_properties[f]
906            new_result = info.copy()
907            new_result.update(force_properties)
908
909            # Extracted info may not be a video result (i.e.
910            # info.get('_type', 'video') != video) but rather an url or
911            # url_transparent. In such cases outer metadata (from ie_result)
912            # should be propagated to inner one (info). For this to happen
913            # _type of info should be overridden with url_transparent. This
914            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
915            if new_result.get('_type') == 'url':
916                new_result['_type'] = 'url_transparent'
917
918            return self.process_ie_result(
919                new_result, download=download, extra_info=extra_info)
920        elif result_type in ('playlist', 'multi_video'):
921            # Protect from infinite recursion due to recursively nested playlists
922            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
923            webpage_url = ie_result['webpage_url']
924            if webpage_url in self._playlist_urls:
925                self.to_screen(
926                    '[download] Skipping already downloaded playlist: %s'
927                    % ie_result.get('title') or ie_result.get('id'))
928                return
929
930            self._playlist_level += 1
931            self._playlist_urls.add(webpage_url)
932            try:
933                return self.__process_playlist(ie_result, download)
934            finally:
935                self._playlist_level -= 1
936                if not self._playlist_level:
937                    self._playlist_urls.clear()
938        elif result_type == 'compat_list':
939            self.report_warning(
940                'Extractor %s returned a compat_list result. '
941                'It needs to be updated.' % ie_result.get('extractor'))
942
943            def _fixup(r):
944                self.add_extra_info(
945                    r,
946                    {
947                        'extractor': ie_result['extractor'],
948                        'webpage_url': ie_result['webpage_url'],
949                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
950                        'extractor_key': ie_result['extractor_key'],
951                    }
952                )
953                return r
954            ie_result['entries'] = [
955                self.process_ie_result(_fixup(r), download, extra_info)
956                for r in ie_result['entries']
957            ]
958            return ie_result
959        else:
960            raise Exception('Invalid result type: %s' % result_type)
961
    def __process_playlist(self, ie_result, download):
        """Resolve (and optionally download) the selected entries of a playlist.

        Applies playliststart/playlistend/playlist_items selection, optional
        reversal or shuffling, then runs each surviving entry through
        _match_entry() and __process_iterable_entry().  Returns ie_result
        with its 'entries' replaced by the per-entry results.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')

        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # playliststart is 1-based in params; convert to a 0-based slice index.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Expand a "1,3,5-7"-style spec into individual 1-based indices.
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            # orderedSet drops duplicate indices while preserving order.
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Map the 1-based (possibly negative) playlist_items indices onto
            # the materialized entry list, silently dropping out-of-range ones.
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # Entries may arrive as a plain list, a lazily-paged PagedList, or a
        # generic iterable/generator; each needs its own slicing strategy.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                # Fetch each requested index as its own one-item page slice.
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Materialize just enough of the iterable to cover the highest
                # requested index, then select from that prefix.
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        # Forwarded-for IP discovered during playlist extraction is propagated
        # to every entry below.
        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level metadata inherited by each entry; process_ie_result
            # only fills keys the entry does not already define.
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                # 1-based index within the original (unsliced) playlist.
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1073
1074    @__handle_extraction_exceptions
1075    def __process_iterable_entry(self, entry, download, extra_info):
1076        return self.process_ie_result(
1077            entry, download=download, extra_info=extra_info)
1078
1079    def _build_format_filter(self, filter_spec):
1080        " Returns a function to filter the formats according to the filter_spec "
1081
1082        OPERATORS = {
1083            '<': operator.lt,
1084            '<=': operator.le,
1085            '>': operator.gt,
1086            '>=': operator.ge,
1087            '=': operator.eq,
1088            '!=': operator.ne,
1089        }
1090        operator_rex = re.compile(r'''(?x)\s*
1091            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1092            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1093            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1094            $
1095            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1096        m = operator_rex.search(filter_spec)
1097        if m:
1098            try:
1099                comparison_value = int(m.group('value'))
1100            except ValueError:
1101                comparison_value = parse_filesize(m.group('value'))
1102                if comparison_value is None:
1103                    comparison_value = parse_filesize(m.group('value') + 'B')
1104                if comparison_value is None:
1105                    raise ValueError(
1106                        'Invalid value %r in format specification %r' % (
1107                            m.group('value'), filter_spec))
1108            op = OPERATORS[m.group('op')]
1109
1110        if not m:
1111            STR_OPERATORS = {
1112                '=': operator.eq,
1113                '^=': lambda attr, value: attr.startswith(value),
1114                '$=': lambda attr, value: attr.endswith(value),
1115                '*=': lambda attr, value: value in attr,
1116            }
1117            str_operator_rex = re.compile(r'''(?x)
1118                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
1119                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1120                \s*(?P<value>[a-zA-Z0-9._-]+)
1121                \s*$
1122                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1123            m = str_operator_rex.search(filter_spec)
1124            if m:
1125                comparison_value = m.group('value')
1126                str_op = STR_OPERATORS[m.group('op')]
1127                if m.group('negation'):
1128                    op = lambda attr, value: not str_op(attr, value)
1129                else:
1130                    op = str_op
1131
1132        if not m:
1133            raise ValueError('Invalid filter specification %r' % filter_spec)
1134
1135        def _filter(f):
1136            actual_value = f.get(m.group('key'))
1137            if actual_value is None:
1138                return m.group('none_inclusive')
1139            return op(actual_value, comparison_value)
1140        return _filter
1141
1142    def _default_format_spec(self, info_dict, download=True):
1143
1144        def can_merge():
1145            merger = FFmpegMergerPP(self)
1146            return merger.available and merger.can_merge()
1147
1148        def prefer_best():
1149            if self.params.get('simulate', False):
1150                return False
1151            if not download:
1152                return False
1153            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1154                return True
1155            if info_dict.get('is_live'):
1156                return True
1157            if not can_merge():
1158                return True
1159            return False
1160
1161        req_format_list = ['bestvideo+bestaudio', 'best']
1162        if prefer_best():
1163            req_format_list.reverse()
1164        return '/'.join(req_format_list)
1165
1166    def build_format_selector(self, format_spec):
1167        def syntax_error(note, start):
1168            message = (
1169                'Invalid format specification: '
1170                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1171            return SyntaxError(message)
1172
1173        PICKFIRST = 'PICKFIRST'
1174        MERGE = 'MERGE'
1175        SINGLE = 'SINGLE'
1176        GROUP = 'GROUP'
1177        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1178
1179        def _parse_filter(tokens):
1180            filter_parts = []
1181            for type, string, start, _, _ in tokens:
1182                if type == tokenize.OP and string == ']':
1183                    return ''.join(filter_parts)
1184                else:
1185                    filter_parts.append(string)
1186
1187        def _remove_unused_ops(tokens):
1188            # Remove operators that we don't use and join them with the surrounding strings
1189            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1190            ALLOWED_OPS = ('/', '+', ',', '(', ')')
1191            last_string, last_start, last_end, last_line = None, None, None, None
1192            for type, string, start, end, line in tokens:
1193                if type == tokenize.OP and string == '[':
1194                    if last_string:
1195                        yield tokenize.NAME, last_string, last_start, last_end, last_line
1196                        last_string = None
1197                    yield type, string, start, end, line
1198                    # everything inside brackets will be handled by _parse_filter
1199                    for type, string, start, end, line in tokens:
1200                        yield type, string, start, end, line
1201                        if type == tokenize.OP and string == ']':
1202                            break
1203                elif type == tokenize.OP and string in ALLOWED_OPS:
1204                    if last_string:
1205                        yield tokenize.NAME, last_string, last_start, last_end, last_line
1206                        last_string = None
1207                    yield type, string, start, end, line
1208                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1209                    if not last_string:
1210                        last_string = string
1211                        last_start = start
1212                        last_end = end
1213                    else:
1214                        last_string += string
1215            if last_string:
1216                yield tokenize.NAME, last_string, last_start, last_end, last_line
1217
1218        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1219            selectors = []
1220            current_selector = None
1221            for type, string, start, _, _ in tokens:
1222                # ENCODING is only defined in python 3.x
1223                if type == getattr(tokenize, 'ENCODING', None):
1224                    continue
1225                elif type in [tokenize.NAME, tokenize.NUMBER]:
1226                    current_selector = FormatSelector(SINGLE, string, [])
1227                elif type == tokenize.OP:
1228                    if string == ')':
1229                        if not inside_group:
1230                            # ')' will be handled by the parentheses group
1231                            tokens.restore_last_token()
1232                        break
1233                    elif inside_merge and string in ['/', ',']:
1234                        tokens.restore_last_token()
1235                        break
1236                    elif inside_choice and string == ',':
1237                        tokens.restore_last_token()
1238                        break
1239                    elif string == ',':
1240                        if not current_selector:
1241                            raise syntax_error('"," must follow a format selector', start)
1242                        selectors.append(current_selector)
1243                        current_selector = None
1244                    elif string == '/':
1245                        if not current_selector:
1246                            raise syntax_error('"/" must follow a format selector', start)
1247                        first_choice = current_selector
1248                        second_choice = _parse_format_selection(tokens, inside_choice=True)
1249                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1250                    elif string == '[':
1251                        if not current_selector:
1252                            current_selector = FormatSelector(SINGLE, 'best', [])
1253                        format_filter = _parse_filter(tokens)
1254                        current_selector.filters.append(format_filter)
1255                    elif string == '(':
1256                        if current_selector:
1257                            raise syntax_error('Unexpected "("', start)
1258                        group = _parse_format_selection(tokens, inside_group=True)
1259                        current_selector = FormatSelector(GROUP, group, [])
1260                    elif string == '+':
1261                        if inside_merge:
1262                            raise syntax_error('Unexpected "+"', start)
1263                        video_selector = current_selector
1264                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
1265                        if not video_selector or not audio_selector:
1266                            raise syntax_error('"+" must be between two format selectors', start)
1267                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1268                    else:
1269                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1270                elif type == tokenize.ENDMARKER:
1271                    break
1272            if current_selector:
1273                selectors.append(current_selector)
1274            return selectors
1275
1276        def _build_selector_function(selector):
1277            if isinstance(selector, list):
1278                fs = [_build_selector_function(s) for s in selector]
1279
1280                def selector_function(ctx):
1281                    for f in fs:
1282                        for format in f(ctx):
1283                            yield format
1284                return selector_function
1285            elif selector.type == GROUP:
1286                selector_function = _build_selector_function(selector.selector)
1287            elif selector.type == PICKFIRST:
1288                fs = [_build_selector_function(s) for s in selector.selector]
1289
1290                def selector_function(ctx):
1291                    for f in fs:
1292                        picked_formats = list(f(ctx))
1293                        if picked_formats:
1294                            return picked_formats
1295                    return []
1296            elif selector.type == SINGLE:
1297                format_spec = selector.selector
1298
1299                def selector_function(ctx):
1300                    formats = list(ctx['formats'])
1301                    if not formats:
1302                        return
1303                    if format_spec == 'all':
1304                        for f in formats:
1305                            yield f
1306                    elif format_spec in ['best', 'worst', None]:
1307                        format_idx = 0 if format_spec == 'worst' else -1
1308                        audiovideo_formats = [
1309                            f for f in formats
1310                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1311                        if audiovideo_formats:
1312                            yield audiovideo_formats[format_idx]
1313                        # for extractors with incomplete formats (audio only (soundcloud)
1314                        # or video only (imgur)) we will fallback to best/worst
1315                        # {video,audio}-only format
1316                        elif ctx['incomplete_formats']:
1317                            yield formats[format_idx]
1318                    elif format_spec == 'bestaudio':
1319                        audio_formats = [
1320                            f for f in formats
1321                            if f.get('vcodec') == 'none']
1322                        if audio_formats:
1323                            yield audio_formats[-1]
1324                    elif format_spec == 'worstaudio':
1325                        audio_formats = [
1326                            f for f in formats
1327                            if f.get('vcodec') == 'none']
1328                        if audio_formats:
1329                            yield audio_formats[0]
1330                    elif format_spec == 'bestvideo':
1331                        video_formats = [
1332                            f for f in formats
1333                            if f.get('acodec') == 'none']
1334                        if video_formats:
1335                            yield video_formats[-1]
1336                    elif format_spec == 'worstvideo':
1337                        video_formats = [
1338                            f for f in formats
1339                            if f.get('acodec') == 'none']
1340                        if video_formats:
1341                            yield video_formats[0]
1342                    else:
1343                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1344                        if format_spec in extensions:
1345                            filter_f = lambda f: f['ext'] == format_spec
1346                        else:
1347                            filter_f = lambda f: f['format_id'] == format_spec
1348                        matches = list(filter(filter_f, formats))
1349                        if matches:
1350                            yield matches[-1]
1351            elif selector.type == MERGE:
1352                def _merge(formats_info):
1353                    format_1, format_2 = [f['format_id'] for f in formats_info]
1354                    # The first format must contain the video and the
1355                    # second the audio
1356                    if formats_info[0].get('vcodec') == 'none':
1357                        self.report_error('The first format must '
1358                                          'contain the video, try using '
1359                                          '"-f %s+%s"' % (format_2, format_1))
1360                        return
1361                    # Formats must be opposite (video+audio)
1362                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1363                        self.report_error(
1364                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1365                            % (format_1, format_2))
1366                        return
1367                    output_ext = (
1368                        formats_info[0]['ext']
1369                        if self.params.get('merge_output_format') is None
1370                        else self.params['merge_output_format'])
1371                    return {
1372                        'requested_formats': formats_info,
1373                        'format': '%s+%s' % (formats_info[0].get('format'),
1374                                             formats_info[1].get('format')),
1375                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1376                                                formats_info[1].get('format_id')),
1377                        'width': formats_info[0].get('width'),
1378                        'height': formats_info[0].get('height'),
1379                        'resolution': formats_info[0].get('resolution'),
1380                        'fps': formats_info[0].get('fps'),
1381                        'vcodec': formats_info[0].get('vcodec'),
1382                        'vbr': formats_info[0].get('vbr'),
1383                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
1384                        'acodec': formats_info[1].get('acodec'),
1385                        'abr': formats_info[1].get('abr'),
1386                        'ext': output_ext,
1387                    }
1388                video_selector, audio_selector = map(_build_selector_function, selector.selector)
1389
1390                def selector_function(ctx):
1391                    for pair in itertools.product(
1392                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
1393                        yield _merge(pair)
1394
1395            filters = [self._build_format_filter(f) for f in selector.filters]
1396
1397            def final_selector(ctx):
1398                ctx_copy = copy.deepcopy(ctx)
1399                for _filter in filters:
1400                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1401                return selector_function(ctx_copy)
1402            return final_selector
1403
1404        stream = io.BytesIO(format_spec.encode('utf-8'))
1405        try:
1406            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1407        except tokenize.TokenError:
1408            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1409
1410        class TokenIterator(object):
1411            def __init__(self, tokens):
1412                self.tokens = tokens
1413                self.counter = 0
1414
1415            def __iter__(self):
1416                return self
1417
1418            def __next__(self):
1419                if self.counter >= len(self.tokens):
1420                    raise StopIteration()
1421                value = self.tokens[self.counter]
1422                self.counter += 1
1423                return value
1424
1425            next = __next__
1426
1427            def restore_last_token(self):
1428                self.counter -= 1
1429
1430        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1431        return _build_selector_function(parsed_selector)
1432
1433    def _calc_headers(self, info_dict):
1434        res = std_headers.copy()
1435
1436        add_headers = info_dict.get('http_headers')
1437        if add_headers:
1438            res.update(add_headers)
1439
1440        cookies = self._calc_cookies(info_dict)
1441        if cookies:
1442            res['Cookie'] = cookies
1443
1444        if 'X-Forwarded-For' not in res:
1445            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1446            if x_forwarded_for_ip:
1447                res['X-Forwarded-For'] = x_forwarded_for_ip
1448
1449        return res
1450
1451    def _calc_cookies(self, info_dict):
1452        pr = sanitized_Request(info_dict['url'])
1453        self.cookiejar.add_cookie_header(pr)
1454        return pr.get_header('Cookie')
1455
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result, fill in derived fields,
        run format selection and (optionally) download the selected formats.

        Mutates and returns info_dict; for backwards compatibility the dict
        is finally updated with the best selected format's fields.
        """
        assert info_dict.get('_type', 'video') == 'video'

        # 'id' and 'title' are mandatory fields in every extractor result
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a wrongly-typed field value
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field value to compat_str, with a warning
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (or None), with a warning
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize thumbnails: build the list from a single 'thumbnail' if
        # needed, sort worst-to-best, sanitize URLs and assign missing ids
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort by preference, then dimensions, then id/url (missing
            # values sort first via the -1 / '' placeholders)
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    # Fall back to the position in the sorted list as the id
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            # --list-thumbnails: only print the thumbnails and stop processing
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # The last thumbnail sorts as the best one
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date/release_date (YYYYMMDD) from their timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            # Guess the extension from the URL when not given
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            # --list-subs: only print the available subtitles and stop
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        def is_wellformed(f):
            # A format without a URL cannot be downloaded; drop it with a warning
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate duplicates by appending their index: id-0, id-1, ...
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                # Human-readable format description shown in listings
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            # --list-formats: only print the format table and stop
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # No explicit --format: compute a sensible default spec
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self._write_string('[debug] Default format spec: %s\n' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each selected format is processed with the full metadata merged in
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1696
1697    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1698        """Select the requested subtitles and their format"""
1699        available_subs = {}
1700        if normal_subtitles and self.params.get('writesubtitles'):
1701            available_subs.update(normal_subtitles)
1702        if automatic_captions and self.params.get('writeautomaticsub'):
1703            for lang, cap_info in automatic_captions.items():
1704                if lang not in available_subs:
1705                    available_subs[lang] = cap_info
1706
1707        if (not self.params.get('writesubtitles') and not
1708                self.params.get('writeautomaticsub') or not
1709                available_subs):
1710            return None
1711
1712        if self.params.get('allsubtitles', False):
1713            requested_langs = available_subs.keys()
1714        else:
1715            if self.params.get('subtitleslangs', False):
1716                requested_langs = self.params.get('subtitleslangs')
1717            elif 'en' in available_subs:
1718                requested_langs = ['en']
1719            else:
1720                requested_langs = [list(available_subs.keys())[0]]
1721
1722        formats_query = self.params.get('subtitlesformat', 'best')
1723        formats_preference = formats_query.split('/') if formats_query else []
1724        subs = {}
1725        for lang in requested_langs:
1726            formats = available_subs.get(lang)
1727            if formats is None:
1728                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1729                continue
1730            for ext in formats_preference:
1731                if ext == 'best':
1732                    f = formats[-1]
1733                    break
1734                matches = list(filter(lambda f: f['ext'] == ext, formats))
1735                if matches:
1736                    f = matches[-1]
1737                    break
1738            else:
1739                f = formats[-1]
1740                self.report_warning(
1741                    'No subtitle format found matching "%s" for language %s, '
1742                    'using %s' % (formats_query, lang, f['ext']))
1743            subs[lang] = f
1744        return subs
1745
1746    def __forced_printings(self, info_dict, filename, incomplete):
1747        def print_mandatory(field):
1748            if (self.params.get('force%s' % field, False)
1749                    and (not incomplete or info_dict.get(field) is not None)):
1750                self.to_stdout(info_dict[field])
1751
1752        def print_optional(field):
1753            if (self.params.get('force%s' % field, False)
1754                    and info_dict.get(field) is not None):
1755                self.to_stdout(info_dict[field])
1756
1757        print_mandatory('title')
1758        print_mandatory('id')
1759        if self.params.get('forceurl', False) and not incomplete:
1760            if info_dict.get('requested_formats') is not None:
1761                for f in info_dict['requested_formats']:
1762                    self.to_stdout(f['url'] + f.get('play_path', ''))
1763            else:
1764                # For RTMP URLs, also include the playpath
1765                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1766        print_optional('thumbnail')
1767        print_optional('description')
1768        if self.params.get('forcefilename', False) and filename is not None:
1769            self.to_stdout(filename)
1770        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1771            self.to_stdout(formatSeconds(info_dict['duration']))
1772        print_mandatory('format')
1773        if self.params.get('forcejson', False):
1774            self.to_stdout(json.dumps(info_dict))
1775
1776    def process_info(self, info_dict):
1777        """Process a single resolved IE result."""
1778
1779        assert info_dict.get('_type', 'video') == 'video'
1780
1781        max_downloads = self.params.get('max_downloads')
1782        if max_downloads is not None:
1783            if self._num_downloads >= int(max_downloads):
1784                raise MaxDownloadsReached()
1785
1786        # TODO: backward compatibility, to be removed
1787        info_dict['fulltitle'] = info_dict['title']
1788
1789        if 'format' not in info_dict:
1790            info_dict['format'] = info_dict['ext']
1791
1792        reason = self._match_entry(info_dict, incomplete=False)
1793        if reason is not None:
1794            self.to_screen('[download] ' + reason)
1795            return
1796
1797        self._num_downloads += 1
1798
1799        info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1800
1801        # Forced printings
1802        self.__forced_printings(info_dict, filename, incomplete=False)
1803
1804        # Do nothing else if in simulate mode
1805        if self.params.get('simulate', False):
1806            return
1807
1808        if filename is None:
1809            return
1810
1811        def ensure_dir_exists(path):
1812            try:
1813                dn = os.path.dirname(path)
1814                if dn and not os.path.exists(dn):
1815                    os.makedirs(dn)
1816                return True
1817            except (OSError, IOError) as err:
1818                if isinstance(err, OSError) and err.errno == errno.EEXIST:
1819                    return True
1820                self.report_error('unable to create directory ' + error_to_compat_str(err))
1821                return False
1822
1823        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1824            return
1825
1826        if self.params.get('writedescription', False):
1827            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1828            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1829                self.to_screen('[info] Video description is already present')
1830            elif info_dict.get('description') is None:
1831                self.report_warning('There\'s no description to write.')
1832            else:
1833                try:
1834                    self.to_screen('[info] Writing video description to: ' + descfn)
1835                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1836                        descfile.write(info_dict['description'])
1837                except (OSError, IOError):
1838                    self.report_error('Cannot write description file ' + descfn)
1839                    return
1840
1841        if self.params.get('writeannotations', False):
1842            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1843            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1844                self.to_screen('[info] Video annotations are already present')
1845            elif not info_dict.get('annotations'):
1846                self.report_warning('There are no annotations to write.')
1847            else:
1848                try:
1849                    self.to_screen('[info] Writing video annotations to: ' + annofn)
1850                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1851                        annofile.write(info_dict['annotations'])
1852                except (KeyError, TypeError):
1853                    self.report_warning('There are no annotations to write.')
1854                except (OSError, IOError):
1855                    self.report_error('Cannot write annotations file: ' + annofn)
1856                    return
1857
1858        subtitles_are_requested = any([self.params.get('writesubtitles', False),
1859                                       self.params.get('writeautomaticsub')])
1860
1861        if subtitles_are_requested and info_dict.get('requested_subtitles'):
1862            # subtitles download errors are already managed as troubles in relevant IE
1863            # that way it will silently go on when used with unsupporting IE
1864            subtitles = info_dict['requested_subtitles']
1865            ie = self.get_info_extractor(info_dict['extractor_key'])
1866            for sub_lang, sub_info in subtitles.items():
1867                sub_format = sub_info['ext']
1868                sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1869                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1870                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1871                else:
1872                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1873                    if sub_info.get('data') is not None:
1874                        try:
1875                            # Use newline='' to prevent conversion of newline characters
1876                            # See https://github.com/ytdl-org/youtube-dl/issues/10268
1877                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1878                                subfile.write(sub_info['data'])
1879                        except (OSError, IOError):
1880                            self.report_error('Cannot write subtitles file ' + sub_filename)
1881                            return
1882                    else:
1883                        try:
1884                            sub_data = ie._request_webpage(
1885                                sub_info['url'], info_dict['id'], note=False).read()
1886                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1887                                subfile.write(sub_data)
1888                        except (ExtractorError, IOError, OSError, ValueError) as err:
1889                            self.report_warning('Unable to download subtitle for "%s": %s' %
1890                                                (sub_lang, error_to_compat_str(err)))
1891                            continue
1892
1893        if self.params.get('writeinfojson', False):
1894            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1895            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1896                self.to_screen('[info] Video description metadata is already present')
1897            else:
1898                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1899                try:
1900                    write_json_file(self.filter_requested_info(info_dict), infofn)
1901                except (OSError, IOError):
1902                    self.report_error('Cannot write metadata to JSON file ' + infofn)
1903                    return
1904
1905        self._write_thumbnails(info_dict, filename)
1906
1907        if not self.params.get('skip_download', False):
1908            try:
1909                def dl(name, info):
1910                    fd = get_suitable_downloader(info, self.params)(self, self.params)
1911                    for ph in self._progress_hooks:
1912                        fd.add_progress_hook(ph)
1913                    if self.params.get('verbose'):
1914                        self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
1915                    return fd.download(name, info)
1916
1917                if info_dict.get('requested_formats') is not None:
1918                    downloaded = []
1919                    success = True
1920                    merger = FFmpegMergerPP(self)
1921                    if not merger.available:
1922                        postprocessors = []
1923                        self.report_warning('You have requested multiple '
1924                                            'formats but ffmpeg or avconv are not installed.'
1925                                            ' The formats won\'t be merged.')
1926                    else:
1927                        postprocessors = [merger]
1928
1929                    def compatible_formats(formats):
1930                        video, audio = formats
1931                        # Check extension
1932                        video_ext, audio_ext = video.get('ext'), audio.get('ext')
1933                        if video_ext and audio_ext:
1934                            COMPATIBLE_EXTS = (
1935                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1936                                ('webm')
1937                            )
1938                            for exts in COMPATIBLE_EXTS:
1939                                if video_ext in exts and audio_ext in exts:
1940                                    return True
1941                        # TODO: Check acodec/vcodec
1942                        return False
1943
1944                    filename_real_ext = os.path.splitext(filename)[1][1:]
1945                    filename_wo_ext = (
1946                        os.path.splitext(filename)[0]
1947                        if filename_real_ext == info_dict['ext']
1948                        else filename)
1949                    requested_formats = info_dict['requested_formats']
1950                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1951                        info_dict['ext'] = 'mkv'
1952                        self.report_warning(
1953                            'Requested formats are incompatible for merge and will be merged into mkv.')
1954                    # Ensure filename always has a correct extension for successful merge
1955                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1956                    if os.path.exists(encodeFilename(filename)):
1957                        self.to_screen(
1958                            '[download] %s has already been downloaded and '
1959                            'merged' % filename)
1960                    else:
1961                        for f in requested_formats:
1962                            new_info = dict(info_dict)
1963                            new_info.update(f)
1964                            fname = prepend_extension(
1965                                self.prepare_filename(new_info),
1966                                'f%s' % f['format_id'], new_info['ext'])
1967                            if not ensure_dir_exists(fname):
1968                                return
1969                            downloaded.append(fname)
1970                            partial_success = dl(fname, new_info)
1971                            success = success and partial_success
1972                        info_dict['__postprocessors'] = postprocessors
1973                        info_dict['__files_to_merge'] = downloaded
1974                else:
1975                    # Just a single file
1976                    success = dl(filename, info_dict)
1977            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1978                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1979                return
1980            except (OSError, IOError) as err:
1981                raise UnavailableVideoError(err)
1982            except (ContentTooShortError, ) as err:
1983                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1984                return
1985
1986            if success and filename != '-':
1987                # Fixup content
1988                fixup_policy = self.params.get('fixup')
1989                if fixup_policy is None:
1990                    fixup_policy = 'detect_or_warn'
1991
1992                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1993
1994                stretched_ratio = info_dict.get('stretched_ratio')
1995                if stretched_ratio is not None and stretched_ratio != 1:
1996                    if fixup_policy == 'warn':
1997                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1998                            info_dict['id'], stretched_ratio))
1999                    elif fixup_policy == 'detect_or_warn':
2000                        stretched_pp = FFmpegFixupStretchedPP(self)
2001                        if stretched_pp.available:
2002                            info_dict.setdefault('__postprocessors', [])
2003                            info_dict['__postprocessors'].append(stretched_pp)
2004                        else:
2005                            self.report_warning(
2006                                '%s: Non-uniform pixel ratio (%s). %s'
2007                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2008                    else:
2009                        assert fixup_policy in ('ignore', 'never')
2010
2011                if (info_dict.get('requested_formats') is None
2012                        and info_dict.get('container') == 'm4a_dash'):
2013                    if fixup_policy == 'warn':
2014                        self.report_warning(
2015                            '%s: writing DASH m4a. '
2016                            'Only some players support this container.'
2017                            % info_dict['id'])
2018                    elif fixup_policy == 'detect_or_warn':
2019                        fixup_pp = FFmpegFixupM4aPP(self)
2020                        if fixup_pp.available:
2021                            info_dict.setdefault('__postprocessors', [])
2022                            info_dict['__postprocessors'].append(fixup_pp)
2023                        else:
2024                            self.report_warning(
2025                                '%s: writing DASH m4a. '
2026                                'Only some players support this container. %s'
2027                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2028                    else:
2029                        assert fixup_policy in ('ignore', 'never')
2030
2031                if (info_dict.get('protocol') == 'm3u8_native'
2032                        or info_dict.get('protocol') == 'm3u8'
2033                        and self.params.get('hls_prefer_native')):
2034                    if fixup_policy == 'warn':
2035                        self.report_warning('%s: malformed AAC bitstream detected.' % (
2036                            info_dict['id']))
2037                    elif fixup_policy == 'detect_or_warn':
2038                        fixup_pp = FFmpegFixupM3u8PP(self)
2039                        if fixup_pp.available:
2040                            info_dict.setdefault('__postprocessors', [])
2041                            info_dict['__postprocessors'].append(fixup_pp)
2042                        else:
2043                            self.report_warning(
2044                                '%s: malformed AAC bitstream detected. %s'
2045                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2046                    else:
2047                        assert fixup_policy in ('ignore', 'never')
2048
2049                try:
2050                    self.post_process(filename, info_dict)
2051                except (PostProcessingError) as err:
2052                    self.report_error('postprocessing: %s' % str(err))
2053                    return
2054                self.record_download_archive(info_dict)
2055
2056    def download(self, url_list):
2057        """Download a given list of URLs."""
2058        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2059        if (len(url_list) > 1
2060                and outtmpl != '-'
2061                and '%' not in outtmpl
2062                and self.params.get('max_downloads') != 1):
2063            raise SameFileError(outtmpl)
2064
2065        for url in url_list:
2066            try:
2067                # It also downloads the videos
2068                res = self.extract_info(
2069                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2070            except UnavailableVideoError:
2071                self.report_error('unable to download video')
2072            except MaxDownloadsReached:
2073                self.to_screen('[info] Maximum number of downloaded files reached.')
2074                raise
2075            else:
2076                if self.params.get('dump_single_json', False):
2077                    self.to_stdout(json.dumps(res))
2078
2079        return self._download_retcode
2080
2081    def download_with_info_file(self, info_filename):
2082        with contextlib.closing(fileinput.FileInput(
2083                [info_filename], mode='r',
2084                openhook=fileinput.hook_encoded('utf-8'))) as f:
2085            # FileInput doesn't have a read method, we can't call json.load
2086            info = self.filter_requested_info(json.loads('\n'.join(f)))
2087        try:
2088            self.process_ie_result(info, download=True)
2089        except DownloadError:
2090            webpage_url = info.get('webpage_url')
2091            if webpage_url is not None:
2092                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2093                return self.download([webpage_url])
2094            else:
2095                raise
2096        return self._download_retcode
2097
2098    @staticmethod
2099    def filter_requested_info(info_dict):
2100        return dict(
2101            (k, v) for k, v in info_dict.items()
2102            if k not in ['requested_formats', 'requested_subtitles'])
2103
2104    def post_process(self, filename, ie_info):
2105        """Run all the postprocessors on the given file."""
2106        info = dict(ie_info)
2107        info['filepath'] = filename
2108        pps_chain = []
2109        if ie_info.get('__postprocessors') is not None:
2110            pps_chain.extend(ie_info['__postprocessors'])
2111        pps_chain.extend(self._pps)
2112        for pp in pps_chain:
2113            files_to_delete = []
2114            try:
2115                files_to_delete, info = pp.run(info)
2116            except PostProcessingError as e:
2117                self.report_error(e.msg)
2118            if files_to_delete and not self.params.get('keepvideo', False):
2119                for old_filename in files_to_delete:
2120                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2121                    try:
2122                        os.remove(encodeFilename(old_filename))
2123                    except (IOError, OSError):
2124                        self.report_warning('Unable to remove downloaded original file')
2125
2126    def _make_archive_id(self, info_dict):
2127        video_id = info_dict.get('id')
2128        if not video_id:
2129            return
2130        # Future-proof against any change in case
2131        # and backwards compatibility with prior versions
2132        extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2133        if extractor is None:
2134            url = str_or_none(info_dict.get('url'))
2135            if not url:
2136                return
2137            # Try to find matching extractor for the URL and take its ie_key
2138            for ie in self._ies:
2139                if ie.suitable(url):
2140                    extractor = ie.ie_key()
2141                    break
2142            else:
2143                return
2144        return extractor.lower() + ' ' + video_id
2145
2146    def in_download_archive(self, info_dict):
2147        fn = self.params.get('download_archive')
2148        if fn is None:
2149            return False
2150
2151        vid_id = self._make_archive_id(info_dict)
2152        if not vid_id:
2153            return False  # Incomplete video information
2154
2155        try:
2156            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
2157                for line in archive_file:
2158                    if line.strip() == vid_id:
2159                        return True
2160        except IOError as ioe:
2161            if ioe.errno != errno.ENOENT:
2162                raise
2163        return False
2164
2165    def record_download_archive(self, info_dict):
2166        fn = self.params.get('download_archive')
2167        if fn is None:
2168            return
2169        vid_id = self._make_archive_id(info_dict)
2170        assert vid_id
2171        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2172            archive_file.write(vid_id + '\n')
2173
2174    @staticmethod
2175    def format_resolution(format, default='unknown'):
2176        if format.get('vcodec') == 'none':
2177            return 'audio only'
2178        if format.get('resolution') is not None:
2179            return format['resolution']
2180        if format.get('height') is not None:
2181            if format.get('width') is not None:
2182                res = '%sx%s' % (format['width'], format['height'])
2183            else:
2184                res = '%sp' % format['height']
2185        elif format.get('width') is not None:
2186            res = '%dx?' % format['width']
2187        else:
2188            res = default
2189        return res
2190
    def _format_note(self, fdict):
        """Build the free-form 'note' column text for --list-formats.

        Concatenates whichever attributes are present in the format dict
        (language, format_note, bitrates, container, codecs, fps, sample
        rate, filesize) into a single descriptive string. The order of
        the checks below determines the order of the output fragments.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' glues the codec to the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No (usable) video codec, but separate video/audio bitrates exist
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # Approximate size is marked with a leading '~'
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2246
2247    def list_formats(self, info_dict):
2248        formats = info_dict.get('formats', [info_dict])
2249        table = [
2250            [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2251            for f in formats
2252            if f.get('preference') is None or f['preference'] >= -1000]
2253        if len(formats) > 1:
2254            table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2255
2256        header_line = ['format code', 'extension', 'resolution', 'note']
2257        self.to_screen(
2258            '[info] Available formats for %s:\n%s' %
2259            (info_dict['id'], render_table(header_line, table)))
2260
2261    def list_thumbnails(self, info_dict):
2262        thumbnails = info_dict.get('thumbnails')
2263        if not thumbnails:
2264            self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2265            return
2266
2267        self.to_screen(
2268            '[info] Thumbnails for %s:' % info_dict['id'])
2269        self.to_screen(render_table(
2270            ['ID', 'width', 'height', 'URL'],
2271            [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2272
2273    def list_subtitles(self, video_id, subtitles, name='subtitles'):
2274        if not subtitles:
2275            self.to_screen('%s has no %s' % (video_id, name))
2276            return
2277        self.to_screen(
2278            'Available %s for %s:' % (name, video_id))
2279        self.to_screen(render_table(
2280            ['Language', 'formats'],
2281            [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2282                for lang, formats in subtitles.items()]))
2283
2284    def urlopen(self, req):
2285        """ Start an HTTP download """
2286        if isinstance(req, compat_basestring):
2287            req = sanitized_Request(req)
2288        return self._opener.open(req, timeout=self._socket_timeout)
2289
    def print_debug_header(self):
        """Write the '[debug] ...' diagnostics block (encodings, version,
        git HEAD, Python/exe versions, proxy map) when --verbose is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute (e.g. when replaced)
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: report the git commit when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore if unavailable
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append PyPy's own version triple, which python_version() omits
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # Versions of the external programs we may invoke (ffmpeg etc.)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect proxies from all opener handlers that expose them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in (--call-home): report public IP and check for updates
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2365
    def _setup_opener(self):
        """Build self._opener: a urllib opener wired with cookie, proxy,
        HTTPS, redirect and data-URI handlers configured from self.params."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No --cookies file: keep cookies in memory only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            # Only load when readable; the file may not exist yet
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to environment-configured proxies
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2418
2419    def encode(self, s):
2420        if isinstance(s, bytes):
2421            return s  # Already encoded
2422
2423        try:
2424            return s.encode(self.get_encoding())
2425        except UnicodeEncodeError as err:
2426            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2427            raise
2428
2429    def get_encoding(self):
2430        encoding = self.params.get('encoding')
2431        if encoding is None:
2432            encoding = preferredencoding()
2433        return encoding
2434
2435    def _write_thumbnails(self, info_dict, filename):
2436        if self.params.get('writethumbnail', False):
2437            thumbnails = info_dict.get('thumbnails')
2438            if thumbnails:
2439                thumbnails = [thumbnails[-1]]
2440        elif self.params.get('write_all_thumbnails', False):
2441            thumbnails = info_dict.get('thumbnails')
2442        else:
2443            return
2444
2445        if not thumbnails:
2446            # No thumbnails present, so return immediately
2447            return
2448
2449        for t in thumbnails:
2450            thumb_ext = determine_ext(t['url'], 'jpg')
2451            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2452            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2453            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2454
2455            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2456                self.to_screen('[%s] %s: Thumbnail %sis already present' %
2457                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
2458            else:
2459                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2460                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
2461                try:
2462                    uf = self.urlopen(t['url'])
2463                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2464                        shutil.copyfileobj(uf, thumbf)
2465                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2466                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2467                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2468                    self.report_warning('Unable to download thumbnail "%s": %s' %
2469                                        (t['url'], error_to_compat_str(err)))
2470