# -*- coding: utf-8 -*-

# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Helpers for interacting with youtube-dl"""

import re
import shlex
import itertools
from . import text, util, exception


def import_module(module_name):
    """Import and return a youtube-dl compatible module.

    With 'module_name' set to None, "yt_dlp" is tried first and
    "youtube_dl" is used when that import fails. Otherwise the given
    name is imported after replacing every "-" with "_".

    Raises ImportError when the selected module cannot be imported.
    """
    if module_name is None:
        try:
            return __import__("yt_dlp")
        except ImportError:
            return __import__("youtube_dl")
    return __import__(module_name.replace("-", "_"))


def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
    """Create a 'module.YoutubeDL' instance configured through 'obj'.

    Arguments:
        module: youtube-dl compatible module (see import_module()).
        obj: object providing 'config(key, default=None)' for option
            lookup and a 'log' attribute used as logger.
        user_opts: dict of YoutubeDL parameters used as base options
            when neither "config-file" nor "cmdline-args" is configured.
            It is never modified.
        system_opts: optional dict applied last, overriding everything.

    Order of precedence (lowest to highest): options parsed from
    "config-file" + "cmdline-args" (or a copy of 'user_opts'), config
    values for still-unset fields, "raw-options", the logger,
    'system_opts'.

    Raises exception.StopExtraction when the option parser exits.
    """
    opts = argv = None
    config = obj.config

    cfg = config("config-file")
    if cfg:
        with open(util.expand_path(cfg)) as fp:
            contents = fp.read()
        # 'comments=True' drops everything after a '#' in the file
        argv = shlex.split(contents, comments=True)

    cmd = config("cmdline-args")
    if cmd:
        if isinstance(cmd, str):
            cmd = shlex.split(cmd)
        argv = (argv + cmd) if argv else cmd

    try:
        if argv:
            opts = parse_command_line(module, argv)
        else:
            # Work on a copy: the defaults, raw options, logger, and
            # system options applied below must not leak into the
            # caller's dict, which may be reused for another instance.
            opts = dict(user_opts or ())
    except SystemExit:
        raise exception.StopExtraction("Invalid command-line option")

    # fill in values from the config for everything still unset
    if opts.get("format") is None:
        opts["format"] = config("format")
    if opts.get("nopart") is None:
        opts["nopart"] = not config("part", True)
    if opts.get("updatetime") is None:
        opts["updatetime"] = config("mtime", True)
    if opts.get("ratelimit") is None:
        opts["ratelimit"] = text.parse_bytes(config("rate"), None)
    if opts.get("min_filesize") is None:
        opts["min_filesize"] = text.parse_bytes(config("filesize-min"), None)
    if opts.get("max_filesize") is None:
        opts["max_filesize"] = text.parse_bytes(config("filesize-max"), None)

    raw_opts = config("raw-options")
    if raw_opts:
        opts.update(raw_opts)
    if config("logging", True):
        opts["logger"] = obj.log
    if system_opts:
        opts.update(system_opts)

    return module.YoutubeDL(opts)


def parse_command_line(module, argv):
    """Convert command-line arguments into a YoutubeDL parameter dict.

    'argv' is parsed with 'module.parseOpts()'; the resulting option
    values are normalized, a list of postprocessor descriptions is
    assembled, and everything is returned as a dict intended for
    'module.YoutubeDL'.

    Options read through getattr() are treated as optional and
    default to None (or an empty value) when the parser of 'module'
    does not provide them.

    Side effect: 'module.std_headers' is updated in place with the
    User-Agent, Referer, and custom header options.

    Whatever 'module.parseOpts()' and the other 'module' helpers raise
    is propagated (construct_YoutubeDL() handles SystemExit);
    parse_retries() raises ValueError for invalid retry counts.
    """
    parser, opts, args = module.parseOpts(argv)

    ytdlp = (module.__name__ == "yt_dlp")
    std_headers = module.std_headers
    parse_bytes = module.FileDownloader.parse_bytes

    # HTTP headers
    if opts.user_agent is not None:
        std_headers["User-Agent"] = opts.user_agent
    if opts.referer is not None:
        std_headers["Referer"] = opts.referer
    if opts.headers:
        if isinstance(opts.headers, dict):
            std_headers.update(opts.headers)
        else:
            # list of "key:value" strings
            for h in opts.headers:
                key, _, value = h.partition(":")
                std_headers[key] = value

    # convert textual sizes, counts, and names into their final form
    if opts.ratelimit is not None:
        opts.ratelimit = parse_bytes(opts.ratelimit)
    if getattr(opts, "throttledratelimit", None) is not None:
        opts.throttledratelimit = parse_bytes(opts.throttledratelimit)
    if opts.min_filesize is not None:
        opts.min_filesize = parse_bytes(opts.min_filesize)
    if opts.max_filesize is not None:
        opts.max_filesize = parse_bytes(opts.max_filesize)
    if opts.max_sleep_interval is None:
        opts.max_sleep_interval = opts.sleep_interval
    if getattr(opts, "overwrites", None):
        opts.continue_dl = False
    if opts.retries is not None:
        opts.retries = parse_retries(opts.retries, "download")
    if getattr(opts, "file_access_retries", None) is not None:
        opts.file_access_retries = parse_retries(
            opts.file_access_retries, "file access")
    if opts.fragment_retries is not None:
        opts.fragment_retries = parse_retries(
            opts.fragment_retries, "fragment")
    if getattr(opts, "extractor_retries", None) is not None:
        opts.extractor_retries = parse_retries(
            opts.extractor_retries, "extractor")
    if opts.buffersize is not None:
        opts.buffersize = parse_bytes(opts.buffersize)
    if opts.http_chunk_size is not None:
        opts.http_chunk_size = parse_bytes(opts.http_chunk_size)
    if opts.extractaudio:
        opts.audioformat = opts.audioformat.lower()
    if opts.audioquality:
        opts.audioquality = opts.audioquality.strip("kK")
    if opts.recodevideo is not None:
        opts.recodevideo = opts.recodevideo.replace(" ", "")
    if getattr(opts, "remuxvideo", None) is not None:
        opts.remuxvideo = opts.remuxvideo.replace(" ", "")
    if getattr(opts, "wait_for_video", None) is not None:
        # "MIN" or "MIN-MAX"; 'max_wait' is "" when no "-" is given
        min_wait, _, max_wait = opts.wait_for_video.partition("-")
        opts.wait_for_video = (module.parse_duration(min_wait),
                               module.parse_duration(max_wait))

    if opts.date is not None:
        date = module.DateRange.day(opts.date)
    else:
        date = module.DateRange(opts.dateafter, opts.datebefore)

    # empty tuple when the parser has no 'compat_opts';
    # the helpers below then never reach discard()/update()
    compat_opts = getattr(opts, "compat_opts", ())

    def _unused_compat_opt(name):
        """Replace compat option 'name' with "*name"; False if not set"""
        if name not in compat_opts:
            return False
        compat_opts.discard(name)
        compat_opts.update(["*%s" % name])
        return True

    def set_default_compat(
            compat_name, opt_name, default=True, remove_compat=True):
        """Give option 'opt_name' a value when it is still None.

        Returns True when the compat option decided the value
        ('not default'), False when the compat option is present but
        the option already had a value, and None otherwise.
        """
        attr = getattr(opts, opt_name, None)
        if compat_name in compat_opts:
            if attr is None:
                setattr(opts, opt_name, not default)
                return True
            else:
                if remove_compat:
                    _unused_compat_opt(compat_name)
                return False
        elif attr is None:
            setattr(opts, opt_name, default)
        return None

    set_default_compat("abort-on-error", "ignoreerrors", "only_download")
    set_default_compat("no-playlist-metafiles", "allow_playlist_files")
    set_default_compat("no-clean-infojson", "clean_infojson")
    if "format-sort" in compat_opts:
        opts.format_sort.extend(module.InfoExtractor.FormatSort.ytdl_default)
    # "multistreams" covers two options; only mark it as unused
    # when it had no effect on either of them
    _video_multistreams_set = set_default_compat(
        "multistreams", "allow_multiple_video_streams",
        False, remove_compat=False)
    _audio_multistreams_set = set_default_compat(
        "multistreams", "allow_multiple_audio_streams",
        False, remove_compat=False)
    if _video_multistreams_set is False and _audio_multistreams_set is False:
        _unused_compat_opt("multistreams")

    if isinstance(opts.outtmpl, dict):
        outtmpl = opts.outtmpl
        outtmpl_default = outtmpl.get("default")
    else:
        # any non-dict output template is replaced by an empty string
        opts.outtmpl = outtmpl = outtmpl_default = ""

    if "filename" in compat_opts:
        if outtmpl_default is None:
            outtmpl_default = outtmpl["default"] = "%(title)s-%(id)s.%(ext)s"
        else:
            _unused_compat_opt("filename")

    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = "bestaudio/best"

    if ytdlp:
        def metadataparser_actions(f):
            """Yield MetadataParser actions for one 'parse_metadata' entry"""
            if isinstance(f, str):
                yield module.MetadataFromFieldPP.to_action(f)
            else:
                # sequence: comma-separated field names + shared arguments
                REPLACE = module.MetadataParserPP.Actions.REPLACE
                args = f[1:]
                for x in f[0].split(","):
                    action = [REPLACE, x]
                    action += args
                    yield action

        if getattr(opts, "parse_metadata", None) is None:
            opts.parse_metadata = []
        if opts.metafromtitle is not None:
            # handled by MetadataParser; reset so that no separate
            # MetadataFromTitle postprocessor gets added below
            opts.parse_metadata.append("title:%s" % opts.metafromtitle)
            opts.metafromtitle = None
        opts.parse_metadata = list(itertools.chain.from_iterable(map(
            metadataparser_actions, opts.parse_metadata)))
    else:
        opts.parse_metadata = ()

    download_archive_fn = module.expand_path(opts.download_archive) \
        if opts.download_archive is not None else opts.download_archive

    if getattr(opts, "getcomments", None):
        opts.writeinfojson = True

    if getattr(opts, "no_sponsorblock", None):
        opts.sponsorblock_mark = set()
        opts.sponsorblock_remove = set()
    else:
        opts.sponsorblock_mark = \
            getattr(opts, "sponsorblock_mark", None) or set()
        opts.sponsorblock_remove = \
            getattr(opts, "sponsorblock_remove", None) or set()
    sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
    opts.remove_chapters = getattr(opts, "remove_chapters", None) or ()

    # PostProcessors
    postprocessors = []
    if opts.metafromtitle:
        postprocessors.append({
            "key": "MetadataFromTitle",
            "titleformat": opts.metafromtitle,
        })
    if getattr(opts, "add_postprocessors", None):
        postprocessors += list(opts.add_postprocessors)
    if sponsorblock_query:
        postprocessors.append({
            "key": "SponsorBlock",
            "categories": sponsorblock_query,
            "api": opts.sponsorblock_api,
            "when": "pre_process",
        })
    if opts.parse_metadata:
        postprocessors.append({
            "key": "MetadataParser",
            "actions": opts.parse_metadata,
            "when": "pre_process",
        })
    if opts.convertsubtitles:
        pp = {"key": "FFmpegSubtitlesConvertor",
              "format": opts.convertsubtitles}
        if ytdlp:
            pp["when"] = "before_dl"
        postprocessors.append(pp)
    if getattr(opts, "convertthumbnails", None):
        postprocessors.append({
            "key": "FFmpegThumbnailsConvertor",
            "format": opts.convertthumbnails,
            "when": "before_dl",
        })
    if getattr(opts, "exec_before_dl_cmd", None):
        postprocessors.append({
            "key": "Exec",
            "exec_cmd": opts.exec_before_dl_cmd,
            "when": "before_dl",
        })
    if opts.extractaudio:
        postprocessors.append({
            "key": "FFmpegExtractAudio",
            "preferredcodec": opts.audioformat,
            "preferredquality": opts.audioquality,
            "nopostoverwrites": opts.nopostoverwrites,
        })
    if getattr(opts, "remuxvideo", None):
        postprocessors.append({
            "key": "FFmpegVideoRemuxer",
            "preferedformat": opts.remuxvideo,
        })
    if opts.recodevideo:
        postprocessors.append({
            "key": "FFmpegVideoConvertor",
            "preferedformat": opts.recodevideo,
        })
    if opts.embedsubtitles:
        pp = {"key": "FFmpegEmbedSubtitle"}
        if ytdlp:
            pp["already_have_subtitle"] = (
                opts.writesubtitles and "no-keep-subs" not in compat_opts)
        postprocessors.append(pp)
        if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts:
            opts.writesubtitles = True
    if opts.allsubtitles and not opts.writeautomaticsub:
        opts.writesubtitles = True
    remove_chapters_patterns, remove_ranges = [], []
    for regex in opts.remove_chapters:
        if regex.startswith("*"):
            # "*START-END" time range; anything that does not parse as
            # two durations falls through and is compiled as a regex
            dur = list(map(module.parse_duration, regex[1:].split("-")))
            if len(dur) == 2 and all(t is not None for t in dur):
                remove_ranges.append(tuple(dur))
                continue
        remove_chapters_patterns.append(re.compile(regex))
    if opts.remove_chapters or sponsorblock_query:
        postprocessors.append({
            "key": "ModifyChapters",
            "remove_chapters_patterns": remove_chapters_patterns,
            "remove_sponsor_segments": opts.sponsorblock_remove,
            "remove_ranges": remove_ranges,
            "sponsorblock_chapter_title": opts.sponsorblock_chapter_title,
            "force_keyframes": opts.force_keyframes_at_cuts,
        })
    addchapters = getattr(opts, "addchapters", None)
    embed_infojson = getattr(opts, "embed_infojson", None)
    if opts.addmetadata or addchapters or embed_infojson:
        pp = {"key": "FFmpegMetadata"}
        if ytdlp:
            if embed_infojson is None:
                embed_infojson = "if_exists"
            pp["add_metadata"] = opts.addmetadata
            pp["add_chapters"] = addchapters
            pp["add_infojson"] = embed_infojson

        postprocessors.append(pp)
    # False (also the fallback for a missing option) disables SponSkrub;
    # None enables it with "ignoreerror" set
    if getattr(opts, "sponskrub", False) is not False:
        postprocessors.append({
            "key": "SponSkrub",
            "path": opts.sponskrub_path,
            "args": opts.sponskrub_args,
            "cut": opts.sponskrub_cut,
            "force": opts.sponskrub_force,
            "ignoreerror": opts.sponskrub is None,
            "_from_cli": True,
        })
    if opts.embedthumbnail:
        already_have_thumbnail = (opts.writethumbnail or
                                  getattr(opts, "write_all_thumbnails", False))
        postprocessors.append({
            "key": "EmbedThumbnail",
            "already_have_thumbnail": already_have_thumbnail,
        })
        if not already_have_thumbnail:
            # embedding needs a thumbnail file, so request one
            opts.writethumbnail = True
            if isinstance(opts.outtmpl, dict):
                opts.outtmpl["pl_thumbnail"] = ""
    if getattr(opts, "split_chapters", None):
        postprocessors.append({
            "key": "FFmpegSplitChapters",
            "force_keyframes": opts.force_keyframes_at_cuts,
        })
    if opts.xattrs:
        postprocessors.append({"key": "XAttrMetadata"})
    if opts.exec_cmd:
        postprocessors.append({
            "key": "Exec",
            "exec_cmd": opts.exec_cmd,
            "when": "after_move",
        })

    match_filter = (
        None if opts.match_filter is None
        else module.match_filter_func(opts.match_filter))

    return {
        "usenetrc": opts.usenetrc,
        "netrc_location": getattr(opts, "netrc_location", None),
        "username": opts.username,
        "password": opts.password,
        "twofactor": opts.twofactor,
        "videopassword": opts.videopassword,
        "ap_mso": opts.ap_mso,
        "ap_username": opts.ap_username,
        "ap_password": opts.ap_password,
        "quiet": opts.quiet,
        "no_warnings": opts.no_warnings,
        "forceurl": opts.geturl,
        "forcetitle": opts.gettitle,
        "forceid": opts.getid,
        "forcethumbnail": opts.getthumbnail,
        "forcedescription": opts.getdescription,
        "forceduration": opts.getduration,
        "forcefilename": opts.getfilename,
        "forceformat": opts.getformat,
        "forceprint": getattr(opts, "forceprint", None) or (),
        "force_write_download_archive": getattr(
            opts, "force_write_download_archive", None),
        "simulate": opts.simulate,
        "skip_download": opts.skip_download,
        "format": opts.format,
        "allow_unplayable_formats": getattr(
            opts, "allow_unplayable_formats", None),
        "ignore_no_formats_error": getattr(
            opts, "ignore_no_formats_error", None),
        "format_sort": getattr(
            opts, "format_sort", None),
        "format_sort_force": getattr(
            opts, "format_sort_force", None),
        "allow_multiple_video_streams": opts.allow_multiple_video_streams,
        "allow_multiple_audio_streams": opts.allow_multiple_audio_streams,
        "check_formats": getattr(
            opts, "check_formats", None),
        "listformats": opts.listformats,
        "listformats_table": getattr(
            opts, "listformats_table", None),
        "outtmpl": opts.outtmpl,
        "outtmpl_na_placeholder": opts.outtmpl_na_placeholder,
        "paths": getattr(opts, "paths", None),
        "autonumber_size": opts.autonumber_size,
        "autonumber_start": opts.autonumber_start,
        "restrictfilenames": opts.restrictfilenames,
        "windowsfilenames": getattr(opts, "windowsfilenames", None),
        "ignoreerrors": opts.ignoreerrors,
        "force_generic_extractor": opts.force_generic_extractor,
        "ratelimit": opts.ratelimit,
        "throttledratelimit": getattr(opts, "throttledratelimit", None),
        "overwrites": getattr(opts, "overwrites", None),
        "retries": opts.retries,
        "file_access_retries": getattr(opts, "file_access_retries", None),
        "fragment_retries": opts.fragment_retries,
        "extractor_retries": getattr(opts, "extractor_retries", None),
        "skip_unavailable_fragments": opts.skip_unavailable_fragments,
        "keep_fragments": opts.keep_fragments,
        "concurrent_fragment_downloads": getattr(
            opts, "concurrent_fragment_downloads", None),
        "buffersize": opts.buffersize,
        "noresizebuffer": opts.noresizebuffer,
        "http_chunk_size": opts.http_chunk_size,
        "continuedl": opts.continue_dl,
        "noprogress": True if opts.noprogress is None else opts.noprogress,
        "playliststart": opts.playliststart,
        "playlistend": opts.playlistend,
        "playlistreverse": opts.playlist_reverse,
        "playlistrandom": opts.playlist_random,
        "noplaylist": opts.noplaylist,
        "logtostderr": outtmpl_default == "-",
        "consoletitle": opts.consoletitle,
        "nopart": opts.nopart,
        "updatetime": opts.updatetime,
        "writedescription": opts.writedescription,
        "writeannotations": opts.writeannotations,
        "writeinfojson": opts.writeinfojson,
        "allow_playlist_files": opts.allow_playlist_files,
        "clean_infojson": opts.clean_infojson,
        "getcomments": getattr(opts, "getcomments", None),
        "writethumbnail": opts.writethumbnail is True,
        "write_all_thumbnails": getattr(opts, "write_all_thumbnails", None) or
        opts.writethumbnail == "all",
        "writelink": getattr(opts, "writelink", None),
        "writeurllink": getattr(opts, "writeurllink", None),
        "writewebloclink": getattr(opts, "writewebloclink", None),
        "writedesktoplink": getattr(opts, "writedesktoplink", None),
        "writesubtitles": opts.writesubtitles,
        "writeautomaticsub": opts.writeautomaticsub,
        "allsubtitles": opts.allsubtitles,
        "listsubtitles": opts.listsubtitles,
        "subtitlesformat": opts.subtitlesformat,
        "subtitleslangs": opts.subtitleslangs,
        "matchtitle": module.decodeOption(opts.matchtitle),
        "rejecttitle": module.decodeOption(opts.rejecttitle),
        "max_downloads": opts.max_downloads,
        "prefer_free_formats": opts.prefer_free_formats,
        "trim_file_name": getattr(opts, "trim_file_name", None),
        "verbose": opts.verbose,
        "dump_intermediate_pages": opts.dump_intermediate_pages,
        "write_pages": opts.write_pages,
        "test": opts.test,
        "keepvideo": opts.keepvideo,
        "min_filesize": opts.min_filesize,
        "max_filesize": opts.max_filesize,
        "min_views": opts.min_views,
        "max_views": opts.max_views,
        "daterange": date,
        "cachedir": opts.cachedir,
        "youtube_print_sig_code": opts.youtube_print_sig_code,
        "age_limit": opts.age_limit,
        "download_archive": download_archive_fn,
        "break_on_existing": getattr(opts, "break_on_existing", None),
        "break_on_reject": getattr(opts, "break_on_reject", None),
        "break_per_url": getattr(opts, "break_per_url", None),
        "skip_playlist_after_errors": getattr(
            opts, "skip_playlist_after_errors", None),
        "cookiefile": opts.cookiefile,
        "cookiesfrombrowser": getattr(opts, "cookiesfrombrowser", None),
        "nocheckcertificate": opts.no_check_certificate,
        "prefer_insecure": opts.prefer_insecure,
        "proxy": opts.proxy,
        "socket_timeout": opts.socket_timeout,
        "bidi_workaround": opts.bidi_workaround,
        "debug_printtraffic": opts.debug_printtraffic,
        "prefer_ffmpeg": opts.prefer_ffmpeg,
        "include_ads": opts.include_ads,
        "default_search": opts.default_search,
        "dynamic_mpd": getattr(opts, "dynamic_mpd", None),
        "extractor_args": getattr(opts, "extractor_args", None),
        "youtube_include_dash_manifest": getattr(
            opts, "youtube_include_dash_manifest", None),
        "youtube_include_hls_manifest": getattr(
            opts, "youtube_include_hls_manifest", None),
        "encoding": opts.encoding,
        "extract_flat": opts.extract_flat,
        "live_from_start": getattr(opts, "live_from_start", None),
        "wait_for_video": getattr(opts, "wait_for_video", None),
        "mark_watched": opts.mark_watched,
        "merge_output_format": opts.merge_output_format,
        "postprocessors": postprocessors,
        "fixup": opts.fixup,
        "source_address": opts.source_address,
        "sleep_interval_requests": getattr(
            opts, "sleep_interval_requests", None),
        "sleep_interval": opts.sleep_interval,
        "max_sleep_interval": opts.max_sleep_interval,
        "sleep_interval_subtitles": getattr(
            opts, "sleep_interval_subtitles", None),
        "external_downloader": opts.external_downloader,
        "playlist_items": opts.playlist_items,
        "xattr_set_filesize": opts.xattr_set_filesize,
        "match_filter": match_filter,
        "no_color": opts.no_color,
        "ffmpeg_location": opts.ffmpeg_location,
        "hls_prefer_native": opts.hls_prefer_native,
        "hls_use_mpegts": opts.hls_use_mpegts,
        "hls_split_discontinuity": getattr(
            opts, "hls_split_discontinuity", None),
        "external_downloader_args": opts.external_downloader_args,
        "postprocessor_args": opts.postprocessor_args,
        "cn_verification_proxy": opts.cn_verification_proxy,
        "geo_verification_proxy": opts.geo_verification_proxy,
        "geo_bypass": opts.geo_bypass,
        "geo_bypass_country": opts.geo_bypass_country,
        "geo_bypass_ip_block": opts.geo_bypass_ip_block,
        "compat_opts": compat_opts,
    }


def parse_retries(retries, name=""):
    """Convert a retry count option into a number.

    "inf" and "infinite" yield float("inf"); every other value is
    passed to int(). 'name' is an optional label for the option and
    is only used in the error message.

    Raises ValueError when 'retries' is not a valid integer.
    """
    if retries in ("inf", "infinite"):
        return float("inf")
    try:
        return int(retries)
    except ValueError:
        label = "%s retry count" % name if name else "retry count"
        # the bare int() error does not tell which option was invalid
        raise ValueError("invalid %s: %r" % (label, retries)) from None