1#!/usr/bin/env python
2# coding: utf-8
3
4from __future__ import unicode_literals
5
6# Allow direct execution
7import os
8import sys
9import unittest
10sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
12import copy
13
14from test.helper import FakeYDL, assertRegexpMatches
15from youtube_dl import YoutubeDL
16from youtube_dl.compat import compat_str, compat_urllib_error
17from youtube_dl.extractor import YoutubeIE
18from youtube_dl.extractor.common import InfoExtractor
19from youtube_dl.postprocessor.common import PostProcessor
20from youtube_dl.utils import ExtractorError, match_filter_func
21
22TEST_URL = 'http://localhost/sample.mp4'
23
24
class YDL(FakeYDL):
    """FakeYDL subclass that records what it is asked to do.

    Info dicts handed to process_info() are collected in
    ``downloaded_info_dicts`` and messages passed to to_screen() are
    collected in ``msgs``, so tests can inspect them afterwards.
    """

    def __init__(self, *args, **kwargs):
        super(YDL, self).__init__(*args, **kwargs)
        # Recorders inspected by the tests once a run has finished.
        self.msgs = []
        self.downloaded_info_dicts = []

    def to_screen(self, msg):
        """Store *msg* in ``msgs``."""
        self.msgs += [msg]

    def process_info(self, info_dict):
        """Store *info_dict* in ``downloaded_info_dicts``."""
        self.downloaded_info_dicts += [info_dict]
36
37
38def _make_result(formats, **kwargs):
39    res = {
40        'formats': formats,
41        'id': 'testid',
42        'title': 'testttitle',
43        'extractor': 'testex',
44        'extractor_key': 'TestEx',
45    }
46    res.update(**kwargs)
47    return res
48
49
50class TestFormatSelection(unittest.TestCase):
51    def test_prefer_free_formats(self):
52        # Same resolution => download webm
53        ydl = YDL()
54        ydl.params['prefer_free_formats'] = True
55        formats = [
56            {'ext': 'webm', 'height': 460, 'url': TEST_URL},
57            {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
58        ]
59        info_dict = _make_result(formats)
60        yie = YoutubeIE(ydl)
61        yie._sort_formats(info_dict['formats'])
62        ydl.process_ie_result(info_dict)
63        downloaded = ydl.downloaded_info_dicts[0]
64        self.assertEqual(downloaded['ext'], 'webm')
65
66        # Different resolution => download best quality (mp4)
67        ydl = YDL()
68        ydl.params['prefer_free_formats'] = True
69        formats = [
70            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
71            {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
72        ]
73        info_dict['formats'] = formats
74        yie = YoutubeIE(ydl)
75        yie._sort_formats(info_dict['formats'])
76        ydl.process_ie_result(info_dict)
77        downloaded = ydl.downloaded_info_dicts[0]
78        self.assertEqual(downloaded['ext'], 'mp4')
79
80        # No prefer_free_formats => prefer mp4 and flv for greater compatibility
81        ydl = YDL()
82        ydl.params['prefer_free_formats'] = False
83        formats = [
84            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
85            {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
86            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
87        ]
88        info_dict['formats'] = formats
89        yie = YoutubeIE(ydl)
90        yie._sort_formats(info_dict['formats'])
91        ydl.process_ie_result(info_dict)
92        downloaded = ydl.downloaded_info_dicts[0]
93        self.assertEqual(downloaded['ext'], 'mp4')
94
95        ydl = YDL()
96        ydl.params['prefer_free_formats'] = False
97        formats = [
98            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
99            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
100        ]
101        info_dict['formats'] = formats
102        yie = YoutubeIE(ydl)
103        yie._sort_formats(info_dict['formats'])
104        ydl.process_ie_result(info_dict)
105        downloaded = ydl.downloaded_info_dicts[0]
106        self.assertEqual(downloaded['ext'], 'flv')
107
108    def test_format_selection(self):
109        formats = [
110            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
111            {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
112            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
113            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
114            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
115        ]
116        info_dict = _make_result(formats)
117
118        ydl = YDL({'format': '20/47'})
119        ydl.process_ie_result(info_dict.copy())
120        downloaded = ydl.downloaded_info_dicts[0]
121        self.assertEqual(downloaded['format_id'], '47')
122
123        ydl = YDL({'format': '20/71/worst'})
124        ydl.process_ie_result(info_dict.copy())
125        downloaded = ydl.downloaded_info_dicts[0]
126        self.assertEqual(downloaded['format_id'], '35')
127
128        ydl = YDL()
129        ydl.process_ie_result(info_dict.copy())
130        downloaded = ydl.downloaded_info_dicts[0]
131        self.assertEqual(downloaded['format_id'], '2')
132
133        ydl = YDL({'format': 'webm/mp4'})
134        ydl.process_ie_result(info_dict.copy())
135        downloaded = ydl.downloaded_info_dicts[0]
136        self.assertEqual(downloaded['format_id'], '47')
137
138        ydl = YDL({'format': '3gp/40/mp4'})
139        ydl.process_ie_result(info_dict.copy())
140        downloaded = ydl.downloaded_info_dicts[0]
141        self.assertEqual(downloaded['format_id'], '35')
142
143        ydl = YDL({'format': 'example-with-dashes'})
144        ydl.process_ie_result(info_dict.copy())
145        downloaded = ydl.downloaded_info_dicts[0]
146        self.assertEqual(downloaded['format_id'], 'example-with-dashes')
147
148    def test_format_selection_audio(self):
149        formats = [
150            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
151            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
152            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
153            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
154        ]
155        info_dict = _make_result(formats)
156
157        ydl = YDL({'format': 'bestaudio'})
158        ydl.process_ie_result(info_dict.copy())
159        downloaded = ydl.downloaded_info_dicts[0]
160        self.assertEqual(downloaded['format_id'], 'audio-high')
161
162        ydl = YDL({'format': 'worstaudio'})
163        ydl.process_ie_result(info_dict.copy())
164        downloaded = ydl.downloaded_info_dicts[0]
165        self.assertEqual(downloaded['format_id'], 'audio-low')
166
167        formats = [
168            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
169            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
170        ]
171        info_dict = _make_result(formats)
172
173        ydl = YDL({'format': 'bestaudio/worstaudio/best'})
174        ydl.process_ie_result(info_dict.copy())
175        downloaded = ydl.downloaded_info_dicts[0]
176        self.assertEqual(downloaded['format_id'], 'vid-high')
177
178    def test_format_selection_audio_exts(self):
179        formats = [
180            {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
181            {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
182            {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
183            {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
184            {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
185        ]
186
187        info_dict = _make_result(formats)
188        ydl = YDL({'format': 'best'})
189        ie = YoutubeIE(ydl)
190        ie._sort_formats(info_dict['formats'])
191        ydl.process_ie_result(copy.deepcopy(info_dict))
192        downloaded = ydl.downloaded_info_dicts[0]
193        self.assertEqual(downloaded['format_id'], 'aac-64')
194
195        ydl = YDL({'format': 'mp3'})
196        ie = YoutubeIE(ydl)
197        ie._sort_formats(info_dict['formats'])
198        ydl.process_ie_result(copy.deepcopy(info_dict))
199        downloaded = ydl.downloaded_info_dicts[0]
200        self.assertEqual(downloaded['format_id'], 'mp3-64')
201
202        ydl = YDL({'prefer_free_formats': True})
203        ie = YoutubeIE(ydl)
204        ie._sort_formats(info_dict['formats'])
205        ydl.process_ie_result(copy.deepcopy(info_dict))
206        downloaded = ydl.downloaded_info_dicts[0]
207        self.assertEqual(downloaded['format_id'], 'ogg-64')
208
209    def test_format_selection_video(self):
210        formats = [
211            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
212            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
213            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
214        ]
215        info_dict = _make_result(formats)
216
217        ydl = YDL({'format': 'bestvideo'})
218        ydl.process_ie_result(info_dict.copy())
219        downloaded = ydl.downloaded_info_dicts[0]
220        self.assertEqual(downloaded['format_id'], 'dash-video-high')
221
222        ydl = YDL({'format': 'worstvideo'})
223        ydl.process_ie_result(info_dict.copy())
224        downloaded = ydl.downloaded_info_dicts[0]
225        self.assertEqual(downloaded['format_id'], 'dash-video-low')
226
227        ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
228        ydl.process_ie_result(info_dict.copy())
229        downloaded = ydl.downloaded_info_dicts[0]
230        self.assertEqual(downloaded['format_id'], 'dash-video-low')
231
232        formats = [
233            {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
234        ]
235        info_dict = _make_result(formats)
236
237        ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
238        ydl.process_ie_result(info_dict.copy())
239        downloaded = ydl.downloaded_info_dicts[0]
240        self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
241
242    def test_format_selection_string_ops(self):
243        formats = [
244            {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
245            {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
246        ]
247        info_dict = _make_result(formats)
248
249        # equals (=)
250        ydl = YDL({'format': '[format_id=abc-cba]'})
251        ydl.process_ie_result(info_dict.copy())
252        downloaded = ydl.downloaded_info_dicts[0]
253        self.assertEqual(downloaded['format_id'], 'abc-cba')
254
255        # does not equal (!=)
256        ydl = YDL({'format': '[format_id!=abc-cba]'})
257        ydl.process_ie_result(info_dict.copy())
258        downloaded = ydl.downloaded_info_dicts[0]
259        self.assertEqual(downloaded['format_id'], 'zxc-cxz')
260
261        ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
262        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
263
264        # starts with (^=)
265        ydl = YDL({'format': '[format_id^=abc]'})
266        ydl.process_ie_result(info_dict.copy())
267        downloaded = ydl.downloaded_info_dicts[0]
268        self.assertEqual(downloaded['format_id'], 'abc-cba')
269
270        # does not start with (!^=)
271        ydl = YDL({'format': '[format_id!^=abc]'})
272        ydl.process_ie_result(info_dict.copy())
273        downloaded = ydl.downloaded_info_dicts[0]
274        self.assertEqual(downloaded['format_id'], 'zxc-cxz')
275
276        ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
277        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
278
279        # ends with ($=)
280        ydl = YDL({'format': '[format_id$=cba]'})
281        ydl.process_ie_result(info_dict.copy())
282        downloaded = ydl.downloaded_info_dicts[0]
283        self.assertEqual(downloaded['format_id'], 'abc-cba')
284
285        # does not end with (!$=)
286        ydl = YDL({'format': '[format_id!$=cba]'})
287        ydl.process_ie_result(info_dict.copy())
288        downloaded = ydl.downloaded_info_dicts[0]
289        self.assertEqual(downloaded['format_id'], 'zxc-cxz')
290
291        ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
292        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
293
294        # contains (*=)
295        ydl = YDL({'format': '[format_id*=bc-cb]'})
296        ydl.process_ie_result(info_dict.copy())
297        downloaded = ydl.downloaded_info_dicts[0]
298        self.assertEqual(downloaded['format_id'], 'abc-cba')
299
300        # does not contain (!*=)
301        ydl = YDL({'format': '[format_id!*=bc-cb]'})
302        ydl.process_ie_result(info_dict.copy())
303        downloaded = ydl.downloaded_info_dicts[0]
304        self.assertEqual(downloaded['format_id'], 'zxc-cxz')
305
306        ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
307        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
308
309        ydl = YDL({'format': '[format_id!*=-]'})
310        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
311
312    def test_youtube_format_selection(self):
313        order = [
314            '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
315            # Apple HTTP Live Streaming
316            '96', '95', '94', '93', '92', '132', '151',
317            # 3D
318            '85', '84', '102', '83', '101', '82', '100',
319            # Dash video
320            '137', '248', '136', '247', '135', '246',
321            '245', '244', '134', '243', '133', '242', '160',
322            # Dash audio
323            '141', '172', '140', '171', '139',
324        ]
325
326        def format_info(f_id):
327            info = YoutubeIE._formats[f_id].copy()
328
329            # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
330            # and 'vcodec', while in tests such information is incomplete since
331            # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
332            # test_YoutubeDL.test_youtube_format_selection is broken without
333            # this fix
334            if 'acodec' in info and 'vcodec' not in info:
335                info['vcodec'] = 'none'
336            elif 'vcodec' in info and 'acodec' not in info:
337                info['acodec'] = 'none'
338
339            info['format_id'] = f_id
340            info['url'] = 'url:' + f_id
341            return info
342        formats_order = [format_info(f_id) for f_id in order]
343
344        info_dict = _make_result(list(formats_order), extractor='youtube')
345        ydl = YDL({'format': 'bestvideo+bestaudio'})
346        yie = YoutubeIE(ydl)
347        yie._sort_formats(info_dict['formats'])
348        ydl.process_ie_result(info_dict)
349        downloaded = ydl.downloaded_info_dicts[0]
350        self.assertEqual(downloaded['format_id'], '137+141')
351        self.assertEqual(downloaded['ext'], 'mp4')
352
353        info_dict = _make_result(list(formats_order), extractor='youtube')
354        ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
355        yie = YoutubeIE(ydl)
356        yie._sort_formats(info_dict['formats'])
357        ydl.process_ie_result(info_dict)
358        downloaded = ydl.downloaded_info_dicts[0]
359        self.assertEqual(downloaded['format_id'], '38')
360
361        info_dict = _make_result(list(formats_order), extractor='youtube')
362        ydl = YDL({'format': 'bestvideo/best,bestaudio'})
363        yie = YoutubeIE(ydl)
364        yie._sort_formats(info_dict['formats'])
365        ydl.process_ie_result(info_dict)
366        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
367        self.assertEqual(downloaded_ids, ['137', '141'])
368
369        info_dict = _make_result(list(formats_order), extractor='youtube')
370        ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
371        yie = YoutubeIE(ydl)
372        yie._sort_formats(info_dict['formats'])
373        ydl.process_ie_result(info_dict)
374        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
375        self.assertEqual(downloaded_ids, ['137+141', '248+141'])
376
377        info_dict = _make_result(list(formats_order), extractor='youtube')
378        ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
379        yie = YoutubeIE(ydl)
380        yie._sort_formats(info_dict['formats'])
381        ydl.process_ie_result(info_dict)
382        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
383        self.assertEqual(downloaded_ids, ['136+141', '247+141'])
384
385        info_dict = _make_result(list(formats_order), extractor='youtube')
386        ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
387        yie = YoutubeIE(ydl)
388        yie._sort_formats(info_dict['formats'])
389        ydl.process_ie_result(info_dict)
390        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
391        self.assertEqual(downloaded_ids, ['248+141'])
392
393        for f1, f2 in zip(formats_order, formats_order[1:]):
394            info_dict = _make_result([f1, f2], extractor='youtube')
395            ydl = YDL({'format': 'best/bestvideo'})
396            yie = YoutubeIE(ydl)
397            yie._sort_formats(info_dict['formats'])
398            ydl.process_ie_result(info_dict)
399            downloaded = ydl.downloaded_info_dicts[0]
400            self.assertEqual(downloaded['format_id'], f1['format_id'])
401
402            info_dict = _make_result([f2, f1], extractor='youtube')
403            ydl = YDL({'format': 'best/bestvideo'})
404            yie = YoutubeIE(ydl)
405            yie._sort_formats(info_dict['formats'])
406            ydl.process_ie_result(info_dict)
407            downloaded = ydl.downloaded_info_dicts[0]
408            self.assertEqual(downloaded['format_id'], f1['format_id'])
409
410    def test_audio_only_extractor_format_selection(self):
411        # For extractors with incomplete formats (all formats are audio-only or
412        # video-only) best and worst should fallback to corresponding best/worst
413        # video-only or audio-only formats (as per
414        # https://github.com/ytdl-org/youtube-dl/pull/5556)
415        formats = [
416            {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
417            {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
418        ]
419        info_dict = _make_result(formats)
420
421        ydl = YDL({'format': 'best'})
422        ydl.process_ie_result(info_dict.copy())
423        downloaded = ydl.downloaded_info_dicts[0]
424        self.assertEqual(downloaded['format_id'], 'high')
425
426        ydl = YDL({'format': 'worst'})
427        ydl.process_ie_result(info_dict.copy())
428        downloaded = ydl.downloaded_info_dicts[0]
429        self.assertEqual(downloaded['format_id'], 'low')
430
431    def test_format_not_available(self):
432        formats = [
433            {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
434            {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
435        ]
436        info_dict = _make_result(formats)
437
438        # This must fail since complete video-audio format does not match filter
439        # and extractor does not provide incomplete only formats (i.e. only
440        # video-only or audio-only).
441        ydl = YDL({'format': 'best[height>360]'})
442        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
443
444    def test_format_selection_issue_10083(self):
445        # See https://github.com/ytdl-org/youtube-dl/issues/10083
446        formats = [
447            {'format_id': 'regular', 'height': 360, 'url': TEST_URL},
448            {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
449            {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
450        ]
451        info_dict = _make_result(formats)
452
453        ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
454        ydl.process_ie_result(info_dict.copy())
455        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
456
457    def test_invalid_format_specs(self):
458        def assert_syntax_error(format_spec):
459            ydl = YDL({'format': format_spec})
460            info_dict = _make_result([{'format_id': 'foo', 'url': TEST_URL}])
461            self.assertRaises(SyntaxError, ydl.process_ie_result, info_dict)
462
463        assert_syntax_error('bestvideo,,best')
464        assert_syntax_error('+bestaudio')
465        assert_syntax_error('bestvideo+')
466        assert_syntax_error('/')
467        assert_syntax_error('bestvideo+bestvideo+bestaudio')
468
469    def test_format_filtering(self):
470        formats = [
471            {'format_id': 'A', 'filesize': 500, 'width': 1000},
472            {'format_id': 'B', 'filesize': 1000, 'width': 500},
473            {'format_id': 'C', 'filesize': 1000, 'width': 400},
474            {'format_id': 'D', 'filesize': 2000, 'width': 600},
475            {'format_id': 'E', 'filesize': 3000},
476            {'format_id': 'F'},
477            {'format_id': 'G', 'filesize': 1000000},
478        ]
479        for f in formats:
480            f['url'] = 'http://_/'
481            f['ext'] = 'unknown'
482        info_dict = _make_result(formats)
483
484        ydl = YDL({'format': 'best[filesize<3000]'})
485        ydl.process_ie_result(info_dict)
486        downloaded = ydl.downloaded_info_dicts[0]
487        self.assertEqual(downloaded['format_id'], 'D')
488
489        ydl = YDL({'format': 'best[filesize<=3000]'})
490        ydl.process_ie_result(info_dict)
491        downloaded = ydl.downloaded_info_dicts[0]
492        self.assertEqual(downloaded['format_id'], 'E')
493
494        ydl = YDL({'format': 'best[filesize <= ? 3000]'})
495        ydl.process_ie_result(info_dict)
496        downloaded = ydl.downloaded_info_dicts[0]
497        self.assertEqual(downloaded['format_id'], 'F')
498
499        ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
500        ydl.process_ie_result(info_dict)
501        downloaded = ydl.downloaded_info_dicts[0]
502        self.assertEqual(downloaded['format_id'], 'B')
503
504        ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
505        ydl.process_ie_result(info_dict)
506        downloaded = ydl.downloaded_info_dicts[0]
507        self.assertEqual(downloaded['format_id'], 'C')
508
509        ydl = YDL({'format': '[filesize>?1]'})
510        ydl.process_ie_result(info_dict)
511        downloaded = ydl.downloaded_info_dicts[0]
512        self.assertEqual(downloaded['format_id'], 'G')
513
514        ydl = YDL({'format': '[filesize<1M]'})
515        ydl.process_ie_result(info_dict)
516        downloaded = ydl.downloaded_info_dicts[0]
517        self.assertEqual(downloaded['format_id'], 'E')
518
519        ydl = YDL({'format': '[filesize<1MiB]'})
520        ydl.process_ie_result(info_dict)
521        downloaded = ydl.downloaded_info_dicts[0]
522        self.assertEqual(downloaded['format_id'], 'G')
523
524        ydl = YDL({'format': 'all[width>=400][width<=600]'})
525        ydl.process_ie_result(info_dict)
526        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
527        self.assertEqual(downloaded_ids, ['B', 'C', 'D'])
528
529        ydl = YDL({'format': 'best[height<40]'})
530        try:
531            ydl.process_ie_result(info_dict)
532        except ExtractorError:
533            pass
534        self.assertEqual(ydl.downloaded_info_dicts, [])
535
536    def test_default_format_spec(self):
537        ydl = YDL({'simulate': True})
538        self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
539
540        ydl = YDL({})
541        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
542
543        ydl = YDL({'simulate': True})
544        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
545
546        ydl = YDL({'outtmpl': '-'})
547        self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
548
549        ydl = YDL({})
550        self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
551        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
552
553
554class TestYoutubeDL(unittest.TestCase):
555    def test_subtitles(self):
556        def s_formats(lang, autocaption=False):
557            return [{
558                'ext': ext,
559                'url': 'http://localhost/video.%s.%s' % (lang, ext),
560                '_auto': autocaption,
561            } for ext in ['vtt', 'srt', 'ass']]
562        subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
563        auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
564        info_dict = {
565            'id': 'test',
566            'title': 'Test',
567            'url': 'http://localhost/video.mp4',
568            'subtitles': subtitles,
569            'automatic_captions': auto_captions,
570            'extractor': 'TEST',
571        }
572
573        def get_info(params={}):
574            params.setdefault('simulate', True)
575            ydl = YDL(params)
576            ydl.report_warning = lambda *args, **kargs: None
577            return ydl.process_video_result(info_dict, download=False)
578
579        result = get_info()
580        self.assertFalse(result.get('requested_subtitles'))
581        self.assertEqual(result['subtitles'], subtitles)
582        self.assertEqual(result['automatic_captions'], auto_captions)
583
584        result = get_info({'writesubtitles': True})
585        subs = result['requested_subtitles']
586        self.assertTrue(subs)
587        self.assertEqual(set(subs.keys()), set(['en']))
588        self.assertTrue(subs['en'].get('data') is None)
589        self.assertEqual(subs['en']['ext'], 'ass')
590
591        result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
592        subs = result['requested_subtitles']
593        self.assertEqual(subs['en']['ext'], 'srt')
594
595        result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
596        subs = result['requested_subtitles']
597        self.assertTrue(subs)
598        self.assertEqual(set(subs.keys()), set(['es', 'fr']))
599
600        result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
601        subs = result['requested_subtitles']
602        self.assertTrue(subs)
603        self.assertEqual(set(subs.keys()), set(['es', 'pt']))
604        self.assertFalse(subs['es']['_auto'])
605        self.assertTrue(subs['pt']['_auto'])
606
607        result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
608        subs = result['requested_subtitles']
609        self.assertTrue(subs)
610        self.assertEqual(set(subs.keys()), set(['es', 'pt']))
611        self.assertTrue(subs['es']['_auto'])
612        self.assertTrue(subs['pt']['_auto'])
613
614    def test_add_extra_info(self):
615        test_dict = {
616            'extractor': 'Foo',
617        }
618        extra_info = {
619            'extractor': 'Bar',
620            'playlist': 'funny videos',
621        }
622        YDL.add_extra_info(test_dict, extra_info)
623        self.assertEqual(test_dict['extractor'], 'Foo')
624        self.assertEqual(test_dict['playlist'], 'funny videos')
625
626    def test_prepare_filename(self):
627        info = {
628            'id': '1234',
629            'ext': 'mp4',
630            'width': None,
631            'height': 1080,
632            'title1': '$PATH',
633            'title2': '%PATH%',
634        }
635
636        def fname(templ, na_placeholder='NA'):
637            params = {'outtmpl': templ}
638            if na_placeholder != 'NA':
639                params['outtmpl_na_placeholder'] = na_placeholder
640            ydl = YoutubeDL(params)
641            return ydl.prepare_filename(info)
642        self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
643        self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
644        NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
645        # Replace missing fields with 'NA' by default
646        self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
647        # Or by provided placeholder
648        self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
649        self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
650        self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
651        self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4')
652        self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4')
653        self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
654        self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
655        self.assertEqual(fname('%(height)   06d.%(ext)s'), ' 01080.mp4')
656        self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
657        self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4')
658        self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4')
659        self.assertEqual(fname('%%'), '%')
660        self.assertEqual(fname('%%%%'), '%%')
661        self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
662        self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
663        self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
664        self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
665        self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
666        self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
667
668    def test_format_note(self):
669        ydl = YoutubeDL()
670        self.assertEqual(ydl._format_note({}), '')
671        assertRegexpMatches(self, ydl._format_note({
672            'vbr': 10,
673        }), r'^\s*10k$')
674        assertRegexpMatches(self, ydl._format_note({
675            'fps': 30,
676        }), r'^30fps$')
677
678    def test_postprocessors(self):
679        filename = 'post-processor-testfile.mp4'
680        audiofile = filename + '.mp3'
681
682        class SimplePP(PostProcessor):
683            def run(self, info):
684                with open(audiofile, 'wt') as f:
685                    f.write('EXAMPLE')
686                return [info['filepath']], info
687
688        def run_pp(params, PP):
689            with open(filename, 'wt') as f:
690                f.write('EXAMPLE')
691            ydl = YoutubeDL(params)
692            ydl.add_post_processor(PP())
693            ydl.post_process(filename, {'filepath': filename})
694
695        run_pp({'keepvideo': True}, SimplePP)
696        self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
697        self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
698        os.unlink(filename)
699        os.unlink(audiofile)
700
701        run_pp({'keepvideo': False}, SimplePP)
702        self.assertFalse(os.path.exists(filename), '%s exists' % filename)
703        self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
704        os.unlink(audiofile)
705
706        class ModifierPP(PostProcessor):
707            def run(self, info):
708                with open(info['filepath'], 'wt') as f:
709                    f.write('MODIFIED')
710                return [], info
711
712        run_pp({'keepvideo': False}, ModifierPP)
713        self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
714        os.unlink(filename)
715
716    def test_match_filter(self):
717        class FilterYDL(YDL):
718            def __init__(self, *args, **kwargs):
719                super(FilterYDL, self).__init__(*args, **kwargs)
720                self.params['simulate'] = True
721
722            def process_info(self, info_dict):
723                super(YDL, self).process_info(info_dict)
724
725            def _match_entry(self, info_dict, incomplete):
726                res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
727                if res is None:
728                    self.downloaded_info_dicts.append(info_dict)
729                return res
730
731        first = {
732            'id': '1',
733            'url': TEST_URL,
734            'title': 'one',
735            'extractor': 'TEST',
736            'duration': 30,
737            'filesize': 10 * 1024,
738            'playlist_id': '42',
739            'uploader': "變態妍字幕版 太妍 тест",
740            'creator': "тест ' 123 ' тест--",
741        }
742        second = {
743            'id': '2',
744            'url': TEST_URL,
745            'title': 'two',
746            'extractor': 'TEST',
747            'duration': 10,
748            'description': 'foo',
749            'filesize': 5 * 1024,
750            'playlist_id': '43',
751            'uploader': "тест 123",
752        }
753        videos = [first, second]
754
755        def get_videos(filter_=None):
756            ydl = FilterYDL({'match_filter': filter_})
757            for v in videos:
758                ydl.process_ie_result(v, download=True)
759            return [v['id'] for v in ydl.downloaded_info_dicts]
760
761        res = get_videos()
762        self.assertEqual(res, ['1', '2'])
763
764        def f(v):
765            if v['id'] == '1':
766                return None
767            else:
768                return 'Video id is not 1'
769        res = get_videos(f)
770        self.assertEqual(res, ['1'])
771
772        f = match_filter_func('duration < 30')
773        res = get_videos(f)
774        self.assertEqual(res, ['2'])
775
776        f = match_filter_func('description = foo')
777        res = get_videos(f)
778        self.assertEqual(res, ['2'])
779
780        f = match_filter_func('description =? foo')
781        res = get_videos(f)
782        self.assertEqual(res, ['1', '2'])
783
784        f = match_filter_func('filesize > 5KiB')
785        res = get_videos(f)
786        self.assertEqual(res, ['1'])
787
788        f = match_filter_func('playlist_id = 42')
789        res = get_videos(f)
790        self.assertEqual(res, ['1'])
791
792        f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
793        res = get_videos(f)
794        self.assertEqual(res, ['1'])
795
796        f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
797        res = get_videos(f)
798        self.assertEqual(res, ['2'])
799
800        f = match_filter_func('creator = "тест \' 123 \' тест--"')
801        res = get_videos(f)
802        self.assertEqual(res, ['1'])
803
804        f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
805        res = get_videos(f)
806        self.assertEqual(res, ['1'])
807
808        f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
809        res = get_videos(f)
810        self.assertEqual(res, [])
811
812    def test_playlist_items_selection(self):
813        entries = [{
814            'id': compat_str(i),
815            'title': compat_str(i),
816            'url': TEST_URL,
817        } for i in range(1, 5)]
818        playlist = {
819            '_type': 'playlist',
820            'id': 'test',
821            'entries': entries,
822            'extractor': 'test:playlist',
823            'extractor_key': 'test:playlist',
824            'webpage_url': 'http://example.com',
825        }
826
827        def get_downloaded_info_dicts(params):
828            ydl = YDL(params)
829            # make a deep copy because the dictionary and nested entries
830            # can be modified
831            ydl.process_ie_result(copy.deepcopy(playlist))
832            return ydl.downloaded_info_dicts
833
834        def get_ids(params):
835            return [int(v['id']) for v in get_downloaded_info_dicts(params)]
836
837        result = get_ids({})
838        self.assertEqual(result, [1, 2, 3, 4])
839
840        result = get_ids({'playlistend': 10})
841        self.assertEqual(result, [1, 2, 3, 4])
842
843        result = get_ids({'playlistend': 2})
844        self.assertEqual(result, [1, 2])
845
846        result = get_ids({'playliststart': 10})
847        self.assertEqual(result, [])
848
849        result = get_ids({'playliststart': 2})
850        self.assertEqual(result, [2, 3, 4])
851
852        result = get_ids({'playlist_items': '2-4'})
853        self.assertEqual(result, [2, 3, 4])
854
855        result = get_ids({'playlist_items': '2,4'})
856        self.assertEqual(result, [2, 4])
857
858        result = get_ids({'playlist_items': '10'})
859        self.assertEqual(result, [])
860
861        result = get_ids({'playlist_items': '3-10'})
862        self.assertEqual(result, [3, 4])
863
864        result = get_ids({'playlist_items': '2-4,3-4,3'})
865        self.assertEqual(result, [2, 3, 4])
866
867        # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
868        # @{
869        result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
870        self.assertEqual(result[0]['playlist_index'], 2)
871        self.assertEqual(result[1]['playlist_index'], 3)
872
873        result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
874        self.assertEqual(result[0]['playlist_index'], 2)
875        self.assertEqual(result[1]['playlist_index'], 3)
876        self.assertEqual(result[2]['playlist_index'], 4)
877
878        result = get_downloaded_info_dicts({'playlist_items': '4,2'})
879        self.assertEqual(result[0]['playlist_index'], 4)
880        self.assertEqual(result[1]['playlist_index'], 2)
881        # @}
882
883    def test_urlopen_no_file_protocol(self):
884        # see https://github.com/ytdl-org/youtube-dl/issues/8227
885        ydl = YDL()
886        self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
887
888    def test_do_not_override_ie_key_in_url_transparent(self):
889        ydl = YDL()
890
891        class Foo1IE(InfoExtractor):
892            _VALID_URL = r'foo1:'
893
894            def _real_extract(self, url):
895                return {
896                    '_type': 'url_transparent',
897                    'url': 'foo2:',
898                    'ie_key': 'Foo2',
899                    'title': 'foo1 title',
900                    'id': 'foo1_id',
901                }
902
903        class Foo2IE(InfoExtractor):
904            _VALID_URL = r'foo2:'
905
906            def _real_extract(self, url):
907                return {
908                    '_type': 'url',
909                    'url': 'foo3:',
910                    'ie_key': 'Foo3',
911                }
912
913        class Foo3IE(InfoExtractor):
914            _VALID_URL = r'foo3:'
915
916            def _real_extract(self, url):
917                return _make_result([{'url': TEST_URL}], title='foo3 title')
918
919        ydl.add_info_extractor(Foo1IE(ydl))
920        ydl.add_info_extractor(Foo2IE(ydl))
921        ydl.add_info_extractor(Foo3IE(ydl))
922        ydl.extract_info('foo1:')
923        downloaded = ydl.downloaded_info_dicts[0]
924        self.assertEqual(downloaded['url'], TEST_URL)
925        self.assertEqual(downloaded['title'], 'foo1 title')
926        self.assertEqual(downloaded['id'], 'testid')
927        self.assertEqual(downloaded['extractor'], 'testex')
928        self.assertEqual(downloaded['extractor_key'], 'TestEx')
929
930    # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
931    def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
932
933        class _YDL(YDL):
934            def __init__(self, *args, **kwargs):
935                super(_YDL, self).__init__(*args, **kwargs)
936
937            def trouble(self, s, tb=None):
938                pass
939
940        ydl = _YDL({
941            'format': 'extra',
942            'ignoreerrors': True,
943        })
944
945        class VideoIE(InfoExtractor):
946            _VALID_URL = r'video:(?P<id>\d+)'
947
948            def _real_extract(self, url):
949                video_id = self._match_id(url)
950                formats = [{
951                    'format_id': 'default',
952                    'url': 'url:',
953                }]
954                if video_id == '0':
955                    raise ExtractorError('foo')
956                if video_id == '2':
957                    formats.append({
958                        'format_id': 'extra',
959                        'url': TEST_URL,
960                    })
961                return {
962                    'id': video_id,
963                    'title': 'Video %s' % video_id,
964                    'formats': formats,
965                }
966
967        class PlaylistIE(InfoExtractor):
968            _VALID_URL = r'playlist:'
969
970            def _entries(self):
971                for n in range(3):
972                    video_id = compat_str(n)
973                    yield {
974                        '_type': 'url_transparent',
975                        'ie_key': VideoIE.ie_key(),
976                        'id': video_id,
977                        'url': 'video:%s' % video_id,
978                        'title': 'Video Transparent %s' % video_id,
979                    }
980
981            def _real_extract(self, url):
982                return self.playlist_result(self._entries())
983
984        ydl.add_info_extractor(VideoIE(ydl))
985        ydl.add_info_extractor(PlaylistIE(ydl))
986        info = ydl.extract_info('playlist:')
987        entries = info['entries']
988        self.assertEqual(len(entries), 3)
989        self.assertTrue(entries[0] is None)
990        self.assertTrue(entries[1] is None)
991        self.assertEqual(len(ydl.downloaded_info_dicts), 1)
992        downloaded = ydl.downloaded_info_dicts[0]
993        self.assertEqual(entries[2], downloaded)
994        self.assertEqual(downloaded['url'], TEST_URL)
995        self.assertEqual(downloaded['title'], 'Video Transparent 2')
996        self.assertEqual(downloaded['id'], '2')
997        self.assertEqual(downloaded['extractor'], 'Video')
998        self.assertEqual(downloaded['extractor_key'], 'Video')
999
1000
# Run the tests of this module when the file is executed directly
if __name__ == '__main__':
    unittest.main()
1003