from __future__ import unicode_literals

import re
import io
import binascii

from ..downloader import get_suitable_downloader
from .fragment import FragmentFD
from .external import FFmpegFD

from ..compat import (
    compat_pycrypto_AES,
    compat_urlparse,
)
from ..utils import (
    parse_m3u8_attributes,
    update_url_query,
    bug_reports_message,
)
from .. import webvtt


class HlsFD(FragmentFD):
    """
    Download segments in a m3u8 manifest. External downloaders can take over
    the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
    re-defining 'supports_manifest' function
    """

    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict, allow_unplayable_formats=False):
        """
        Tell whether this downloader can handle the given manifest itself.

        @param manifest                  Text of the m3u8 media playlist
        @param info_dict                 Format dict; only 'is_live' is read here
        @param allow_unplayable_formats  When true, the check on the
                                         EXT-X-KEY encryption method is skipped
        @returns  True if the stream is not marked live and no pattern in
                  UNSUPPORTED_FEATURES matches the manifest, else False
        """
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]
            # r'#EXT-X-MAP:',  # media initialization [5]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
            # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
                # Any key method that does not start with NONE or AES-128
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            ]

        def check_results():
            yield not info_dict.get('is_live')
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
        """
        Fetch the m3u8 manifest at info_dict['url'] and download its fragments.

        The manifest is parsed into a list of fragment dicts (keys 'frag_index',
        'url', 'decrypt_info', 'byte_range', 'media_sequence'). Depending on the
        manifest and the available tools, the work is then
          * delegated entirely to FFmpegFD (manifest not natively supported),
          * delegated to an external 'm3u8_frag_urls' downloader, or
          * performed by download_and_append_fragments, with WebVTT
            repacking when info_dict['ext'] is 'vtt'.

        @param filename   Output file name ('-' means stdout)
        @param info_dict  Format dict; reads 'url', 'ext', 'format_index' and
                          'extra_param_to_segment_url', and may set 'fragments'
        @returns  False when the stream is DRM protected or an initialization
                  fragment appears after media fragments; otherwise the result
                  of whichever downloader performed the download
        """
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        # Use the final URL (after any redirect) as base for relative fragment URLs
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')

        can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
        if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s:
            # No AES implementation from pycryptodomex: prefer ffmpeg when it
            # is available, otherwise continue natively and warn below
            if FFmpegFD.available():
                can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
            else:
                message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
                           'Decryption will be performed natively, but will be extremely slow')
        if not can_download:
            has_drm = re.search('|'.join([
                r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
                r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
            ]), s)
            if has_drm and not self.params.get('allow_unplayable_formats'):
                self.report_error(
                    'This video is DRM protected; Try selecting another format with --format or '
                    'add --check-formats to automatically fallback to the next best format')
                return False
            message = message or 'Unsupported features have been detected'
            fd = FFmpegFD(self.ydl, self.params)
            self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
            return fd.real_download(filename, info_dict)
        elif message:
            self.report_warning(message)

        is_webvtt = info_dict['ext'] == 'vtt'
        if is_webvtt:
            real_downloader = None  # Packing the fragments is not currently supported for external downloader
        else:
            real_downloader = get_suitable_downloader(
                info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
        if real_downloader and not real_downloader.supports_manifest(s):
            real_downloader = None
        if real_downloader:
            self.to_screen(
                '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))

        def is_ad_fragment_start(s):
            # Tag lines that open a run of advertisement fragments
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        def is_ad_fragment_end(s):
            # Tag lines that close a run of advertisement fragments
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

        fragments = []

        # First pass: only count media and ad fragments so that the download
        # context can be prepared with the totals
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                continue
            if ad_frag_next:
                ad_frags += 1
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            self._prepare_and_start_frag_download(ctx, info_dict)

        # State shared between calls of the WebVTT packing helpers below
        extra_state = ctx.setdefault('extra_state', {})

        format_index = info_dict.get('format_index')
        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_param_to_segment_url:
            extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
        media_sequence = 0
        decrypt_info = {'METHOD': 'NONE'}
        byte_range = {}
        discontinuity_count = 0
        frag_index = 0
        ad_frag_next = False
        # Second pass: build the fragment list, tracking the key, byte range,
        # media sequence and discontinuity state set by the tag lines
        for line in s.splitlines():
            line = line.strip()
            if line:
                if not line.startswith('#'):
                    # When a format_index is given, only the fragments of the
                    # matching discontinuity section are kept
                    if format_index and discontinuity_count != format_index:
                        continue
                    if ad_frag_next:
                        continue
                    frag_index += 1
                    # Skip fragments at or below ctx['fragment_index']
                    # NOTE(review): presumably the resume position set by the
                    # _prepare_* call above - confirm in FragmentFD
                    if frag_index <= ctx['fragment_index']:
                        continue
                    frag_url = (
                        line
                        if re.match(r'^https?://', line)
                        else compat_urlparse.urljoin(man_url, line))
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)

                    fragments.append({
                        'frag_index': frag_index,
                        'url': frag_url,
                        'decrypt_info': decrypt_info,
                        'byte_range': byte_range,
                        'media_sequence': media_sequence,
                    })
                    media_sequence += 1

                elif line.startswith('#EXT-X-MAP'):
                    if format_index and discontinuity_count != format_index:
                        continue
                    if frag_index > 0:
                        self.report_error(
                            'Initialization fragment found after media fragments, unable to download')
                        return False
                    frag_index += 1
                    map_info = parse_m3u8_attributes(line[11:])
                    frag_url = (
                        map_info.get('URI')
                        if re.match(r'^https?://', map_info.get('URI'))
                        else compat_urlparse.urljoin(man_url, map_info.get('URI')))
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)

                    # The BYTERANGE attribute describes the initialization
                    # section itself, so it must be applied before the
                    # fragment is recorded
                    if map_info.get('BYTERANGE'):
                        splitted_byte_range = map_info.get('BYTERANGE').split('@')
                        sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                        byte_range = {
                            'start': sub_range_start,
                            'end': sub_range_start + int(splitted_byte_range[0]),
                        }

                    fragments.append({
                        'frag_index': frag_index,
                        'url': frag_url,
                        'decrypt_info': decrypt_info,
                        'byte_range': byte_range,
                        'media_sequence': media_sequence
                    })
                    media_sequence += 1

                elif line.startswith('#EXT-X-KEY'):
                    decrypt_url = decrypt_info.get('URI')
                    decrypt_info = parse_m3u8_attributes(line[11:])
                    if decrypt_info['METHOD'] == 'AES-128':
                        if 'IV' in decrypt_info:
                            # Drop the leading '0x' and left-pad to 32 hex digits
                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                        if not re.match(r'^https?://', decrypt_info['URI']):
                            decrypt_info['URI'] = compat_urlparse.urljoin(
                                man_url, decrypt_info['URI'])
                        if extra_query:
                            decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                        if decrypt_url != decrypt_info['URI']:
                            # Key URI changed: clear any cached key
                            decrypt_info['KEY'] = None

                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                    media_sequence = int(line[22:])
                elif line.startswith('#EXT-X-BYTERANGE'):
                    # Format is '<length>[@<offset>]'; without an offset the
                    # range starts where the previous one ended
                    splitted_byte_range = line[17:].split('@')
                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                    byte_range = {
                        'start': sub_range_start,
                        'end': sub_range_start + int(splitted_byte_range[0]),
                    }
                elif is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                elif line.startswith('#EXT-X-DISCONTINUITY'):
                    discontinuity_count += 1

        # We only download the first fragment during the test
        if self.params.get('test', False):
            # Slicing keeps an empty list empty instead of producing [None]
            fragments = fragments[:1]

        if real_downloader:
            info_dict['fragments'] = fragments
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        if is_webvtt:
            def pack_fragment(frag_content, frag_index):
                """
                Re-pack one WebVTT fragment: shift cue timestamps, drop cues
                duplicated across fragments and return the UTF-8 encoded text
                that is ready to be written.
                """
                output = io.StringIO()
                adjust = 0
                overflow = False
                mpegts_last = None
                for block in webvtt.parse_fragment(frag_content):
                    if isinstance(block, webvtt.CueBlock):
                        extra_state['webvtt_mpegts_last'] = mpegts_last
                        if overflow:
                            extra_state['webvtt_mpegts_adjust'] += 1
                            overflow = False
                        block.start += adjust
                        block.end += adjust

                        dedup_window = extra_state.setdefault('webvtt_dedup_window', [])

                        ready = []

                        i = 0
                        is_new = True
                        # Manual index because entries are deleted while scanning
                        while i < len(dedup_window):
                            wcue = dedup_window[i]
                            wblock = webvtt.CueBlock.from_json(wcue)
                            i += 1
                            if wblock.hinges(block):
                                # Extend the stored cue instead of adding a new one
                                wcue['end'] = block.end
                                is_new = False
                                continue
                            if wblock == block:
                                is_new = False
                                continue
                            if wblock.end > block.start:
                                continue
                            # The stored cue ended before this one starts, so
                            # it leaves the window and can be emitted
                            ready.append(wblock)
                            i -= 1
                            del dedup_window[i]

                        if is_new:
                            dedup_window.append(block.as_json)
                        for ready_block in ready:
                            ready_block.write_into(output)

                        # we only emit cues once they fall out of the duplicate window
                        continue
                    elif isinstance(block, webvtt.Magic):
                        # take care of MPEG PES timestamp overflow
                        if block.mpegts is None:
                            block.mpegts = 0
                        extra_state.setdefault('webvtt_mpegts_adjust', 0)
                        block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
                        # The stored value may be None (it is assigned from
                        # mpegts_last, which starts as None), so coerce it
                        # before comparing
                        if block.mpegts < (extra_state.get('webvtt_mpegts_last') or 0):
                            overflow = True
                            block.mpegts += 1 << 33
                        mpegts_last = block.mpegts

                        if frag_index == 1:
                            # The first fragment provides the reference
                            # timestamps; its header is written out below
                            extra_state['webvtt_mpegts'] = block.mpegts or 0
                            extra_state['webvtt_local'] = block.local or 0
                            # XXX: block.local = block.mpegts = None ?
                        else:
                            if block.mpegts is not None and block.local is not None:
                                adjust = (
                                    (block.mpegts - extra_state.get('webvtt_mpegts', 0))
                                    - (block.local - extra_state.get('webvtt_local', 0))
                                )
                            continue
                    elif isinstance(block, webvtt.HeaderBlock):
                        if frag_index != 1:
                            # XXX: this should probably be silent as well
                            # or verify that all segments contain the same data
                            self.report_warning(bug_reports_message(
                                'Discarding a %s block found in the middle of the stream; '
                                'if the subtitles display incorrectly,'
                                % (type(block).__name__)))
                            continue
                    block.write_into(output)

                return output.getvalue().encode('utf-8')

            def fin_fragments():
                """Return the cues still held in the duplicate window, UTF-8 encoded."""
                dedup_window = extra_state.get('webvtt_dedup_window')
                if not dedup_window:
                    return b''

                output = io.StringIO()
                for cue in dedup_window:
                    webvtt.CueBlock.from_json(cue).write_into(output)

                return output.getvalue().encode('utf-8')

            # Propagate the result, as the non-WebVTT branch does
            return self.download_and_append_fragments(
                ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
        else:
            return self.download_and_append_fragments(ctx, fragments, info_dict)