1# coding: utf-8 2from __future__ import unicode_literals 3 4import re 5import itertools 6 7from .common import InfoExtractor 8from ..compat import ( 9 compat_str, 10) 11from ..utils import ( 12 determine_ext, 13 bool_or_none, 14 int_or_none, 15 parse_qs, 16 try_get, 17 unified_timestamp, 18 url_or_none, 19) 20 21 22class RutubeBaseIE(InfoExtractor): 23 def _download_api_info(self, video_id, query=None): 24 if not query: 25 query = {} 26 query['format'] = 'json' 27 return self._download_json( 28 'http://rutube.ru/api/video/%s/' % video_id, 29 video_id, 'Downloading video JSON', 30 'Unable to download video JSON', query=query) 31 32 @staticmethod 33 def _extract_info(video, video_id=None, require_title=True): 34 title = video['title'] if require_title else video.get('title') 35 36 age_limit = video.get('is_adult') 37 if age_limit is not None: 38 age_limit = 18 if age_limit is True else 0 39 40 uploader_id = try_get(video, lambda x: x['author']['id']) 41 category = try_get(video, lambda x: x['category']['name']) 42 43 return { 44 'id': video.get('id') or video_id if video_id else video['id'], 45 'title': title, 46 'description': video.get('description'), 47 'thumbnail': video.get('thumbnail_url'), 48 'duration': int_or_none(video.get('duration')), 49 'uploader': try_get(video, lambda x: x['author']['name']), 50 'uploader_id': compat_str(uploader_id) if uploader_id else None, 51 'timestamp': unified_timestamp(video.get('created_ts')), 52 'category': [category] if category else None, 53 'age_limit': age_limit, 54 'view_count': int_or_none(video.get('hits')), 55 'comment_count': int_or_none(video.get('comments_count')), 56 'is_live': bool_or_none(video.get('is_livestream')), 57 } 58 59 def _download_and_extract_info(self, video_id, query=None): 60 return self._extract_info( 61 self._download_api_info(video_id, query=query), video_id) 62 63 def _download_api_options(self, video_id, query=None): 64 if not query: 65 query = {} 66 query['format'] = 'json' 67 return self._download_json( 68 'http://rutube.ru/api/play/options/%s/' % video_id, 69 video_id, 'Downloading options JSON', 70 'Unable to download options JSON', 71 headers=self.geo_verification_headers(), query=query) 72 73 def _extract_formats(self, options, video_id): 74 formats = [] 75 for format_id, format_url in options['video_balancer'].items(): 76 ext = determine_ext(format_url) 77 if ext == 'm3u8': 78 formats.extend(self._extract_m3u8_formats( 79 format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) 80 elif ext == 'f4m': 81 formats.extend(self._extract_f4m_formats( 82 format_url, video_id, f4m_id=format_id, fatal=False)) 83 else: 84 formats.append({ 85 'url': format_url, 86 'format_id': format_id, 87 }) 88 self._sort_formats(formats) 89 return formats 90 91 def _download_and_extract_formats(self, video_id, query=None): 92 return self._extract_formats( 93 self._download_api_options(video_id, query=query), video_id) 94 95 96class RutubeIE(RutubeBaseIE): 97 IE_NAME = 'rutube' 98 IE_DESC = 'Rutube videos' 99 _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})' 100 101 _TESTS = [{ 102 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 103 'md5': '1d24f180fac7a02f3900712e5a5764d6', 104 'info_dict': { 105 'id': '3eac3b4561676c17df9132a9a1e62e3e', 106 'ext': 'mp4', 107 'title': 'Раненный кенгуру забежал в аптеку', 108 'description': 'http://www.ntdtv.ru ', 109 'duration': 81, 110 'uploader': 'NTDRussian', 111 'uploader_id': '29790', 112 'timestamp': 1381943602, 113 'upload_date': '20131016', 114 'age_limit': 0, 115 }, 116 }, { 117 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 118 'only_matching': True, 119 }, { 120 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', 121 'only_matching': True, 122 }, { 123 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', 124 'only_matching': True, 125 }, { 126 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', 127 'only_matching': True, 128 }] 129 130 @classmethod 131 def suitable(cls, url): 132 return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url) 133 134 @staticmethod 135 def _extract_urls(webpage): 136 return [mobj.group('url') for mobj in re.finditer( 137 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1', 138 webpage)] 139 140 def _real_extract(self, url): 141 video_id = self._match_id(url) 142 info = self._download_and_extract_info(video_id) 143 info['formats'] = self._download_and_extract_formats(video_id) 144 return info 145 146 147class RutubeEmbedIE(RutubeBaseIE): 148 IE_NAME = 'rutube:embed' 149 IE_DESC = 'Rutube embedded videos' 150 _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' 151 152 _TESTS = [{ 153 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 154 'info_dict': { 155 'id': 'a10e53b86e8f349080f718582ce4c661', 156 'ext': 'mp4', 157 'timestamp': 1387830582, 158 'upload_date': '20131223', 159 'uploader_id': '297833', 160 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', 161 'uploader': 'subziro89 ILya', 162 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', 163 }, 164 'params': { 165 'skip_download': True, 166 }, 167 }, { 168 'url': 'http://rutube.ru/play/embed/8083783', 169 'only_matching': True, 170 }, { 171 # private video 172 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ', 173 'only_matching': True, 174 }] 175 176 def _real_extract(self, url): 177 embed_id = self._match_id(url) 178 # Query may contain private videos token and should be passed to API 179 # requests (see #19163) 180 query = parse_qs(url) 181 options = self._download_api_options(embed_id, query) 182 video_id = options['effective_video'] 183 formats = self._extract_formats(options, video_id) 184 info = self._download_and_extract_info(video_id, query) 185 info.update({ 186 'extractor_key': 'Rutube', 187 'formats': formats, 188 }) 189 return info 190 191 192class RutubePlaylistBaseIE(RutubeBaseIE): 193 def _next_page_url(self, page_num, playlist_id, *args, **kwargs): 194 return self._PAGE_TEMPLATE % (playlist_id, page_num) 195 196 def _entries(self, playlist_id, *args, **kwargs): 197 next_page_url = None 198 for pagenum in itertools.count(1): 199 page = self._download_json( 200 next_page_url or self._next_page_url( 201 pagenum, playlist_id, *args, **kwargs), 202 playlist_id, 'Downloading page %s' % pagenum) 203 204 results = page.get('results') 205 if not results or not isinstance(results, list): 206 break 207 208 for result in results: 209 video_url = url_or_none(result.get('video_url')) 210 if not video_url: 211 continue 212 entry = self._extract_info(result, require_title=False) 213 entry.update({ 214 '_type': 'url', 215 'url': video_url, 216 'ie_key': RutubeIE.ie_key(), 217 }) 218 yield entry 219 220 next_page_url = page.get('next') 221 if not next_page_url or not page.get('has_next'): 222 break 223 224 def _extract_playlist(self, playlist_id, *args, **kwargs): 225 return self.playlist_result( 226 self._entries(playlist_id, *args, **kwargs), 227 playlist_id, kwargs.get('playlist_name')) 228 229 def _real_extract(self, url): 230 return self._extract_playlist(self._match_id(url)) 231 232 233class RutubeTagsIE(RutubePlaylistBaseIE): 234 IE_NAME = 'rutube:tags' 235 IE_DESC = 'Rutube tags' 236 _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' 237 _TESTS = [{ 238 'url': 'http://rutube.ru/tags/video/1800/', 239 'info_dict': { 240 'id': '1800', 241 }, 242 'playlist_mincount': 68, 243 }] 244 245 _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' 246 247 248class RutubeMovieIE(RutubePlaylistBaseIE): 249 IE_NAME = 'rutube:movie' 250 IE_DESC = 'Rutube movies' 251 _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' 252 _TESTS = [] 253 254 _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' 255 _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' 256 257 def _real_extract(self, url): 258 movie_id = self._match_id(url) 259 movie = self._download_json( 260 self._MOVIE_TEMPLATE % movie_id, movie_id, 261 'Downloading movie JSON') 262 return self._extract_playlist( 263 movie_id, playlist_name=movie.get('name')) 264 265 266class RutubePersonIE(RutubePlaylistBaseIE): 267 IE_NAME = 'rutube:person' 268 IE_DESC = 'Rutube person videos' 269 _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' 270 _TESTS = [{ 271 'url': 'http://rutube.ru/video/person/313878/', 272 'info_dict': { 273 'id': '313878', 274 }, 275 'playlist_mincount': 37, 276 }] 277 278 _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' 279 280 281class RutubePlaylistIE(RutubePlaylistBaseIE): 282 IE_NAME = 'rutube:playlist' 283 IE_DESC = 'Rutube playlists' 284 _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)' 285 _TESTS = [{ 286 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', 287 'info_dict': { 288 'id': '3097', 289 }, 290 'playlist_count': 27, 291 }, { 292 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', 293 'only_matching': True, 294 }] 295 296 _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' 297 298 @classmethod 299 def suitable(cls, url): 300 from ..utils import int_or_none, parse_qs 301 302 if not super(RutubePlaylistIE, cls).suitable(url): 303 return False 304 params = parse_qs(url) 305 return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) 306 307 def _next_page_url(self, page_num, playlist_id, item_kind): 308 return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) 309 310 def _real_extract(self, url): 311 qs = parse_qs(url) 312 playlist_kind = qs['pl_type'][0] 313 playlist_id = qs['pl_id'][0] 314 return self._extract_playlist(playlist_id, item_kind=playlist_kind) 315 316 317class RutubeChannelIE(RutubePlaylistBaseIE): 318 IE_NAME = 'rutube:channel' 319 IE_DESC = 'Rutube channel' 320 _VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos' 321 _TESTS = [{ 322 'url': 'https://rutube.ru/channel/639184/videos/', 323 'info_dict': { 324 'id': '639184', 325 }, 326 'playlist_mincount': 133, 327 }] 328 329 _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' 330