1# coding: utf-8 2from __future__ import unicode_literals 3 4from .common import InfoExtractor 5from ..compat import compat_str 6from ..utils import ( 7 ExtractorError, 8 smuggle_url, 9 str_or_none, 10 traverse_obj, 11 unified_strdate, 12 unsmuggle_url, 13) 14 15import itertools 16 17 18class VoicyBaseIE(InfoExtractor): 19 def _extract_from_playlist_data(self, value): 20 voice_id = compat_str(value.get('PlaylistId')) 21 upload_date = unified_strdate(value.get('Published'), False) 22 items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']] 23 return { 24 '_type': 'multi_video', 25 'entries': items, 26 'id': voice_id, 27 'title': compat_str(value.get('PlaylistName')), 28 'uploader': value.get('SpeakerName'), 29 'uploader_id': str_or_none(value.get('SpeakerId')), 30 'channel': value.get('ChannelName'), 31 'channel_id': str_or_none(value.get('ChannelId')), 32 'upload_date': upload_date, 33 } 34 35 def _extract_single_article(self, entry): 36 formats = [{ 37 'url': entry['VoiceHlsFile'], 38 'format_id': 'hls', 39 'ext': 'm4a', 40 'acodec': 'aac', 41 'vcodec': 'none', 42 'protocol': 'm3u8_native', 43 }, { 44 'url': entry['VoiceFile'], 45 'format_id': 'mp3', 46 'ext': 'mp3', 47 'acodec': 'mp3', 48 'vcodec': 'none', 49 }] 50 self._sort_formats(formats) 51 return { 52 'id': compat_str(entry.get('ArticleId')), 53 'title': entry.get('ArticleTitle'), 54 'description': entry.get('MediaName'), 55 'formats': formats, 56 } 57 58 def _call_api(self, url, video_id, **kwargs): 59 response = self._download_json(url, video_id, **kwargs) 60 if response.get('Status') != 0: 61 message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str) 62 if not message: 63 message = 'There was a error in the response: %d' % response.get('Status') 64 raise ExtractorError(message, expected=False) 65 return response.get('Value') 66 67 68class VoicyIE(VoicyBaseIE): 69 IE_NAME = 'voicy' 70 _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)' 71 ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s' 72 _TESTS = [{ 73 'url': 'https://voicy.jp/channel/1253/122754', 74 'info_dict': { 75 'id': '122754', 76 'title': '1/21(木)声日記:ついに原稿終わった!!', 77 'uploader': 'ちょまど@ ITエンジニアなオタク', 78 'uploader_id': '7339', 79 }, 80 'playlist_mincount': 9, 81 }] 82 83 def _real_extract(self, url): 84 mobj = self._match_valid_url(url) 85 assert mobj 86 voice_id = mobj.group('id') 87 channel_id = mobj.group('channel_id') 88 url, article_list = unsmuggle_url(url) 89 if not article_list: 90 article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id) 91 return self._extract_from_playlist_data(article_list) 92 93 94class VoicyChannelIE(VoicyBaseIE): 95 IE_NAME = 'voicy:channel' 96 _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)' 97 PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s' 98 _TESTS = [{ 99 'url': 'https://voicy.jp/channel/1253/', 100 'info_dict': { 101 'id': '7339', 102 'title': 'ゆるふわ日常ラジオ #ちょまラジ', 103 'uploader': 'ちょまど@ ITエンジニアなオタク', 104 'uploader_id': '7339', 105 }, 106 'playlist_mincount': 54, 107 }] 108 109 @classmethod 110 def suitable(cls, url): 111 return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url) 112 113 def _entries(self, channel_id): 114 pager = '' 115 for count in itertools.count(1): 116 article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note='Paging #%d' % count) 117 playlist_data = article_list.get('PlaylistData') 118 if not playlist_data: 119 break 120 yield from playlist_data 121 last = playlist_data[-1] 122 pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount']) 123 124 def _real_extract(self, url): 125 channel_id = self._match_id(url) 126 articles = self._entries(channel_id) 127 128 first_article = next(articles, None) 129 title = traverse_obj(first_article, ('ChannelName', ), expected_type=compat_str) 130 speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=compat_str) 131 if not title and speaker_name: 132 title = 'Uploads from %s' % speaker_name 133 if not title: 134 title = 'Uploads from channel ID %s' % channel_id 135 136 articles = itertools.chain([first_article], articles) if first_article else articles 137 138 playlist = ( 139 self.url_result(smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value), VoicyIE.ie_key()) 140 for value in articles) 141 return { 142 '_type': 'playlist', 143 'entries': playlist, 144 'id': channel_id, 145 'title': title, 146 'channel': speaker_name, 147 'channel_id': channel_id, 148 } 149