123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
- import itertools
- from .common import InfoExtractor
- from ..utils import (
- ExtractorError,
- smuggle_url,
- str_or_none,
- traverse_obj,
- unified_strdate,
- unsmuggle_url,
- )
class VoicyBaseIE(InfoExtractor):
    """Shared helpers for the Voicy extractors.

    Holds the API call wrapper and the conversion of Voicy playlist and
    article dicts into info dicts.
    """

    def _extract_from_playlist_data(self, value):
        """Build a ``multi_video`` info dict from one playlist dict.

        ``value`` must contain a ``VoiceData`` list (``KeyError`` otherwise);
        every element of it is converted with ``_extract_single_article``.
        All other keys are optional and read with ``.get``.
        """
        return {
            '_type': 'multi_video',
            'entries': [
                self._extract_single_article(voice_data)
                for voice_data in value['VoiceData']],
            # str_or_none instead of str(): a missing key must stay None
            # rather than become the literal string 'None'.
            'id': str_or_none(value.get('PlaylistId')),
            'title': str_or_none(value.get('PlaylistName')),
            'uploader': value.get('SpeakerName'),
            'uploader_id': str_or_none(value.get('SpeakerId')),
            'channel': value.get('ChannelName'),
            'channel_id': str_or_none(value.get('ChannelId')),
            # NOTE(review): second positional argument kept exactly as before;
            # presumably it is the day-first switch of unified_strdate - confirm.
            'upload_date': unified_strdate(value.get('Published'), False),
        }

    def _extract_single_article(self, entry):
        """Convert one ``VoiceData`` element into an info dict with two audio formats.

        ``entry`` must contain ``VoiceHlsFile`` and ``VoiceFile``
        (``KeyError`` otherwise); the remaining keys are optional.
        """
        formats = [{
            'url': entry['VoiceHlsFile'],
            'format_id': 'hls',
            'ext': 'm4a',
            'acodec': 'aac',
            'vcodec': 'none',
            'protocol': 'm3u8_native',
        }, {
            'url': entry['VoiceFile'],
            'format_id': 'mp3',
            'ext': 'mp3',
            'acodec': 'mp3',
            'vcodec': 'none',
        }]
        return {
            # A missing ArticleId stays None instead of the string 'None'.
            'id': str_or_none(entry.get('ArticleId')),
            'title': entry.get('ArticleTitle'),
            'description': entry.get('MediaName'),
            'formats': formats,
        }

    def _call_api(self, url, video_id, **kwargs):
        """Download a JSON API response and return its ``Value`` member.

        Extra keyword arguments are forwarded to ``_download_json``.

        Raises ``ExtractorError`` when the response's ``Status`` is anything
        other than ``0`` (including a missing ``Status``). The message is
        taken from ``Value.Error.Message`` when that is a string, otherwise a
        generic message containing the status is used.
        """
        response = self._download_json(url, video_id, **kwargs)
        status = response.get('Status')
        if status != 0:
            message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=str)
            if not message:
                # %s, not %d: the status may be absent (None) or non-numeric,
                # and %d would then raise TypeError and hide the real failure.
                message = 'There was a error in the response: %s' % (status, )
            raise ExtractorError(message, expected=False)
        return response.get('Value')
class VoicyIE(VoicyBaseIE):
    """Extractor for a single Voicy playlist page: ``/channel/<channel_id>/<id>``."""

    # NOTE(review): presumably marks the extractor as currently broken - confirm
    # against the framework before relying on it.
    _WORKING = False
    IE_NAME = 'voicy'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
    # Filled with (channel_id, playlist id).
    ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/122754',
        'info_dict': {
            'id': '122754',
            'title': '1/21(木)声日記:ついに原稿終わった!!',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Return the ``multi_video`` result for one playlist URL.

        If the URL carries smuggled playlist data, that data is used directly;
        otherwise the article list is fetched through ``_call_api``.
        """
        # Strip any smuggled payload first so the URL pattern is applied to the
        # plain page URL rather than to the URL with extra data attached.
        url, article_list = unsmuggle_url(url)
        # No `assert` on the match object: asserts vanish under `python -O`
        # and would not protect the .group() call anyway.
        channel_id, voice_id = self._match_valid_url(url).group('channel_id', 'id')
        if not article_list:
            article_list = self._call_api(
                self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
        return self._extract_from_playlist_data(article_list)
class VoicyChannelIE(VoicyBaseIE):
    """Extractor for a Voicy channel page: ``/channel/<id>``.

    Produces a playlist whose entries are URL results handled by ``VoicyIE``.
    """

    # NOTE(review): presumably marks the extractor as currently broken - confirm
    # against the framework before relying on it.
    _WORKING = False
    IE_NAME = 'voicy:channel'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
    # Filled with (channel_id, paging query suffix); the suffix is '' for the first page.
    PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/',
        'info_dict': {
            'id': '7339',
            'title': 'ゆるふわ日常ラジオ #ちょまラジ',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 54,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept a URL only if ``VoicyIE`` does not claim it and the base check passes."""
        # The channel pattern is a prefix of the single-playlist pattern, so
        # playlist URLs are left to VoicyIE.
        if VoicyIE.suitable(url):
            return False
        return super().suitable(url)

    def _entries(self, channel_id):
        """Yield every program dict of the channel, fetching page after page.

        Stops at the first page whose ``PlaylistData`` is missing or empty.
        """
        cursor = ''
        page_number = 0
        while True:
            page_number += 1
            page = self._call_api(
                self.PROGRAM_LIST_API_URL % (channel_id, cursor), channel_id,
                note=f'Paging #{page_number}')
            programs = page.get('PlaylistData')
            if not programs:
                return
            for program in programs:
                yield program
            # The next request is keyed on the last program of this page.
            tail = programs[-1]
            cursor = '&pid=%d&p_date=%s&play_count=%s' % (
                tail['PlaylistId'], tail['Published'], tail['PlayCount'])

    def _real_extract(self, url):
        """Return a lazy playlist of ``VoicyIE`` URL results for the channel."""
        channel_id = self._match_id(url)
        programs = self._entries(channel_id)
        # Peek at the first program to derive the playlist metadata.
        head = next(programs, None)

        channel_name = traverse_obj(head, ('ChannelName', ), expected_type=str)
        speaker_name = traverse_obj(head, ('SpeakerName', ), expected_type=str)
        if channel_name:
            title = channel_name
        elif speaker_name:
            title = f'Uploads from {speaker_name}'
        else:
            title = f'Uploads from channel ID {channel_id}'

        # Put the peeked program back in front of the remaining ones.
        if head:
            programs = itertools.chain([head], programs)

        def playlist_results(items):
            # Each program dict is smuggled into its page URL; VoicyIE uses the
            # smuggled data instead of calling the API when it is present.
            for program in items:
                page_url = 'https://voicy.jp/channel/%s/%d' % (channel_id, program['PlaylistId'])
                yield self.url_result(smuggle_url(page_url, program), VoicyIE.ie_key())

        return {
            '_type': 'playlist',
            'entries': playlist_results(programs),
            'id': channel_id,
            'title': title,
            # NOTE(review): 'channel' carries the speaker name here, while
            # VoicyBaseIE maps ChannelName to 'channel' - confirm this is intended.
            'channel': speaker_name,
            'channel_id': channel_id,
        }
|