voicy.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. import itertools
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. smuggle_url,
  6. str_or_none,
  7. traverse_obj,
  8. unified_strdate,
  9. unsmuggle_url,
  10. )
  11. class VoicyBaseIE(InfoExtractor):
  12. def _extract_from_playlist_data(self, value):
  13. voice_id = str(value.get('PlaylistId'))
  14. upload_date = unified_strdate(value.get('Published'), False)
  15. items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
  16. return {
  17. '_type': 'multi_video',
  18. 'entries': items,
  19. 'id': voice_id,
  20. 'title': str(value.get('PlaylistName')),
  21. 'uploader': value.get('SpeakerName'),
  22. 'uploader_id': str_or_none(value.get('SpeakerId')),
  23. 'channel': value.get('ChannelName'),
  24. 'channel_id': str_or_none(value.get('ChannelId')),
  25. 'upload_date': upload_date,
  26. }
  27. def _extract_single_article(self, entry):
  28. formats = [{
  29. 'url': entry['VoiceHlsFile'],
  30. 'format_id': 'hls',
  31. 'ext': 'm4a',
  32. 'acodec': 'aac',
  33. 'vcodec': 'none',
  34. 'protocol': 'm3u8_native',
  35. }, {
  36. 'url': entry['VoiceFile'],
  37. 'format_id': 'mp3',
  38. 'ext': 'mp3',
  39. 'acodec': 'mp3',
  40. 'vcodec': 'none',
  41. }]
  42. return {
  43. 'id': str(entry.get('ArticleId')),
  44. 'title': entry.get('ArticleTitle'),
  45. 'description': entry.get('MediaName'),
  46. 'formats': formats,
  47. }
  48. def _call_api(self, url, video_id, **kwargs):
  49. response = self._download_json(url, video_id, **kwargs)
  50. if response.get('Status') != 0:
  51. message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=str)
  52. if not message:
  53. message = 'There was a error in the response: %d' % response.get('Status')
  54. raise ExtractorError(message, expected=False)
  55. return response.get('Value')
  56. class VoicyIE(VoicyBaseIE):
  57. _WORKING = False
  58. IE_NAME = 'voicy'
  59. _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
  60. ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
  61. _TESTS = [{
  62. 'url': 'https://voicy.jp/channel/1253/122754',
  63. 'info_dict': {
  64. 'id': '122754',
  65. 'title': '1/21(木)声日記:ついに原稿終わった!!',
  66. 'uploader': 'ちょまど@ ITエンジニアなオタク',
  67. 'uploader_id': '7339',
  68. },
  69. 'playlist_mincount': 9,
  70. }]
  71. def _real_extract(self, url):
  72. mobj = self._match_valid_url(url)
  73. assert mobj
  74. voice_id = mobj.group('id')
  75. channel_id = mobj.group('channel_id')
  76. url, article_list = unsmuggle_url(url)
  77. if not article_list:
  78. article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
  79. return self._extract_from_playlist_data(article_list)
  80. class VoicyChannelIE(VoicyBaseIE):
  81. _WORKING = False
  82. IE_NAME = 'voicy:channel'
  83. _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
  84. PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
  85. _TESTS = [{
  86. 'url': 'https://voicy.jp/channel/1253/',
  87. 'info_dict': {
  88. 'id': '7339',
  89. 'title': 'ゆるふわ日常ラジオ #ちょまラジ',
  90. 'uploader': 'ちょまど@ ITエンジニアなオタク',
  91. 'uploader_id': '7339',
  92. },
  93. 'playlist_mincount': 54,
  94. }]
  95. @classmethod
  96. def suitable(cls, url):
  97. return not VoicyIE.suitable(url) and super().suitable(url)
  98. def _entries(self, channel_id):
  99. pager = ''
  100. for count in itertools.count(1):
  101. article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note=f'Paging #{count}')
  102. playlist_data = article_list.get('PlaylistData')
  103. if not playlist_data:
  104. break
  105. yield from playlist_data
  106. last = playlist_data[-1]
  107. pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount'])
  108. def _real_extract(self, url):
  109. channel_id = self._match_id(url)
  110. articles = self._entries(channel_id)
  111. first_article = next(articles, None)
  112. title = traverse_obj(first_article, ('ChannelName', ), expected_type=str)
  113. speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=str)
  114. if not title and speaker_name:
  115. title = f'Uploads from {speaker_name}'
  116. if not title:
  117. title = f'Uploads from channel ID {channel_id}'
  118. articles = itertools.chain([first_article], articles) if first_article else articles
  119. playlist = (
  120. self.url_result(smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value), VoicyIE.ie_key())
  121. for value in articles)
  122. return {
  123. '_type': 'playlist',
  124. 'entries': playlist,
  125. 'id': channel_id,
  126. 'title': title,
  127. 'channel': speaker_name,
  128. 'channel_id': channel_id,
  129. }