callin.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. from .common import InfoExtractor
  2. from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
  3. class CallinIE(InfoExtractor):
  4. _VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
  5. _TESTS = [{
  6. 'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
  7. 'info_dict': {
  8. 'id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
  9. 'title': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
  10. 'ext': 'ts',
  11. 'display_id': 'the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
  12. 'thumbnail': 're:https://.+\\.png',
  13. 'description': 'First episode',
  14. 'uploader': 'Wesley Yang',
  15. 'timestamp': 1639404128.65,
  16. 'upload_date': '20211213',
  17. 'uploader_id': 'wesyang',
  18. 'uploader_url': 'http://wesleyyang.substack.com',
  19. 'channel': 'Conversations in Year Zero',
  20. 'channel_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
  21. 'channel_url': 'https://callin.com/show/conversations-in-year-zero-oJNllRFSfx',
  22. 'duration': 9951.936,
  23. 'view_count': int,
  24. 'categories': ['News & Politics', 'History', 'Technology'],
  25. 'cast': ['Wesley Yang', 'KC Johnson', 'Gabi Abramovich'],
  26. 'series': 'Conversations in Year Zero',
  27. 'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
  28. 'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
  29. 'episode_number': 1,
  30. 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
  31. },
  32. }, {
  33. 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
  34. 'md5': '14ede27ee2c957b7e4db93140fc0745c',
  35. 'info_dict': {
  36. 'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
  37. 'ext': 'ts',
  38. 'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
  39. 'description': 'Or, why the government doesn’t like SpaceX',
  40. 'channel': 'The Pull Request',
  41. 'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
  42. 'duration': 3182.472,
  43. 'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
  44. 'uploader_url': 'http://thepullrequest.com',
  45. 'upload_date': '20220902',
  46. 'episode': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
  47. 'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
  48. 'series': 'The Pull Request',
  49. 'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
  50. 'view_count': int,
  51. 'uploader': 'Antonio García Martínez',
  52. 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
  53. 'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
  54. 'timestamp': 1662100688.005,
  55. },
  56. }, {
  57. 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
  58. 'md5': '16f704ddbf82a27e3930533b12062f07',
  59. 'info_dict': {
  60. 'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
  61. 'ext': 'ts',
  62. 'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
  63. 'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
  64. 'channel': 'The DEBRIEF With Briahna Joy Gray',
  65. 'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
  66. 'duration': 10043.16,
  67. 'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
  68. 'uploader_url': 'http://patreon.com/badfaithpodcast',
  69. 'upload_date': '20220826',
  70. 'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
  71. 'display_id': 'episode-',
  72. 'series': 'The DEBRIEF With Briahna Joy Gray',
  73. 'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
  74. 'view_count': int,
  75. 'uploader': 'Briahna Gray',
  76. 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
  77. 'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
  78. 'timestamp': 1661476708.282,
  79. },
  80. }]
  81. def try_get_user_name(self, d):
  82. names = [d.get(n) for n in ('first', 'last')]
  83. if None in names:
  84. return next((n for n in names if n), default=None)
  85. return ' '.join(names)
  86. def _real_extract(self, url):
  87. display_id = self._match_id(url)
  88. webpage = self._download_webpage(url, display_id)
  89. next_data = self._search_nextjs_data(webpage, display_id)
  90. episode = next_data['props']['pageProps']['episode']
  91. video_id = episode['id']
  92. title = episode.get('title') or self._generic_title('', webpage)
  93. url = episode['m3u8']
  94. formats = self._extract_m3u8_formats(url, display_id, ext='ts')
  95. show = traverse_obj(episode, ('show', 'title'))
  96. show_id = traverse_obj(episode, ('show', 'id'))
  97. show_json = None
  98. app_slug = (self._html_search_regex(
  99. '<script\\s+src=["\']/_next/static/([-_a-zA-Z0-9]+)/_',
  100. webpage, 'app slug', fatal=False) or next_data.get('buildId'))
  101. show_slug = traverse_obj(episode, ('show', 'linkObj', 'resourceUrl'))
  102. if app_slug and show_slug and '/' in show_slug:
  103. show_slug = show_slug.rsplit('/', 1)[1]
  104. show_json_url = f'https://www.callin.com/_next/data/{app_slug}/show/{show_slug}.json'
  105. show_json = self._download_json(show_json_url, display_id, fatal=False)
  106. host = (traverse_obj(show_json, ('pageProps', 'show', 'hosts', 0))
  107. or traverse_obj(episode, ('speakers', 0)))
  108. host_nick = traverse_obj(host, ('linkObj', 'resourceUrl'))
  109. host_nick = host_nick.rsplit('/', 1)[1] if (host_nick and '/' in host_nick) else None
  110. cast = list(filter(None, [
  111. self.try_get_user_name(u) for u in
  112. traverse_obj(episode, (('speakers', 'callerTags'), ...)) or []
  113. ]))
  114. episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or []
  115. episode_number = next(
  116. (len(episode_list) - i for i, e in enumerate(episode_list) if e.get('id') == video_id),
  117. None)
  118. return {
  119. 'id': video_id,
  120. '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
  121. 'display_id': display_id,
  122. 'title': title,
  123. 'formats': formats,
  124. 'thumbnail': traverse_obj(episode, ('show', 'photo')),
  125. 'description': episode.get('description'),
  126. 'uploader': self.try_get_user_name(host) if host else None,
  127. 'timestamp': episode.get('publishedAt'),
  128. 'uploader_id': host_nick,
  129. 'uploader_url': traverse_obj(show_json, ('pageProps', 'show', 'url')),
  130. 'channel': show,
  131. 'channel_id': show_id,
  132. 'channel_url': traverse_obj(episode, ('show', 'linkObj', 'resourceUrl')),
  133. 'duration': float_or_none(episode.get('runtime')),
  134. 'view_count': int_or_none(episode.get('plays')),
  135. 'categories': traverse_obj(episode, ('show', 'categorizations', ..., 'name')),
  136. 'cast': cast if cast else None,
  137. 'series': show,
  138. 'series_id': show_id,
  139. 'episode': title,
  140. 'episode_number': episode_number,
  141. 'episode_id': video_id,
  142. }