crowdbunker.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. import itertools
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. int_or_none,
  5. try_get,
  6. unified_strdate,
  7. )
  8. class CrowdBunkerIE(InfoExtractor):
  9. _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'
  10. _TESTS = [{
  11. 'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
  12. 'info_dict': {
  13. 'id': '0z4Kms8pi8I',
  14. 'ext': 'mp4',
  15. 'title': '117) Pass vax et solutions',
  16. 'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
  17. 'view_count': int,
  18. 'duration': 5386,
  19. 'uploader': 'Jérémie Mercier',
  20. 'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
  21. 'like_count': int,
  22. 'upload_date': '20211218',
  23. 'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg',
  24. },
  25. 'params': {'skip_download': True},
  26. }]
  27. def _real_extract(self, url):
  28. video_id = self._match_id(url)
  29. data_json = self._download_json(
  30. f'https://api.divulg.org/post/{video_id}/details', video_id,
  31. headers={'accept': 'application/json, text/plain, */*'})
  32. video_json = data_json['video']
  33. formats, subtitles = [], {}
  34. for sub in video_json.get('captions') or []:
  35. sub_url = try_get(sub, lambda x: x['file']['url'])
  36. if not sub_url:
  37. continue
  38. subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({
  39. 'url': sub_url,
  40. })
  41. mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
  42. if mpd_url:
  43. fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)
  44. formats.extend(fmts)
  45. subtitles = self._merge_subtitles(subtitles, subs)
  46. m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
  47. if m3u8_url:
  48. fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, video_id)
  49. formats.extend(fmts)
  50. subtitles = self._merge_subtitles(subtitles, subs)
  51. thumbnails = [{
  52. 'url': image['url'],
  53. 'height': int_or_none(image.get('height')),
  54. 'width': int_or_none(image.get('width')),
  55. } for image in video_json.get('thumbnails') or [] if image.get('url')]
  56. return {
  57. 'id': video_id,
  58. 'title': video_json.get('title'),
  59. 'description': video_json.get('description'),
  60. 'view_count': video_json.get('viewCount'),
  61. 'duration': video_json.get('duration'),
  62. 'uploader': try_get(data_json, lambda x: x['channel']['name']),
  63. 'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
  64. 'like_count': data_json.get('likesCount'),
  65. 'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
  66. 'thumbnails': thumbnails,
  67. 'formats': formats,
  68. 'subtitles': subtitles,
  69. }
  70. class CrowdBunkerChannelIE(InfoExtractor):
  71. _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'
  72. _TESTS = [{
  73. 'url': 'https://crowdbunker.com/@Milan_UHRIN',
  74. 'playlist_mincount': 14,
  75. 'info_dict': {
  76. 'id': 'Milan_UHRIN',
  77. },
  78. }]
  79. def _entries(self, playlist_id):
  80. last = None
  81. for page in itertools.count():
  82. channel_json = self._download_json(
  83. f'https://api.divulg.org/organization/{playlist_id}/posts', playlist_id,
  84. headers={'accept': 'application/json, text/plain, */*'},
  85. query={'after': last} if last else {}, note=f'Downloading Page {page}')
  86. for item in channel_json.get('items') or []:
  87. v_id = item.get('uid')
  88. if not v_id:
  89. continue
  90. yield self.url_result(
  91. f'https://crowdbunker.com/v/{v_id}', ie=CrowdBunkerIE.ie_key(), video_id=v_id)
  92. last = channel_json.get('last')
  93. if not last:
  94. break
  95. def _real_extract(self, url):
  96. playlist_id = self._match_id(url)
  97. return self.playlist_result(self._entries(playlist_id), playlist_id=playlist_id)