123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111 |
- import itertools
- from .common import InfoExtractor
- from ..utils import (
- int_or_none,
- try_get,
- unified_strdate,
- )
- class CrowdBunkerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'
- _TESTS = [{
- 'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
- 'info_dict': {
- 'id': '0z4Kms8pi8I',
- 'ext': 'mp4',
- 'title': '117) Pass vax et solutions',
- 'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
- 'view_count': int,
- 'duration': 5386,
- 'uploader': 'Jérémie Mercier',
- 'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
- 'like_count': int,
- 'upload_date': '20211218',
- 'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg',
- },
- 'params': {'skip_download': True},
- }]
- def _real_extract(self, url):
- video_id = self._match_id(url)
- data_json = self._download_json(
- f'https://api.divulg.org/post/{video_id}/details', video_id,
- headers={'accept': 'application/json, text/plain, */*'})
- video_json = data_json['video']
- formats, subtitles = [], {}
- for sub in video_json.get('captions') or []:
- sub_url = try_get(sub, lambda x: x['file']['url'])
- if not sub_url:
- continue
- subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({
- 'url': sub_url,
- })
- mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
- if mpd_url:
- fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)
- formats.extend(fmts)
- subtitles = self._merge_subtitles(subtitles, subs)
- m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
- if m3u8_url:
- fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, video_id)
- formats.extend(fmts)
- subtitles = self._merge_subtitles(subtitles, subs)
- thumbnails = [{
- 'url': image['url'],
- 'height': int_or_none(image.get('height')),
- 'width': int_or_none(image.get('width')),
- } for image in video_json.get('thumbnails') or [] if image.get('url')]
- return {
- 'id': video_id,
- 'title': video_json.get('title'),
- 'description': video_json.get('description'),
- 'view_count': video_json.get('viewCount'),
- 'duration': video_json.get('duration'),
- 'uploader': try_get(data_json, lambda x: x['channel']['name']),
- 'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
- 'like_count': data_json.get('likesCount'),
- 'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
- 'thumbnails': thumbnails,
- 'formats': formats,
- 'subtitles': subtitles,
- }
- class CrowdBunkerChannelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'
- _TESTS = [{
- 'url': 'https://crowdbunker.com/@Milan_UHRIN',
- 'playlist_mincount': 14,
- 'info_dict': {
- 'id': 'Milan_UHRIN',
- },
- }]
- def _entries(self, playlist_id):
- last = None
- for page in itertools.count():
- channel_json = self._download_json(
- f'https://api.divulg.org/organization/{playlist_id}/posts', playlist_id,
- headers={'accept': 'application/json, text/plain, */*'},
- query={'after': last} if last else {}, note=f'Downloading Page {page}')
- for item in channel_json.get('items') or []:
- v_id = item.get('uid')
- if not v_id:
- continue
- yield self.url_result(
- f'https://crowdbunker.com/v/{v_id}', ie=CrowdBunkerIE.ie_key(), video_id=v_id)
- last = channel_json.get('last')
- if not last:
- break
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- return self.playlist_result(self._entries(playlist_id), playlist_id=playlist_id)
|