123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260 |
- import functools
- import urllib.parse
- from .common import InfoExtractor
- from ..networking.exceptions import HTTPError
- from ..utils import (
- ExtractorError,
- OnDemandPagedList,
- int_or_none,
- qualities,
- try_get,
- )
class RedGifsBaseInfoExtractor(InfoExtractor):
    """Shared API access and metadata parsing for the RedGifs extractors."""

    # Format IDs looked up in the API's `urls` mapping, each with the height
    # (in pixels) it is capped at; None means the source height is kept.
    # The key order is also what gets handed to `qualities()` for ranking.
    _FORMATS = {
        'gif': 250,
        'sd': 480,
        'hd': None,
    }

    # Headers sent with every API request. This dict lives on the class and
    # `_fetch_oauth_token` writes the bearer token into it, so the token is
    # shared by every extractor deriving from this class.
    _API_HEADERS = {
        'referer': 'https://www.redgifs.com/',
        'origin': 'https://www.redgifs.com',
        'content-type': 'application/json',
    }

    def _parse_gif_data(self, gif_data):
        """Convert one API `gif` object into an info dict.

        @param gif_data  dict describing a single gif; must contain `urls`
        @returns         info dict with one format per entry of `_FORMATS`
                         for which `urls` provides a URL
        """
        video_id = gif_data.get('id')
        quality = qualities(tuple(self._FORMATS.keys()))

        orig_height = int_or_none(gif_data.get('height'))
        orig_width = int_or_none(gif_data.get('width'))

        formats = []
        for format_id, max_height in self._FORMATS.items():
            video_url = gif_data['urls'].get(format_id)
            if not video_url:
                continue

            if orig_height is None:
                # Source dimensions are unknown; do not guess (and do not
                # crash comparing None with an int).
                height = None
            else:
                height = min(orig_height, max_height or orig_height)

            # Scale the source width by the same factor as the height so the
            # aspect ratio is preserved; leave it unset when it cannot be
            # computed (missing or zero dimensions).
            width = None
            if height and orig_height and orig_width:
                width = round(height * orig_width / orig_height)

            formats.append({
                'url': video_url,
                'format_id': format_id,
                'width': width,
                'height': height,
                'quality': quality(format_id),
            })

        return {
            'id': video_id,
            'webpage_url': f'https://redgifs.com/watch/{video_id}',
            # Always attributed to RedGifsIE, whichever subclass parsed it
            'extractor_key': RedGifsIE.ie_key(),
            'extractor': 'RedGifs',
            'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
            'timestamp': int_or_none(gif_data.get('createDate')),
            'uploader': gif_data.get('userName'),
            'duration': int_or_none(gif_data.get('duration')),
            'view_count': int_or_none(gif_data.get('views')),
            'like_count': int_or_none(gif_data.get('likes')),
            'categories': gif_data.get('tags') or [],
            'tags': gif_data.get('tags'),
            'age_limit': 18,
            'formats': formats,
        }

    def _fetch_oauth_token(self, video_id):
        """Request a temporary token and store it in `_API_HEADERS`.

        Raises ExtractorError if the response carries no token.
        """
        # https://github.com/Redgifs/api/wiki/Temporary-tokens
        auth = self._download_json(
            'https://api.redgifs.com/v2/auth/temporary',
            video_id, note='Fetching temporary token')
        if not auth.get('token'):
            raise ExtractorError('Unable to get temporary token')
        self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'

    def _call_api(self, ep, video_id, **kwargs):
        """Download JSON from API endpoint `ep`, authenticating as needed.

        A token is fetched first if none is stored. If the first request
        fails with HTTP 401 the stored token is dropped and the request is
        retried once with a fresh one; any other failure is re-raised.

        @param ep        endpoint path appended to https://api.redgifs.com/v2/
        @param video_id  ID used for logging and for the x-customheader value
        @param kwargs    passed through to `_download_json`
        @returns         the decoded JSON response
        Raises ExtractorError if the response contains an `error` key.
        """
        for first_attempt in True, False:
            if 'authorization' not in self._API_HEADERS:
                self._fetch_oauth_token(video_id)
            try:
                # Copy so the per-request header never leaks into the shared dict
                headers = dict(self._API_HEADERS)
                headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
                data = self._download_json(
                    f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, **kwargs)
                break
            except ExtractorError as e:
                if first_attempt and isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    del self._API_HEADERS['authorization']  # refresh the token
                    continue
                raise

        if 'error' in data:
            raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
        return data

    def _fetch_page(self, ep, video_id, query, page):
        """Yield parsed entries for one page of a listing endpoint.

        `page + 1` is sent to the API as the `page` parameter (presumably
        because OnDemandPagedList supplies a zero-based index - confirm).
        Note that `query` is updated in place.
        """
        query['page'] = page + 1
        data = self._call_api(
            ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')

        for entry in data['gifs']:
            yield self._parse_gif_data(entry)

    def _prepare_api_query(self, query, fields):
        """Build the API query from a parsed URL query.

        @param query   mapping of name -> sequence of values (parse_qs shape)
        @param fields  mapping of accepted field name -> default value
        @returns       dict with the first value (or the default) of every
                       field in `fields`, omitting those that resolve to None
        """
        api_query = [
            (field_name, query.get(field_name, (default,))[0])
            for field_name, default in fields.items()]
        return {key: val for key, val in api_query if val is not None}

    def _paged_entries(self, ep, item_id, query, fields):
        """Return the entries of a listing endpoint.

        If `query` carries a truthy `page` value only that single request is
        made; otherwise an OnDemandPagedList over all pages is returned.
        Relies on `_PAGE_SIZE`, which subclasses using this method define.
        """
        page = int_or_none(query.get('page', (None,))[0])
        page_fetcher = functools.partial(
            self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
        # NOTE(review): an explicit page is passed through unchanged, so
        # `_fetch_page` requests API page `page + 1` - confirm this matches
        # the site's page numbering.
        return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
class RedGifsIE(RedGifsBaseInfoExtractor):
    """Extractor for single RedGifs watch pages and thumbs2 media URLs."""

    _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
    _TESTS = [
        {
            'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
            'info_dict': {
                'id': 'squeakyhelplesswisent',
                'ext': 'mp4',
                'title': 'Hotwife Legs Thick',
                'timestamp': 1636287915,
                'upload_date': '20211107',
                'uploader': 'ignored52',
                'duration': 16,
                'view_count': int,
                'like_count': int,
                'categories': list,
                'age_limit': 18,
                'tags': list,
            },
        },
        {
            'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
            'info_dict': {
                'id': 'squeakyhelplesswisent',
                'ext': 'mp4',
                'title': 'Hotwife Legs Thick',
                'timestamp': 1636287915,
                'upload_date': '20211107',
                'uploader': 'ignored52',
                'duration': 16,
                'view_count': int,
                'like_count': int,
                'categories': list,
                'age_limit': 18,
                'tags': list,
            },
        },
    ]

    def _real_extract(self, url):
        """Look up the gif named in `url` and return its info dict."""
        # The ID matched from the URL is lower-cased before it is used
        gif_id = self._match_id(url).lower()
        endpoint = f'gifs/{gif_id}?views=yes'
        response = self._call_api(endpoint, gif_id, note='Downloading video info')
        gif = response['gif']
        return self._parse_gif_data(gif)
class RedGifsSearchIE(RedGifsBaseInfoExtractor):
    """Playlist extractor for RedGifs `browse?tags=...` search URLs."""

    IE_DESC = 'Redgifs search'
    _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
    # Page size handed to OnDemandPagedList by `_paged_entries`
    _PAGE_SIZE = 80
    _TESTS = [{
        'url': 'https://www.redgifs.com/browse?tags=Lesbian',
        'info_dict': {
            'id': 'tags=Lesbian',
            'title': 'Lesbian',
            'description': 'RedGifs search for Lesbian, ordered by trending',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
        'info_dict': {
            'id': 'type=g&order=latest&tags=Lesbian',
            'title': 'Lesbian',
            'description': 'RedGifs search for Lesbian, ordered by latest',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
        'info_dict': {
            'id': 'type=g&order=latest&tags=Lesbian&page=2',
            'title': 'Lesbian',
            'description': 'RedGifs search for Lesbian, ordered by latest',
        },
        'playlist_count': 80,
    }]

    def _real_extract(self, url):
        """Build a playlist from the search described by the URL query.

        The raw query string doubles as the playlist ID and the first `tags`
        value as its title. Raises ExtractorError when no tags are given.
        """
        raw_query = self._match_valid_url(url).group('query')
        params = urllib.parse.parse_qs(raw_query)

        tag_values = params.get('tags')
        if not tag_values:
            raise ExtractorError('Invalid query tags', expected=True)
        search_tags = tag_values[0]
        sort_order = params.get('order', ('trending',))[0]

        # The API takes the tags under `search_text`
        params['search_text'] = [search_tags]
        api_fields = {
            'search_text': None,
            'order': 'trending',
            'type': None,
        }
        entries = self._paged_entries('gifs/search', raw_query, params, api_fields)

        description = f'RedGifs search for {search_tags}, ordered by {sort_order}'
        return self.playlist_result(entries, raw_query, search_tags, description)
class RedGifsUserIE(RedGifsBaseInfoExtractor):
    """Playlist extractor for RedGifs user pages."""

    IE_DESC = 'Redgifs user'
    _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
    # Page size handed to OnDemandPagedList by `_paged_entries`
    _PAGE_SIZE = 30
    _TESTS = [{
        'url': 'https://www.redgifs.com/users/lamsinka89',
        'info_dict': {
            'id': 'lamsinka89',
            'title': 'lamsinka89',
            'description': 'RedGifs user lamsinka89, ordered by recent',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
        'info_dict': {
            'id': 'lamsinka89?page=3',
            'title': 'lamsinka89',
            'description': 'RedGifs user lamsinka89, ordered by recent',
        },
        'playlist_count': 30,
    }, {
        'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
        'info_dict': {
            'id': 'lamsinka89?order=best&type=g',
            'title': 'lamsinka89',
            'description': 'RedGifs user lamsinka89, ordered by best',
        },
        'playlist_mincount': 100,
    }]

    def _real_extract(self, url):
        """Build a playlist of the gifs posted by the user named in `url`.

        The playlist ID is the username, followed by `?` and the raw query
        string when the URL has one; the title is the bare username.
        """
        match = self._match_valid_url(url)
        username, query_str = match.group('username', 'query')

        if query_str:
            playlist_id = f'{username}?{query_str}'
        else:
            playlist_id = username

        params = urllib.parse.parse_qs(query_str)
        sort_order = params.get('order', ('recent',))[0]

        api_fields = {
            'order': 'recent',
            'type': None,
        }
        entries = self._paged_entries(
            f'users/{username}/search', playlist_id, params, api_fields)

        description = f'RedGifs user {username}, ordered by {sort_order}'
        return self.playlist_result(entries, playlist_id, username, description)
|