redgifs.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. import functools
  2. import urllib.parse
  3. from .common import InfoExtractor
  4. from ..networking.exceptions import HTTPError
  5. from ..utils import (
  6. ExtractorError,
  7. OnDemandPagedList,
  8. int_or_none,
  9. qualities,
  10. try_get,
  11. )
  12. class RedGifsBaseInfoExtractor(InfoExtractor):
  13. _FORMATS = {
  14. 'gif': 250,
  15. 'sd': 480,
  16. 'hd': None,
  17. }
  18. _API_HEADERS = {
  19. 'referer': 'https://www.redgifs.com/',
  20. 'origin': 'https://www.redgifs.com',
  21. 'content-type': 'application/json',
  22. }
  23. def _parse_gif_data(self, gif_data):
  24. video_id = gif_data.get('id')
  25. quality = qualities(tuple(self._FORMATS.keys()))
  26. orig_height = int_or_none(gif_data.get('height'))
  27. aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width'])
  28. formats = []
  29. for format_id, height in self._FORMATS.items():
  30. video_url = gif_data['urls'].get(format_id)
  31. if not video_url:
  32. continue
  33. height = min(orig_height, height or orig_height)
  34. formats.append({
  35. 'url': video_url,
  36. 'format_id': format_id,
  37. 'width': height * aspect_ratio if aspect_ratio else None,
  38. 'height': height,
  39. 'quality': quality(format_id),
  40. })
  41. return {
  42. 'id': video_id,
  43. 'webpage_url': f'https://redgifs.com/watch/{video_id}',
  44. 'extractor_key': RedGifsIE.ie_key(),
  45. 'extractor': 'RedGifs',
  46. 'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
  47. 'timestamp': int_or_none(gif_data.get('createDate')),
  48. 'uploader': gif_data.get('userName'),
  49. 'duration': int_or_none(gif_data.get('duration')),
  50. 'view_count': int_or_none(gif_data.get('views')),
  51. 'like_count': int_or_none(gif_data.get('likes')),
  52. 'categories': gif_data.get('tags') or [],
  53. 'tags': gif_data.get('tags'),
  54. 'age_limit': 18,
  55. 'formats': formats,
  56. }
  57. def _fetch_oauth_token(self, video_id):
  58. # https://github.com/Redgifs/api/wiki/Temporary-tokens
  59. auth = self._download_json('https://api.redgifs.com/v2/auth/temporary',
  60. video_id, note='Fetching temporary token')
  61. if not auth.get('token'):
  62. raise ExtractorError('Unable to get temporary token')
  63. self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'
  64. def _call_api(self, ep, video_id, **kwargs):
  65. for first_attempt in True, False:
  66. if 'authorization' not in self._API_HEADERS:
  67. self._fetch_oauth_token(video_id)
  68. try:
  69. headers = dict(self._API_HEADERS)
  70. headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
  71. data = self._download_json(
  72. f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, **kwargs)
  73. break
  74. except ExtractorError as e:
  75. if first_attempt and isinstance(e.cause, HTTPError) and e.cause.status == 401:
  76. del self._API_HEADERS['authorization'] # refresh the token
  77. continue
  78. raise
  79. if 'error' in data:
  80. raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
  81. return data
  82. def _fetch_page(self, ep, video_id, query, page):
  83. query['page'] = page + 1
  84. data = self._call_api(
  85. ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')
  86. for entry in data['gifs']:
  87. yield self._parse_gif_data(entry)
  88. def _prepare_api_query(self, query, fields):
  89. api_query = [
  90. (field_name, query.get(field_name, (default,))[0])
  91. for field_name, default in fields.items()]
  92. return {key: val for key, val in api_query if val is not None}
  93. def _paged_entries(self, ep, item_id, query, fields):
  94. page = int_or_none(query.get('page', (None,))[0])
  95. page_fetcher = functools.partial(
  96. self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
  97. return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
  98. class RedGifsIE(RedGifsBaseInfoExtractor):
  99. _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
  100. _TESTS = [{
  101. 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
  102. 'info_dict': {
  103. 'id': 'squeakyhelplesswisent',
  104. 'ext': 'mp4',
  105. 'title': 'Hotwife Legs Thick',
  106. 'timestamp': 1636287915,
  107. 'upload_date': '20211107',
  108. 'uploader': 'ignored52',
  109. 'duration': 16,
  110. 'view_count': int,
  111. 'like_count': int,
  112. 'categories': list,
  113. 'age_limit': 18,
  114. 'tags': list,
  115. },
  116. }, {
  117. 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
  118. 'info_dict': {
  119. 'id': 'squeakyhelplesswisent',
  120. 'ext': 'mp4',
  121. 'title': 'Hotwife Legs Thick',
  122. 'timestamp': 1636287915,
  123. 'upload_date': '20211107',
  124. 'uploader': 'ignored52',
  125. 'duration': 16,
  126. 'view_count': int,
  127. 'like_count': int,
  128. 'categories': list,
  129. 'age_limit': 18,
  130. 'tags': list,
  131. },
  132. }]
  133. def _real_extract(self, url):
  134. video_id = self._match_id(url).lower()
  135. video_info = self._call_api(
  136. f'gifs/{video_id}?views=yes', video_id, note='Downloading video info')
  137. return self._parse_gif_data(video_info['gif'])
  138. class RedGifsSearchIE(RedGifsBaseInfoExtractor):
  139. IE_DESC = 'Redgifs search'
  140. _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
  141. _PAGE_SIZE = 80
  142. _TESTS = [
  143. {
  144. 'url': 'https://www.redgifs.com/browse?tags=Lesbian',
  145. 'info_dict': {
  146. 'id': 'tags=Lesbian',
  147. 'title': 'Lesbian',
  148. 'description': 'RedGifs search for Lesbian, ordered by trending',
  149. },
  150. 'playlist_mincount': 100,
  151. },
  152. {
  153. 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
  154. 'info_dict': {
  155. 'id': 'type=g&order=latest&tags=Lesbian',
  156. 'title': 'Lesbian',
  157. 'description': 'RedGifs search for Lesbian, ordered by latest',
  158. },
  159. 'playlist_mincount': 100,
  160. },
  161. {
  162. 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
  163. 'info_dict': {
  164. 'id': 'type=g&order=latest&tags=Lesbian&page=2',
  165. 'title': 'Lesbian',
  166. 'description': 'RedGifs search for Lesbian, ordered by latest',
  167. },
  168. 'playlist_count': 80,
  169. },
  170. ]
  171. def _real_extract(self, url):
  172. query_str = self._match_valid_url(url).group('query')
  173. query = urllib.parse.parse_qs(query_str)
  174. if not query.get('tags'):
  175. raise ExtractorError('Invalid query tags', expected=True)
  176. tags = query.get('tags')[0]
  177. order = query.get('order', ('trending',))[0]
  178. query['search_text'] = [tags]
  179. entries = self._paged_entries('gifs/search', query_str, query, {
  180. 'search_text': None,
  181. 'order': 'trending',
  182. 'type': None,
  183. })
  184. return self.playlist_result(
  185. entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}')
  186. class RedGifsUserIE(RedGifsBaseInfoExtractor):
  187. IE_DESC = 'Redgifs user'
  188. _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
  189. _PAGE_SIZE = 30
  190. _TESTS = [
  191. {
  192. 'url': 'https://www.redgifs.com/users/lamsinka89',
  193. 'info_dict': {
  194. 'id': 'lamsinka89',
  195. 'title': 'lamsinka89',
  196. 'description': 'RedGifs user lamsinka89, ordered by recent',
  197. },
  198. 'playlist_mincount': 100,
  199. },
  200. {
  201. 'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
  202. 'info_dict': {
  203. 'id': 'lamsinka89?page=3',
  204. 'title': 'lamsinka89',
  205. 'description': 'RedGifs user lamsinka89, ordered by recent',
  206. },
  207. 'playlist_count': 30,
  208. },
  209. {
  210. 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
  211. 'info_dict': {
  212. 'id': 'lamsinka89?order=best&type=g',
  213. 'title': 'lamsinka89',
  214. 'description': 'RedGifs user lamsinka89, ordered by best',
  215. },
  216. 'playlist_mincount': 100,
  217. },
  218. ]
  219. def _real_extract(self, url):
  220. username, query_str = self._match_valid_url(url).group('username', 'query')
  221. playlist_id = f'{username}?{query_str}' if query_str else username
  222. query = urllib.parse.parse_qs(query_str)
  223. order = query.get('order', ('recent',))[0]
  224. entries = self._paged_entries(f'users/{username}/search', playlist_id, query, {
  225. 'order': 'recent',
  226. 'type': None,
  227. })
  228. return self.playlist_result(
  229. entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}')