pr0gramm.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. import datetime as dt
  2. import functools
  3. import json
  4. import urllib.parse
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. float_or_none,
  9. int_or_none,
  10. make_archive_id,
  11. mimetype2ext,
  12. str_or_none,
  13. urljoin,
  14. )
  15. from ..utils.traversal import traverse_obj
  16. class Pr0grammIE(InfoExtractor):
  17. _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
  18. _TESTS = [{
  19. 'url': 'https://pr0gramm.com/new/video/5466437',
  20. 'info_dict': {
  21. 'id': '5466437',
  22. 'ext': 'mp4',
  23. 'title': 'pr0gramm-5466437 by g11st',
  24. 'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'],
  25. 'uploader': 'g11st',
  26. 'uploader_id': '394718',
  27. 'timestamp': 1671590240,
  28. 'upload_date': '20221221',
  29. 'like_count': int,
  30. 'dislike_count': int,
  31. 'age_limit': 0,
  32. 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
  33. '_old_archive_ids': ['pr0grammstatic 5466437'],
  34. },
  35. }, {
  36. 'url': 'https://pr0gramm.com/new/3052805:comment28391322',
  37. 'info_dict': {
  38. 'id': '3052805',
  39. 'ext': 'mp4',
  40. 'title': 'pr0gramm-3052805 by Hansking1',
  41. 'tags': 'count:15',
  42. 'uploader': 'Hansking1',
  43. 'uploader_id': '385563',
  44. 'timestamp': 1552930408,
  45. 'upload_date': '20190318',
  46. 'like_count': int,
  47. 'dislike_count': int,
  48. 'age_limit': 0,
  49. 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
  50. '_old_archive_ids': ['pr0grammstatic 3052805'],
  51. },
  52. }, {
  53. # Requires verified account
  54. 'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332',
  55. 'info_dict': {
  56. 'id': '5848332',
  57. 'ext': 'mp4',
  58. 'title': 'pr0gramm-5848332 by erd0pfel',
  59. 'tags': 'count:18',
  60. 'uploader': 'erd0pfel',
  61. 'uploader_id': '349094',
  62. 'timestamp': 1694489652,
  63. 'upload_date': '20230912',
  64. 'like_count': int,
  65. 'dislike_count': int,
  66. 'age_limit': 18,
  67. 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
  68. '_old_archive_ids': ['pr0grammstatic 5848332'],
  69. },
  70. }, {
  71. 'url': 'https://pr0gramm.com/top/5895149',
  72. 'info_dict': {
  73. 'id': '5895149',
  74. 'ext': 'mp4',
  75. 'title': 'pr0gramm-5895149 by algoholigSeeManThrower',
  76. 'tags': 'count:19',
  77. 'uploader': 'algoholigSeeManThrower',
  78. 'uploader_id': '457556',
  79. 'timestamp': 1697580902,
  80. 'upload_date': '20231018',
  81. 'like_count': int,
  82. 'dislike_count': int,
  83. 'age_limit': 0,
  84. 'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg',
  85. '_old_archive_ids': ['pr0grammstatic 5895149'],
  86. },
  87. }, {
  88. 'url': 'https://pr0gramm.com/static/5466437',
  89. 'only_matching': True,
  90. }, {
  91. 'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805',
  92. 'only_matching': True,
  93. }, {
  94. 'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290',
  95. 'only_matching': True,
  96. }]
  97. BASE_URL = 'https://pr0gramm.com'
  98. @functools.cached_property
  99. def _is_logged_in(self):
  100. return 'pp' in self._get_cookies(self.BASE_URL)
  101. @functools.cached_property
  102. def _maximum_flags(self):
  103. # We need to guess the flags for the content otherwise the api will raise an error
  104. # We can guess the maximum allowed flags for the account from the cookies
  105. # Bitflags are (msbf): pol, nsfp, nsfl, nsfw, sfw
  106. flags = 0b10001
  107. if self._is_logged_in:
  108. flags |= 0b01000
  109. cookies = self._get_cookies(self.BASE_URL)
  110. if 'me' not in cookies:
  111. self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
  112. if traverse_obj(cookies, ('me', {lambda x: x.value}, {urllib.parse.unquote}, {json.loads}, 'verified')):
  113. flags |= 0b00110
  114. return flags
  115. def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'):
  116. data = self._download_json(
  117. f'https://pr0gramm.com/api/items/{endpoint}',
  118. video_id, note, query=query, expected_status=403)
  119. error = traverse_obj(data, ('error', {str}))
  120. if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'):
  121. if not self._is_logged_in:
  122. self.raise_login_required()
  123. raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True)
  124. elif error:
  125. message = traverse_obj(data, ('msg', {str})) or error
  126. raise ExtractorError(f'API returned error: {message}', expected=True)
  127. return data
  128. @staticmethod
  129. def _create_source_url(path):
  130. return urljoin('https://img.pr0gramm.com', path)
  131. def _real_extract(self, url):
  132. video_id = self._match_id(url)
  133. video_info = traverse_obj(
  134. self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
  135. ('items', 0, {dict}))
  136. source = video_info.get('image')
  137. if not source or not source.endswith('mp4'):
  138. self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
  139. metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
  140. tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
  141. # Sorted by "confidence", higher confidence = earlier in list
  142. confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
  143. if confidences:
  144. tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
  145. formats = traverse_obj(video_info, ('variants', ..., {
  146. 'format_id': ('name', {str}),
  147. 'url': ('path', {self._create_source_url}),
  148. 'ext': ('mimeType', {mimetype2ext}),
  149. 'vcodec': ('codec', {str}),
  150. 'width': ('width', {int_or_none}),
  151. 'height': ('height', {int_or_none}),
  152. 'bitrate': ('bitRate', {float_or_none}),
  153. 'filesize': ('fileSize', {int_or_none}),
  154. })) if video_info.get('variants') else [{
  155. 'ext': 'mp4',
  156. 'format_id': 'source',
  157. **traverse_obj(video_info, {
  158. 'url': ('image', {self._create_source_url}),
  159. 'width': ('width', {int_or_none}),
  160. 'height': ('height', {int_or_none}),
  161. }),
  162. }]
  163. subtitles = {}
  164. for subtitle in traverse_obj(video_info, ('subtitles', lambda _, v: v['language'])):
  165. subtitles.setdefault(subtitle['language'], []).append(traverse_obj(subtitle, {
  166. 'url': ('path', {self._create_source_url}),
  167. 'note': ('label', {str}),
  168. }))
  169. return {
  170. 'id': video_id,
  171. 'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
  172. 'tags': tags,
  173. 'formats': formats,
  174. 'subtitles': subtitles,
  175. 'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
  176. '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
  177. **traverse_obj(video_info, {
  178. 'uploader': ('user', {str}),
  179. 'uploader_id': ('userId', {str_or_none}),
  180. 'like_count': ('up', {int}),
  181. 'dislike_count': ('down', {int}),
  182. 'timestamp': ('created', {int}),
  183. 'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
  184. 'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}),
  185. }),
  186. }