ninegag.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. ExtractorError,
  4. determine_ext,
  5. int_or_none,
  6. traverse_obj,
  7. unescapeHTML,
  8. url_or_none,
  9. )
  10. class NineGagIE(InfoExtractor):
  11. IE_NAME = '9gag'
  12. IE_DESC = '9GAG'
  13. _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
  14. _TESTS = [{
  15. 'url': 'https://9gag.com/gag/ae5Ag7B',
  16. 'info_dict': {
  17. 'id': 'ae5Ag7B',
  18. 'ext': 'webm',
  19. 'title': 'Capybara Agility Training',
  20. 'upload_date': '20191108',
  21. 'timestamp': 1573237208,
  22. 'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg',
  23. 'categories': ['Awesome'],
  24. 'tags': ['Awesome'],
  25. 'duration': 44,
  26. 'like_count': int,
  27. 'dislike_count': int,
  28. 'comment_count': int,
  29. },
  30. }, {
  31. # HTML escaped title
  32. 'url': 'https://9gag.com/gag/av5nvyb',
  33. 'only_matching': True,
  34. }, {
  35. # Non Anonymous Uploader
  36. 'url': 'https://9gag.com/gag/ajgp66G',
  37. 'info_dict': {
  38. 'id': 'ajgp66G',
  39. 'ext': 'webm',
  40. 'title': 'Master Shifu! Or Splinter! You decide:',
  41. 'upload_date': '20220806',
  42. 'timestamp': 1659803411,
  43. 'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg',
  44. 'categories': ['Funny'],
  45. 'tags': ['Funny'],
  46. 'duration': 26,
  47. 'like_count': int,
  48. 'dislike_count': int,
  49. 'comment_count': int,
  50. 'uploader': 'Peter Klaus',
  51. 'uploader_id': 'peterklaus12',
  52. 'uploader_url': 'https://9gag.com/u/peterklaus12',
  53. },
  54. }]
  55. def _real_extract(self, url):
  56. post_id = self._match_id(url)
  57. post = self._download_json(
  58. 'https://9gag.com/v1/post', post_id, query={
  59. 'id': post_id,
  60. })['data']['post']
  61. if post.get('type') != 'Animated':
  62. raise ExtractorError(
  63. 'The given url does not contain a video',
  64. expected=True)
  65. duration = None
  66. formats = []
  67. thumbnails = []
  68. for key, image in (post.get('images') or {}).items():
  69. image_url = url_or_none(image.get('url'))
  70. if not image_url:
  71. continue
  72. ext = determine_ext(image_url)
  73. image_id = key.strip('image')
  74. common = {
  75. 'url': image_url,
  76. 'width': int_or_none(image.get('width')),
  77. 'height': int_or_none(image.get('height')),
  78. }
  79. if ext in ('jpg', 'png'):
  80. webp_url = image.get('webpUrl')
  81. if webp_url:
  82. t = common.copy()
  83. t.update({
  84. 'id': image_id + '-webp',
  85. 'url': webp_url,
  86. })
  87. thumbnails.append(t)
  88. common.update({
  89. 'id': image_id,
  90. 'ext': ext,
  91. })
  92. thumbnails.append(common)
  93. elif ext in ('webm', 'mp4'):
  94. if not duration:
  95. duration = int_or_none(image.get('duration'))
  96. common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
  97. for vcodec in ('vp8', 'vp9', 'h265'):
  98. c_url = image.get(vcodec + 'Url')
  99. if not c_url:
  100. continue
  101. c_f = common.copy()
  102. c_f.update({
  103. 'format_id': image_id + '-' + vcodec,
  104. 'url': c_url,
  105. 'vcodec': vcodec,
  106. })
  107. formats.append(c_f)
  108. common.update({
  109. 'ext': ext,
  110. 'format_id': image_id,
  111. })
  112. formats.append(common)
  113. section = traverse_obj(post, ('postSection', 'name'))
  114. tags = None
  115. post_tags = post.get('tags')
  116. if post_tags:
  117. tags = []
  118. for tag in post_tags:
  119. tag_key = tag.get('key')
  120. if not tag_key:
  121. continue
  122. tags.append(tag_key)
  123. return {
  124. 'id': post_id,
  125. 'title': unescapeHTML(post.get('title')),
  126. 'timestamp': int_or_none(post.get('creationTs')),
  127. 'duration': duration,
  128. 'uploader': traverse_obj(post, ('creator', 'fullName')),
  129. 'uploader_id': traverse_obj(post, ('creator', 'username')),
  130. 'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))),
  131. 'formats': formats,
  132. 'thumbnails': thumbnails,
  133. 'like_count': int_or_none(post.get('upVoteCount')),
  134. 'dislike_count': int_or_none(post.get('downVoteCount')),
  135. 'comment_count': int_or_none(post.get('commentsCount')),
  136. 'age_limit': 18 if post.get('nsfw') == 1 else None,
  137. 'categories': [section] if section else None,
  138. 'tags': tags,
  139. }