# bannedvideo.py (5.2 KB)

import json

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    try_get,
    unified_timestamp,
    url_or_none,
)
  10. class BannedVideoIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?banned\.video/watch\?id=(?P<id>[0-f]{24})'
  12. _TESTS = [{
  13. 'url': 'https://banned.video/watch?id=5e7a859644e02200c6ef5f11',
  14. 'md5': '14b6e81d41beaaee2215cd75c6ed56e4',
  15. 'info_dict': {
  16. 'id': '5e7a859644e02200c6ef5f11',
  17. 'ext': 'mp4',
  18. 'title': 'China Discovers Origin of Corona Virus: Issues Emergency Statement',
  19. 'thumbnail': r're:^https?://(?:www\.)?assets\.infowarsmedia.com/images/',
  20. 'description': 'md5:560d96f02abbebe6c6b78b47465f6b28',
  21. 'upload_date': '20200324',
  22. 'timestamp': 1585087895,
  23. },
  24. }]
  25. _GRAPHQL_GETMETADATA_QUERY = '''
  26. query GetVideoAndComments($id: String!) {
  27. getVideo(id: $id) {
  28. streamUrl
  29. directUrl
  30. unlisted
  31. live
  32. tags {
  33. name
  34. }
  35. title
  36. summary
  37. playCount
  38. largeImage
  39. videoDuration
  40. channel {
  41. _id
  42. title
  43. }
  44. createdAt
  45. }
  46. getVideoComments(id: $id, limit: 999999, offset: 0) {
  47. _id
  48. content
  49. user {
  50. _id
  51. username
  52. }
  53. voteCount {
  54. positive
  55. }
  56. createdAt
  57. replyCount
  58. }
  59. }'''
  60. _GRAPHQL_GETCOMMENTSREPLIES_QUERY = '''
  61. query GetCommentReplies($id: String!) {
  62. getCommentReplies(id: $id, limit: 999999, offset: 0) {
  63. _id
  64. content
  65. user {
  66. _id
  67. username
  68. }
  69. voteCount {
  70. positive
  71. }
  72. createdAt
  73. replyCount
  74. }
  75. }'''
  76. _GRAPHQL_QUERIES = {
  77. 'GetVideoAndComments': _GRAPHQL_GETMETADATA_QUERY,
  78. 'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY,
  79. }
  80. def _call_api(self, video_id, id_var, operation, note):
  81. return self._download_json(
  82. 'https://api.infowarsmedia.com/graphql', video_id, note=note,
  83. headers={
  84. 'Content-Type': 'application/json; charset=utf-8',
  85. }, data=json.dumps({
  86. 'variables': {'id': id_var},
  87. 'operationName': operation,
  88. 'query': self._GRAPHQL_QUERIES[operation],
  89. }).encode('utf8')).get('data')
  90. def _get_comments(self, video_id, comments, comment_data):
  91. yield from comments
  92. for comment in comment_data.copy():
  93. comment_id = comment.get('_id')
  94. if comment.get('replyCount') > 0:
  95. reply_json = self._call_api(
  96. video_id, comment_id, 'GetCommentReplies',
  97. f'Downloading replies for comment {comment_id}')
  98. for reply in reply_json.get('getCommentReplies'):
  99. yield self._parse_comment(reply, comment_id)
  100. @staticmethod
  101. def _parse_comment(comment_data, parent):
  102. return {
  103. 'id': comment_data.get('_id'),
  104. 'text': comment_data.get('content'),
  105. 'author': try_get(comment_data, lambda x: x['user']['username']),
  106. 'author_id': try_get(comment_data, lambda x: x['user']['_id']),
  107. 'timestamp': unified_timestamp(comment_data.get('createdAt')),
  108. 'parent': parent,
  109. 'like_count': try_get(comment_data, lambda x: x['voteCount']['positive']),
  110. }
  111. def _real_extract(self, url):
  112. video_id = self._match_id(url)
  113. video_json = self._call_api(video_id, video_id, 'GetVideoAndComments', 'Downloading video metadata')
  114. video_info = video_json['getVideo']
  115. is_live = video_info.get('live')
  116. comments = [self._parse_comment(comment, 'root') for comment in video_json.get('getVideoComments')]
  117. formats = [{
  118. 'format_id': 'direct',
  119. 'quality': 1,
  120. 'url': video_info.get('directUrl'),
  121. 'ext': 'mp4',
  122. }] if url_or_none(video_info.get('directUrl')) else []
  123. if video_info.get('streamUrl'):
  124. formats.extend(self._extract_m3u8_formats(
  125. video_info.get('streamUrl'), video_id, 'mp4',
  126. entry_protocol='m3u8_native', m3u8_id='hls', live=True))
  127. return {
  128. 'id': video_id,
  129. 'title': video_info.get('title')[:-1],
  130. 'formats': formats,
  131. 'is_live': is_live,
  132. 'description': video_info.get('summary'),
  133. 'channel': try_get(video_info, lambda x: x['channel']['title']),
  134. 'channel_id': try_get(video_info, lambda x: x['channel']['_id']),
  135. 'view_count': int_or_none(video_info.get('playCount')),
  136. 'thumbnail': url_or_none(video_info.get('largeImage')),
  137. 'duration': float_or_none(video_info.get('videoDuration')),
  138. 'timestamp': unified_timestamp(video_info.get('createdAt')),
  139. 'tags': [tag.get('name') for tag in video_info.get('tags')],
  140. 'availability': self._availability(is_unlisted=video_info.get('unlisted')),
  141. 'comments': comments,
  142. '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')),
  143. }