ixigua.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import base64
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. get_element_by_id,
  6. int_or_none,
  7. js_to_json,
  8. str_or_none,
  9. traverse_obj,
  10. )
  11. class IxiguaIE(InfoExtractor):
  12. _VALID_URL = r'https?://(?:\w+\.)?ixigua\.com/(?:video/)?(?P<id>\d+).+'
  13. _TESTS = [{
  14. 'url': 'https://www.ixigua.com/6996881461559165471',
  15. 'info_dict': {
  16. 'id': '6996881461559165471',
  17. 'ext': 'mp4',
  18. 'title': '盲目涉水风险大,亲身示范高水位行车注意事项',
  19. 'description': 'md5:8c82f46186299add4a1c455430740229',
  20. 'tags': ['video_car'],
  21. 'like_count': int,
  22. 'dislike_count': int,
  23. 'view_count': int,
  24. 'uploader': '懂车帝原创',
  25. 'uploader_id': '6480145787',
  26. 'thumbnail': r're:^https?://.+\.(avif|webp)',
  27. 'timestamp': 1629088414,
  28. 'duration': 1030,
  29. },
  30. }]
  31. def _get_json_data(self, webpage, video_id):
  32. js_data = get_element_by_id('SSR_HYDRATED_DATA', webpage)
  33. if not js_data:
  34. if self._cookies_passed:
  35. raise ExtractorError('Failed to get SSR_HYDRATED_DATA')
  36. raise ExtractorError('Cookies (not necessarily logged in) are needed', expected=True)
  37. return self._parse_json(
  38. js_data.replace('window._SSR_HYDRATED_DATA=', ''), video_id, transform_source=js_to_json)
  39. def _media_selector(self, json_data):
  40. for path, override in (
  41. (('video_list', ), {}),
  42. (('dynamic_video', 'dynamic_video_list'), {'acodec': 'none'}),
  43. (('dynamic_video', 'dynamic_audio_list'), {'vcodec': 'none', 'ext': 'm4a'}),
  44. ):
  45. for media in traverse_obj(json_data, (..., *path, lambda _, v: v['main_url'])):
  46. yield {
  47. 'url': base64.b64decode(media['main_url']).decode(),
  48. 'width': int_or_none(media.get('vwidth')),
  49. 'height': int_or_none(media.get('vheight')),
  50. 'fps': int_or_none(media.get('fps')),
  51. 'vcodec': media.get('codec_type'),
  52. 'format_id': str_or_none(media.get('quality_type')),
  53. 'filesize': int_or_none(media.get('size')),
  54. 'ext': 'mp4',
  55. **override,
  56. }
  57. def _real_extract(self, url):
  58. video_id = self._match_id(url)
  59. webpage = self._download_webpage(url, video_id)
  60. json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video']
  61. formats = list(self._media_selector(json_data.get('videoResource')))
  62. return {
  63. 'id': video_id,
  64. 'title': json_data.get('title'),
  65. 'description': json_data.get('video_abstract'),
  66. 'formats': formats,
  67. 'like_count': json_data.get('video_like_count'),
  68. 'duration': int_or_none(json_data.get('duration')),
  69. 'tags': [json_data.get('tag')],
  70. 'uploader_id': traverse_obj(json_data, ('user_info', 'user_id')),
  71. 'uploader': traverse_obj(json_data, ('user_info', 'name')),
  72. 'view_count': json_data.get('video_watch_count'),
  73. 'dislike_count': json_data.get('video_unlike_count'),
  74. 'timestamp': int_or_none(json_data.get('video_publish_time')),
  75. }