yappy.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. OnDemandPagedList,
  4. int_or_none,
  5. traverse_obj,
  6. unified_timestamp,
  7. url_or_none,
  8. )
  9. class YappyIE(InfoExtractor):
  10. _WORKING = False
  11. _VALID_URL = r'https?://yappy\.media/video/(?P<id>\w+)'
  12. _TESTS = [{
  13. 'url': 'https://yappy.media/video/47fea6d8586f48d1a0cf96a7342aabd2',
  14. 'info_dict': {
  15. 'id': '47fea6d8586f48d1a0cf96a7342aabd2',
  16. 'ext': 'mp4',
  17. 'title': 'Куда нажимать? Как снимать? Смотри видос и погнали!🤘🏻',
  18. 'timestamp': 1661893200,
  19. 'description': 'Куда нажимать? Как снимать? Смотри видос и погнали!🤘🏻',
  20. 'thumbnail': 'https://cdn-st.ritm.media/static/pic/thumbnails/0c7c4d73388f47848acaf540d2e2bb8c-thumbnail.jpg',
  21. 'upload_date': '20220830',
  22. 'view_count': int,
  23. 'like_count': int,
  24. 'uploader_id': '59a0c8c485e5410b9c43474bf4c6a373',
  25. 'categories': ['Образование и наука', 'Лайфхак', 'Технологии', 'Арт/искусство'],
  26. 'repost_count': int,
  27. 'uploader': 'YAPPY',
  28. },
  29. }, {
  30. 'url': 'https://yappy.media/video/3862451954ad4bd58ae2ccefddb0bd33',
  31. 'info_dict': {
  32. 'id': '3862451954ad4bd58ae2ccefddb0bd33',
  33. 'ext': 'mp4',
  34. 'title': 'Опиши свой характер 3 словами🙃\n#психология #дружба #отношения',
  35. 'timestamp': 1674726985,
  36. 'like_count': int,
  37. 'description': 'Опиши свой характер 3 словами🙃\n#психология #дружба #отношения',
  38. 'uploader_id': '6793ee3581974a3586fc01e157de6c99',
  39. 'view_count': int,
  40. 'repost_count': int,
  41. 'uploader': 'LENA SHTURMAN',
  42. 'upload_date': '20230126',
  43. 'thumbnail': 'https://cdn-st.ritm.media/static/pic/user_thumbnails/6e76bb4bbad640b6/9ec84c115b2b1967/1674716171.jpg',
  44. },
  45. }]
  46. def _real_extract(self, url):
  47. video_id = self._match_id(url)
  48. webpage = self._download_webpage(url, video_id)
  49. json_ld = self._search_json_ld(webpage, video_id)
  50. nextjs_data = self._search_nextjs_data(webpage, video_id)
  51. media_data = (
  52. traverse_obj(
  53. nextjs_data, ('props', 'pageProps', ('data', 'OpenGraphParameters')), get_all=False)
  54. or self._download_json(f'https://yappy.media/api/video/{video_id}', video_id))
  55. media_url = traverse_obj(media_data, ('link', {url_or_none})) or ''
  56. has_watermark = media_url.endswith('-wm.mp4')
  57. formats = [{
  58. 'url': media_url,
  59. 'ext': 'mp4',
  60. 'format_note': 'Watermarked' if has_watermark else None,
  61. 'preference': -10 if has_watermark else None,
  62. }] if media_url else []
  63. if has_watermark:
  64. formats.append({
  65. 'url': media_url.replace('-wm.mp4', '.mp4'),
  66. 'ext': 'mp4',
  67. })
  68. audio_link = traverse_obj(media_data, ('audio', 'link'))
  69. if audio_link:
  70. formats.append({
  71. 'url': audio_link,
  72. 'ext': 'mp3',
  73. 'acodec': 'mp3',
  74. 'vcodec': 'none',
  75. })
  76. return {
  77. 'id': video_id,
  78. 'title': (json_ld.get('description') or self._html_search_meta(['og:title'], webpage)
  79. or self._html_extract_title(webpage)),
  80. 'formats': formats,
  81. 'thumbnail': (media_data.get('thumbnail')
  82. or self._html_search_meta(['og:image', 'og:image:secure_url'], webpage)),
  83. 'description': (media_data.get('description') or json_ld.get('description')
  84. or self._html_search_meta(['description', 'og:description'], webpage)),
  85. 'timestamp': unified_timestamp(media_data.get('publishedAt') or json_ld.get('timestamp')),
  86. 'view_count': int_or_none(media_data.get('viewsCount') or json_ld.get('view_count')),
  87. 'like_count': int_or_none(media_data.get('likesCount')),
  88. 'uploader': traverse_obj(media_data, ('creator', 'firstName')),
  89. 'uploader_id': traverse_obj(media_data, ('creator', ('uuid', 'nickname')), get_all=False),
  90. 'categories': traverse_obj(media_data, ('categories', ..., 'name')) or None,
  91. 'repost_count': int_or_none(media_data.get('sharingCount')),
  92. }
  93. class YappyProfileIE(InfoExtractor):
  94. _VALID_URL = r'https?://yappy\.media/profile/(?P<id>\w+)'
  95. _TESTS = [{
  96. 'url': 'https://yappy.media/profile/59a0c8c485e5410b9c43474bf4c6a373',
  97. 'info_dict': {
  98. 'id': '59a0c8c485e5410b9c43474bf4c6a373',
  99. },
  100. 'playlist_mincount': 527,
  101. }]
  102. def _real_extract(self, url):
  103. profile_id = self._match_id(url)
  104. def fetch_page(page_num):
  105. page_num += 1
  106. videos = self._download_json(
  107. f'https://yappy.media/api/video/list/{profile_id}?page={page_num}',
  108. profile_id, f'Downloading profile page {page_num} JSON')
  109. for video in traverse_obj(videos, ('results', lambda _, v: v['uuid'])):
  110. yield self.url_result(
  111. f'https://yappy.media/video/{video["uuid"]}', YappyIE,
  112. video['uuid'], video.get('description'))
  113. return self.playlist_result(OnDemandPagedList(fetch_page, 15), profile_id)