koo.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. clean_html,
  4. try_get,
  5. )
  6. class KooIE(InfoExtractor):
  7. _WORKING = False
  8. _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
  9. _TESTS = [{ # Test for video in the comments
  10. 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
  11. 'info_dict': {
  12. 'id': '946c4189-bc2d-4524-b95b-43f641e2adde',
  13. 'ext': 'mp4',
  14. 'title': 'test for video in comment',
  15. 'description': 'md5:daa77dc214add4da8b6ea7d2226776e7',
  16. 'timestamp': 1632215195,
  17. 'uploader_id': 'ytdlpTestAccount',
  18. 'uploader': 'yt-dlpTestAccount',
  19. 'duration': 7000,
  20. 'upload_date': '20210921',
  21. },
  22. 'params': {'skip_download': True},
  23. }, { # Test for koo with long title
  24. 'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361',
  25. 'info_dict': {
  26. 'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361',
  27. 'ext': 'mp4',
  28. 'title': 'md5:47a71c2337295330c5a19a8af1bbf450',
  29. 'description': 'md5:06a6a84e9321499486dab541693d8425',
  30. 'timestamp': 1632106884,
  31. 'uploader_id': 'laxman_kumarDBFEC',
  32. 'uploader': 'Laxman Kumar 🇮🇳',
  33. 'duration': 46000,
  34. 'upload_date': '20210920',
  35. },
  36. 'params': {'skip_download': True},
  37. }, { # Test for audio
  38. 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
  39. 'info_dict': {
  40. 'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
  41. 'ext': 'mp4',
  42. 'title': 'Test for audio',
  43. 'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8',
  44. 'timestamp': 1632211634,
  45. 'uploader_id': 'ytdlpTestAccount',
  46. 'uploader': 'yt-dlpTestAccount',
  47. 'duration': 214000,
  48. 'upload_date': '20210921',
  49. },
  50. 'params': {'skip_download': True},
  51. }, { # Test for video
  52. 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
  53. 'info_dict': {
  54. 'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
  55. 'ext': 'mp4',
  56. 'title': 'Test for video',
  57. 'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500',
  58. 'timestamp': 1632211468,
  59. 'uploader_id': 'ytdlpTestAccount',
  60. 'uploader': 'yt-dlpTestAccount',
  61. 'duration': 14000,
  62. 'upload_date': '20210921',
  63. },
  64. 'params': {'skip_download': True},
  65. }, { # Test for link
  66. 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a',
  67. 'skip': 'No video/audio found at the provided url.',
  68. 'info_dict': {
  69. 'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a',
  70. 'title': 'Test for link',
  71. 'ext': 'none',
  72. },
  73. }, { # Test for images
  74. 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
  75. 'skip': 'No video/audio found at the provided url.',
  76. 'info_dict': {
  77. 'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
  78. 'title': 'Test for images',
  79. 'ext': 'none',
  80. },
  81. }]
  82. def _real_extract(self, url):
  83. video_id = self._match_id(url)
  84. data_json = self._download_json(
  85. f'https://www.kooapp.com/apiV1/ku/{video_id}?limit=20&offset=0&showSimilarKoos=true', video_id)['parentContent']
  86. item_json = next(content['items'][0] for content in data_json
  87. if try_get(content, lambda x: x['items'][0]['id']) == video_id)
  88. media_json = item_json['mediaMap']
  89. formats = []
  90. mp4_url = media_json.get('videoMp4')
  91. video_m3u8_url = media_json.get('videoHls')
  92. if mp4_url:
  93. formats.append({
  94. 'url': mp4_url,
  95. 'ext': 'mp4',
  96. })
  97. if video_m3u8_url:
  98. formats.extend(self._extract_m3u8_formats(video_m3u8_url, video_id, fatal=False, ext='mp4'))
  99. if not formats:
  100. self.raise_no_formats('No video/audio found at the provided url.', expected=True)
  101. return {
  102. 'id': video_id,
  103. 'title': clean_html(item_json.get('title')),
  104. 'description': f'{clean_html(item_json.get("title"))}\n\n{clean_html(item_json.get("enTransliteration"))}',
  105. 'timestamp': item_json.get('createdAt'),
  106. 'uploader_id': item_json.get('handle'),
  107. 'uploader': item_json.get('name'),
  108. 'duration': media_json.get('duration'),
  109. 'formats': formats,
  110. }