cloudycdn.py 3.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. int_or_none,
  5. parse_iso8601,
  6. url_or_none,
  7. urlencode_postdata,
  8. )
  9. from ..utils.traversal import traverse_obj
  10. class CloudyCDNIE(InfoExtractor):
  11. _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
  12. _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
  13. _TESTS = [{
  14. 'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
  15. 'md5': '64f72a360ca530d5ed89c77646c9eee5',
  16. 'info_dict': {
  17. 'id': '46k_d23-6000-105',
  18. 'ext': 'mp4',
  19. 'timestamp': 1700589151,
  20. 'duration': 1442,
  21. 'upload_date': '20231121',
  22. 'title': 'D23-6000-105_cetstud',
  23. 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
  24. },
  25. }, {
  26. 'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
  27. 'md5': '798828a479151e2444d8dcfbec76e482',
  28. 'info_dict': {
  29. 'id': '26e_lv-8-5-1',
  30. 'ext': 'mp4',
  31. 'title': 'LV-8-5-1',
  32. 'timestamp': 1669767167,
  33. 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
  34. 'duration': 1205,
  35. 'upload_date': '20221130',
  36. },
  37. }, {
  38. # Video-only m3u8 formats need manual fixup
  39. 'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074',
  40. 'md5': 'fc472e40f6e6238446509be411c920e2',
  41. 'info_dict': {
  42. 'id': '08j_d24-6000-074',
  43. 'ext': 'mp4',
  44. 'upload_date': '20240620',
  45. 'duration': 1673,
  46. 'title': 'D24-6000-074-cetstud',
  47. 'timestamp': 1718902233,
  48. 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
  49. },
  50. 'params': {'format': 'bv'},
  51. }]
  52. _WEBPAGE_TESTS = [{
  53. 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
  54. 'md5': '63074e8e6c84ac2a01f2fb8bf03b8f43',
  55. 'info_dict': {
  56. 'id': 'cqd_lib-2',
  57. 'ext': 'mp4',
  58. 'upload_date': '20230223',
  59. 'duration': 629,
  60. 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
  61. 'timestamp': 1677181513,
  62. 'title': 'LIB-2',
  63. },
  64. }]
  65. def _real_extract(self, url):
  66. site_id, video_id = self._match_valid_url(url).group('site_id', 'id')
  67. data = self._download_json(
  68. f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
  69. video_id, data=urlencode_postdata({
  70. 'version': '6.4.0',
  71. 'referer': url,
  72. }))
  73. formats, subtitles = [], {}
  74. for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
  75. fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
  76. for fmt in fmts:
  77. if re.search(r'chunklist_b\d+_vo_', fmt['url']):
  78. fmt['acodec'] = 'none'
  79. formats.extend(fmts)
  80. self._merge_subtitles(subs, target=subtitles)
  81. return {
  82. 'id': video_id,
  83. 'formats': formats,
  84. 'subtitles': subtitles,
  85. **traverse_obj(data, {
  86. 'title': ('name', {str}),
  87. 'duration': ('duration', {int_or_none}),
  88. 'timestamp': ('upload_date', {parse_iso8601}),
  89. 'thumbnail': ('source', 'poster', {url_or_none}),
  90. }),
  91. }