# elementorembed.py (3.2 KB)
  1. import re
  2. from .common import InfoExtractor
  3. from .vimeo import VimeoIE
  4. from .youtube import YoutubeIE
  5. from ..utils import unescapeHTML, url_or_none
  6. from ..utils.traversal import traverse_obj
  7. class ElementorEmbedIE(InfoExtractor):
  8. _VALID_URL = False
  9. _WEBPAGE_TESTS = [{
  10. 'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
  11. 'info_dict': {
  12. 'id': 'KgzuxwuQwM4',
  13. 'ext': 'mp4',
  14. 'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
  15. 'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
  16. 'playable_in_embed': True,
  17. 'tags': 'count:16',
  18. 'like_count': int,
  19. 'channel': 'Capital TV Cyprus',
  20. 'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
  21. 'availability': 'public',
  22. 'description': 'md5:7a3308a22881aea4612358c4ba121f77',
  23. 'duration': 2891,
  24. 'upload_date': '20231214',
  25. 'uploader_id': '@capitaltvcyprus6389',
  26. 'live_status': 'not_live',
  27. 'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
  28. 'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
  29. 'uploader': 'Capital TV Cyprus',
  30. 'age_limit': 0,
  31. 'categories': ['News & Politics'],
  32. 'view_count': int,
  33. 'channel_follower_count': int,
  34. },
  35. }, {
  36. 'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
  37. 'info_dict': {
  38. 'id': '?playlist=76011151&video=9e59909',
  39. 'title': 'Theme Builder Collection - Academy',
  40. 'age_limit': 0,
  41. 'timestamp': 1702196984.0,
  42. 'upload_date': '20231210',
  43. 'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
  44. 'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
  45. },
  46. 'playlist_count': 11,
  47. 'params': {
  48. 'skip_download': True,
  49. },
  50. }]
  51. _WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'
  52. def _extract_from_webpage(self, url, webpage):
  53. for data_settings in re.findall(self._WIDGET_REGEX, webpage):
  54. data = self._parse_json(data_settings, None, fatal=False, transform_source=unescapeHTML)
  55. if youtube_url := traverse_obj(data, ('youtube_url', {url_or_none})):
  56. yield self.url_result(youtube_url, ie=YoutubeIE)
  57. for video in traverse_obj(data, ('tabs', lambda _, v: v['_id'], {dict})):
  58. if youtube_url := traverse_obj(video, ('youtube_url', {url_or_none})):
  59. yield self.url_result(youtube_url, ie=YoutubeIE)
  60. if vimeo_url := traverse_obj(video, ('vimeo_url', {url_or_none})):
  61. yield self.url_result(vimeo_url, ie=VimeoIE)
  62. for direct_url in traverse_obj(video, (('hosted_url', 'external_url'), 'url', {url_or_none})):
  63. yield {
  64. 'id': video['_id'],
  65. 'url': direct_url,
  66. 'title': video.get('title'),
  67. }