# trtworld.py (4.6 KB)

  1. from .common import InfoExtractor
  2. from ..utils import ExtractorError, determine_ext, parse_iso8601, url_or_none
  3. from ..utils.traversal import traverse_obj
  4. class TrtWorldIE(InfoExtractor):
  5. _VALID_URL = r'https?://www\.trtworld\.com/video/[\w-]+/[\w-]+-(?P<id>\d+)'
  6. _TESTS = [{
  7. 'url': 'https://www.trtworld.com/video/news/turkiye-switches-to-sustainable-tourism-16067690',
  8. 'info_dict': {
  9. 'id': '16067690',
  10. 'ext': 'mp4',
  11. 'title': 'Türkiye switches to sustainable tourism',
  12. 'release_timestamp': 1701529569,
  13. 'release_date': '20231202',
  14. 'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/17647563_0-0-1920-1080.jpeg',
  15. 'description': 'md5:0a975c04257fb529c8f99c7b76a2cf12',
  16. },
  17. }, {
  18. 'url': 'https://www.trtworld.com/video/one-offs/frames-from-anatolia-recreating-a-james-bond-scene-in-istanbuls-grand-bazaar-14541780',
  19. 'info_dict': {
  20. 'id': '14541780',
  21. 'ext': 'mp4',
  22. 'title': 'Frames From Anatolia: Recreating a ‘James Bond’ Scene in Istanbul’s Grand Bazaar',
  23. 'release_timestamp': 1692440844,
  24. 'release_date': '20230819',
  25. 'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/16939810_0-0-1920-1080.jpeg',
  26. 'description': 'md5:4050e21570cc3c40b6c9badae800a94f',
  27. },
  28. }, {
  29. 'url': 'https://www.trtworld.com/video/the-newsmakers/can-sudan-find-peace-amidst-failed-transition-to-democracy-12904760',
  30. 'info_dict': {
  31. 'id': '12904760',
  32. 'ext': 'mp4',
  33. 'title': 'Can Sudan find peace amidst failed transition to democracy?',
  34. 'release_timestamp': 1681972747,
  35. 'release_date': '20230420',
  36. 'thumbnail': 'http://cdni0.trtworld.com/w768/q70/154214_NMYOUTUBETEMPLATE1_1681833018736.jpg',
  37. },
  38. }, {
  39. 'url': 'https://www.trtworld.com/video/africa-matters/locals-learning-to-cope-with-rising-tides-of-kenyas-great-lakes-16059545',
  40. 'info_dict': {
  41. 'id': 'zEns2dWl00w',
  42. 'ext': 'mp4',
  43. 'title': "Locals learning to cope with rising tides of Kenya's Great Lakes",
  44. 'thumbnail': 'https://i.ytimg.com/vi/zEns2dWl00w/maxresdefault.jpg',
  45. 'description': 'md5:3ad9d7c5234d752a4ead4340c79c6b8d',
  46. 'channel_id': 'UC7fWeaHhqgM4Ry-RMpM2YYw',
  47. 'channel_url': 'https://www.youtube.com/channel/UC7fWeaHhqgM4Ry-RMpM2YYw',
  48. 'duration': 210,
  49. 'view_count': int,
  50. 'age_limit': 0,
  51. 'webpage_url': 'https://www.youtube.com/watch?v=zEns2dWl00w',
  52. 'categories': ['News & Politics'],
  53. 'channel': 'TRT World',
  54. 'channel_follower_count': int,
  55. 'channel_is_verified': True,
  56. 'uploader': 'TRT World',
  57. 'uploader_id': '@trtworld',
  58. 'uploader_url': 'https://www.youtube.com/@trtworld',
  59. 'upload_date': '20231202',
  60. 'availability': 'public',
  61. 'comment_count': int,
  62. 'playable_in_embed': True,
  63. 'tags': [],
  64. 'live_status': 'not_live',
  65. 'like_count': int,
  66. },
  67. }]
  68. def _real_extract(self, url):
  69. display_id = self._match_id(url)
  70. webpage = self._download_webpage(url, display_id)
  71. nuxtjs_data = self._search_nuxt_data(webpage, display_id)['videoData']['content']['platforms']
  72. formats = []
  73. for media_url in traverse_obj(nuxtjs_data, (
  74. ('website', 'ott'), 'metadata', ('hls_url', 'url'), {url_or_none})):
  75. # NB: Website sometimes serves mp4 files under `hls_url` key
  76. if determine_ext(media_url) == 'm3u8':
  77. formats.extend(self._extract_m3u8_formats(media_url, display_id, fatal=False))
  78. else:
  79. formats.append({
  80. 'format_id': 'http',
  81. 'url': media_url,
  82. })
  83. if not formats:
  84. if youtube_id := traverse_obj(nuxtjs_data, ('youtube', 'metadata', 'youtubeId')):
  85. return self.url_result(youtube_id, 'Youtube')
  86. raise ExtractorError('No video found', expected=True)
  87. return {
  88. 'id': display_id,
  89. 'formats': formats,
  90. **traverse_obj(nuxtjs_data, (('website', 'ott'), {
  91. 'title': ('fields', 'title', 'text', {str}),
  92. 'description': ('fields', 'description', 'text', {str}),
  93. 'thumbnail': ('fields', 'thumbnail', 'url', {url_or_none}),
  94. 'release_timestamp': ('published', 'date', {parse_iso8601}),
  95. }), get_all=False),
  96. }