onenewsnz.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. from .brightcove import BrightcoveNewIE
  2. from .common import InfoExtractor
  3. from ..utils import ExtractorError, traverse_obj
  4. class OneNewsNZIE(InfoExtractor):
  5. IE_NAME = '1News'
  6. IE_DESC = '1news.co.nz article videos'
  7. _VALID_URL = r'https?://(?:www\.)?(?:1|one)news\.co\.nz/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
  8. _TESTS = [
  9. { # Brightcove video
  10. 'url': 'https://www.1news.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
  11. 'info_dict': {
  12. 'id': 'cows-painted-green-on-parliament-lawn-in-climate-protest',
  13. 'title': '\'Cows\' painted green on Parliament lawn in climate protest',
  14. },
  15. 'playlist': [{
  16. 'info_dict': {
  17. 'id': '6312993358112',
  18. 'title': 'Activists dressed as cows painted green outside Parliament in climate protest',
  19. 'ext': 'mp4',
  20. 'tags': 'count:6',
  21. 'uploader_id': '963482464001',
  22. 'timestamp': 1664416255,
  23. 'upload_date': '20220929',
  24. 'duration': 38.272,
  25. 'thumbnail': r're:^https?://.*\.jpg$',
  26. 'description': 'Greenpeace accused the Government of "greenwashing" instead of taking climate action.',
  27. },
  28. }],
  29. }, {
  30. # YouTube video
  31. 'url': 'https://www.1news.co.nz/2022/09/30/now-is-the-time-to-care-about-womens-rugby/',
  32. 'info_dict': {
  33. 'id': 'now-is-the-time-to-care-about-womens-rugby',
  34. 'title': 'Now is the time to care about women\'s rugby',
  35. },
  36. 'playlist': [{
  37. 'info_dict': {
  38. 'id': 's4wEB9neTfU',
  39. 'title': 'Why I love women’s rugby: Black Fern Ruahei Demant',
  40. 'ext': 'mp4',
  41. 'channel_follower_count': int,
  42. 'channel_url': 'https://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
  43. 'tags': 'count:12',
  44. 'uploader': 'Re: News',
  45. 'upload_date': '20211215',
  46. 'uploader_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
  47. 'uploader_url': 'http://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
  48. 'channel_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
  49. 'channel': 'Re: News',
  50. 'like_count': int,
  51. 'thumbnail': 'https://i.ytimg.com/vi/s4wEB9neTfU/maxresdefault.jpg',
  52. 'age_limit': 0,
  53. 'view_count': int,
  54. 'categories': ['Sports'],
  55. 'duration': 222,
  56. 'description': 'md5:8874410e5740ed1d8fd0df839f849813',
  57. 'availability': 'public',
  58. 'playable_in_embed': True,
  59. 'live_status': 'not_live',
  60. },
  61. }],
  62. }, {
  63. # 2 Brightcove videos
  64. 'url': 'https://www.1news.co.nz/2022/09/29/raw-videos-capture-hurricane-ians-fury-as-it-slams-florida/',
  65. 'info_dict': {
  66. 'id': 'raw-videos-capture-hurricane-ians-fury-as-it-slams-florida',
  67. 'title': 'Raw videos capture Hurricane Ian\'s fury as it slams Florida',
  68. },
  69. 'playlist_mincount': 2,
  70. }, {
  71. 'url': 'https://www.onenews.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
  72. 'only_matching': True,
  73. }]
  74. BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/0xpHIR6IB_default/index.html?videoId=%s'
  75. def _real_extract(self, url):
  76. display_id = self._match_id(url)
  77. webpage = self._download_webpage(url, display_id)
  78. fusion_metadata = self._search_json(r'Fusion\.globalContent\s*=', webpage, 'fusion metadata', display_id)
  79. entries = []
  80. for item in traverse_obj(fusion_metadata, 'content_elements') or []:
  81. item_type = traverse_obj(item, 'subtype')
  82. if item_type == 'video':
  83. brightcove_config = traverse_obj(item, ('embed', 'config'))
  84. brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
  85. traverse_obj(brightcove_config, 'brightcoveAccount') or '963482464001',
  86. traverse_obj(brightcove_config, 'brightcoveVideoId'),
  87. )
  88. entries.append(self.url_result(brightcove_url, BrightcoveNewIE))
  89. elif item_type == 'youtube':
  90. video_id_or_url = traverse_obj(item, ('referent', 'id'), ('raw_oembed', '_id'))
  91. if video_id_or_url:
  92. entries.append(self.url_result(video_id_or_url, ie='Youtube'))
  93. if not entries:
  94. raise ExtractorError('This article does not have a video.', expected=True)
  95. playlist_title = (
  96. traverse_obj(fusion_metadata, ('headlines', 'basic'))
  97. or self._generic_title('', webpage)
  98. )
  99. return self.playlist_result(entries, display_id, playlist_title)