noovo.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. from .brightcove import BrightcoveNewIE
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. int_or_none,
  5. js_to_json,
  6. smuggle_url,
  7. try_get,
  8. )
  9. class NoovoIE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
  11. _TESTS = [{
  12. # clip
  13. 'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
  14. 'info_dict': {
  15. 'id': '5386045029001',
  16. 'ext': 'mp4',
  17. 'title': 'Chrysler Imperial',
  18. 'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
  19. 'timestamp': 1491399228,
  20. 'upload_date': '20170405',
  21. 'uploader_id': '618566855001',
  22. 'series': 'RPM+',
  23. },
  24. 'params': {
  25. 'skip_download': True,
  26. },
  27. }, {
  28. # episode
  29. 'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
  30. 'info_dict': {
  31. 'id': '5395865725001',
  32. 'title': 'Épisode 13 : Les retrouvailles',
  33. 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
  34. 'ext': 'mp4',
  35. 'timestamp': 1492019320,
  36. 'upload_date': '20170412',
  37. 'uploader_id': '618566855001',
  38. 'series': "L'amour est dans le pré",
  39. 'season_number': 5,
  40. 'episode': 'Épisode 13',
  41. 'episode_number': 13,
  42. },
  43. 'params': {
  44. 'skip_download': True,
  45. },
  46. }]
  47. BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'
  48. def _real_extract(self, url):
  49. video_id = self._match_id(url)
  50. webpage = self._download_webpage(url, video_id)
  51. brightcove_id = self._search_regex(
  52. r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
  53. data = self._parse_json(
  54. self._search_regex(
  55. r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
  56. default='{}'),
  57. video_id, transform_source=js_to_json, fatal=False)
  58. title = try_get(
  59. data, lambda x: x['video']['nom'],
  60. str) or self._html_search_meta(
  61. 'dcterms.Title', webpage, 'title', fatal=True)
  62. description = self._html_search_meta(
  63. ('dcterms.Description', 'description'), webpage, 'description')
  64. series = try_get(
  65. data, lambda x: x['emission']['nom']) or self._search_regex(
  66. r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
  67. webpage, 'series', default=None)
  68. season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
  69. season = try_get(season_el, lambda x: x['nom'], str)
  70. season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
  71. episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
  72. episode = try_get(episode_el, lambda x: x['nom'], str)
  73. episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
  74. return {
  75. '_type': 'url_transparent',
  76. 'ie_key': BrightcoveNewIE.ie_key(),
  77. 'url': smuggle_url(
  78. self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
  79. {'geo_countries': ['CA']}),
  80. 'id': brightcove_id,
  81. 'title': title,
  82. 'description': description,
  83. 'series': series,
  84. 'season': season,
  85. 'season_number': season_number,
  86. 'episode': episode,
  87. 'episode_number': episode_number,
  88. }