rds.py 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. js_to_json,
  4. parse_duration,
  5. parse_iso8601,
  6. )
  7. class RDSIE(InfoExtractor):
  8. _WORKING = False
  9. IE_DESC = 'RDS.ca'
  10. _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
  11. _TESTS = [{
  12. # has two 9c9media ContentPackages, the web player selects the first ContentPackage
  13. 'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606',
  14. 'info_dict': {
  15. 'id': '2083309',
  16. 'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande',
  17. 'ext': 'flv',
  18. 'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande',
  19. 'description': 'md5:83fa38ecc4a79b19e433433254077f25',
  20. 'timestamp': 1606129030,
  21. 'upload_date': '20201123',
  22. 'duration': 773.039,
  23. },
  24. }, {
  25. 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
  26. 'only_matching': True,
  27. }]
  28. def _real_extract(self, url):
  29. display_id = self._match_id(url)
  30. webpage = self._download_webpage(url, display_id)
  31. item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
  32. video_id = str(item['id'])
  33. title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
  34. 'title', webpage, 'title', fatal=True)
  35. description = self._og_search_description(webpage) or self._html_search_meta(
  36. 'description', webpage, 'description')
  37. thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
  38. [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
  39. r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
  40. webpage, 'thumbnail', fatal=False)
  41. timestamp = parse_iso8601(self._search_regex(
  42. r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
  43. webpage, 'upload date', fatal=False))
  44. duration = parse_duration(self._search_regex(
  45. r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
  46. webpage, 'duration', fatal=False))
  47. age_limit = self._family_friendly_search(webpage)
  48. return {
  49. '_type': 'url_transparent',
  50. 'id': video_id,
  51. 'display_id': display_id,
  52. 'url': f'9c9media:rds_web:{video_id}',
  53. 'title': title,
  54. 'description': description,
  55. 'thumbnail': thumbnail,
  56. 'timestamp': timestamp,
  57. 'duration': duration,
  58. 'age_limit': age_limit,
  59. 'ie_key': 'NineCNineMedia',
  60. }