parlview.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. int_or_none,
  4. try_get,
  5. unified_timestamp,
  6. )
  class ParlviewIE(InfoExtractor):
      """Extractor for videos served from parlview.aph.gov.au.

      The six-digit ``videoID`` query value of the page URL is used as the
      video id. Metadata is gathered from three places: the player page
      itself (title, description), the JSON player-config API (streams,
      duration, timestamp, thumbnail) and an auxiliary "media info" tab
      (uploader/channel).
      """

      # Marked as not working by the original author; kept as-is.
      _WORKING = False
      _VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
      _TESTS = [{
          'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661',
          'info_dict': {
              'id': '542661',
              'ext': 'mp4',
              'title': "Australia's Family Law System [Part 2]",
              'duration': 5799,
              'description': 'md5:7099883b391619dbae435891ca871a62',
              'timestamp': 1621430700,
              'upload_date': '20210519',
              'uploader': 'Joint Committee',
          },
          'params': {
              'skip_download': True,
          },
      }, {
          'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936',
          'only_matching': True,
      }]
      # Both templates take the video id as their single %-argument.
      _API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
      _MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'

      def _real_extract(self, url):
          """Build the info dict for the video referenced by *url*.

          Downloads the player page, the JSON player configuration and the
          optional media-info tab, then returns a dict with the keys ``id``,
          ``url``, ``title``, ``formats``, ``duration``, ``timestamp``,
          ``description``, ``uploader`` and ``thumbnail``.

          A missing stream, or a stream whose ``streamType`` is not ``'VOD'``,
          is reported through ``self.raise_no_formats``.
          """
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)

          # Fall back to an empty dict when the response has no usable
          # 'media' object, so the optional lookups below degrade to None
          # instead of raising AttributeError on a None value.
          media = try_get(
              self._download_json(self._API_URL % video_id, video_id),
              lambda x: x['media'], dict) or {}

          # Only the text after the first '/' of the first timecode offset is
          # parsed as a date; underscores in it are turned into spaces.
          # partition() yields '' when there is no '/', where an indexed
          # split() would raise IndexError.
          timecode = try_get(
              media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or ''
          date_text = timecode.partition('/')[2].replace('_', ' ')

          stream = try_get(media, lambda x: x['renditions'][0], dict)
          if not stream:
              self.raise_no_formats('No streams were detected')
          elif stream.get('streamType') != 'VOD':
              self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType'))))

          # NOTE(review): raise_no_formats presumably can return instead of
          # raising (e.g. when the user chose to ignore missing formats) -
          # confirm against InfoExtractor. Guard the subscript so that path
          # does not die with a TypeError on a None stream.
          formats = self._extract_m3u8_formats(
              stream['url'], video_id, 'mp4', 'm3u8_native') if stream else []

          # The request is non-fatal, so its result may be falsy on failure
          # (presumably False/None - confirm against InfoExtractor); normalise
          # to '' so the regex search below simply finds nothing.
          media_info = self._download_webpage(
              self._MEDIA_INFO_URL % video_id, video_id,
              note='Downloading media info', fatal=False) or ''

          return {
              'id': video_id,
              'url': url,
              'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
              'formats': formats,
              'duration': int_or_none(media.get('duration')),
              'timestamp': unified_timestamp(date_text),
              'description': self._html_search_regex(
                  r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)',
                  webpage, 'description', fatal=False),
              'uploader': self._html_search_regex(
                  r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False),
              'thumbnail': media.get('staticImage'),
          }