nzz.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. extract_attributes,
  5. )
  6. class NZZIE(InfoExtractor):
  7. _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
  8. _TESTS = [{
  9. 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
  10. 'info_dict': {
  11. 'id': '9153',
  12. },
  13. 'playlist_mincount': 6,
  14. }, {
  15. 'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
  16. 'info_dict': {
  17. 'id': '1368112',
  18. },
  19. 'playlist_count': 1,
  20. }]
  21. def _real_extract(self, url):
  22. page_id = self._match_id(url)
  23. webpage = self._download_webpage(url, page_id)
  24. entries = []
  25. for player_element in re.findall(
  26. r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
  27. player_params = extract_attributes(player_element)
  28. if player_params.get('data-type') not in ('kaltura_singleArticle',):
  29. self.report_warning('Unsupported player type')
  30. continue
  31. entry_id = player_params['data-id']
  32. entries.append(self.url_result(
  33. 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
  34. return self.playlist_result(entries, page_id)