# lemonde.py (2.2 KB)
from .common import InfoExtractor
  2. class LemondeIE(InfoExtractor):
  3. _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
  4. _TESTS = [{
  5. 'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
  6. 'md5': 'da120c8722d8632eec6ced937536cc98',
  7. 'info_dict': {
  8. 'id': 'lqm3kl',
  9. 'ext': 'mp4',
  10. 'title': "Comprendre l'affaire Bygmalion en 5 minutes",
  11. 'thumbnail': r're:^https?://.*\.jpg',
  12. 'duration': 309,
  13. 'upload_date': '20160119',
  14. 'timestamp': 1453194778,
  15. 'uploader_id': '3pmkp',
  16. },
  17. }, {
  18. # standard iframe embed
  19. 'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html',
  20. 'info_dict': {
  21. 'id': 'uzsxms',
  22. 'ext': 'mp4',
  23. 'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?",
  24. 'thumbnail': r're:^https?://.*\.jpg',
  25. 'duration': 325,
  26. 'upload_date': '20161021',
  27. 'timestamp': 1477044540,
  28. 'uploader_id': '3pmkp',
  29. },
  30. 'params': {
  31. 'skip_download': True,
  32. },
  33. }, {
  34. 'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
  35. 'only_matching': True,
  36. }, {
  37. # YouTube embeds
  38. 'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html',
  39. 'only_matching': True,
  40. }]
  41. def _real_extract(self, url):
  42. display_id = self._match_id(url)
  43. webpage = self._download_webpage(url, display_id)
  44. digiteka_url = self._proto_relative_url(self._search_regex(
  45. r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
  46. webpage, 'digiteka url', group='url', default=None))
  47. if digiteka_url:
  48. return self.url_result(digiteka_url, 'Digiteka')
  49. return self.url_result(url, 'Generic')