# lefigaro.py (5.3 KB)

  1. import json
  2. import math
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. InAdvancePagedList,
  6. traverse_obj,
  7. )
  8. class LeFigaroVideoEmbedIE(InfoExtractor):
  9. _VALID_URL = r'https?://video\.lefigaro\.fr/embed/[^?#]+/(?P<id>[\w-]+)'
  10. _TESTS = [{
  11. 'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
  12. 'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
  13. 'info_dict': {
  14. 'id': 'g9j7Eovo',
  15. 'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
  16. 'description': 'md5:862b8813148ba4bf10763a65a69dfe41',
  17. 'upload_date': '20230216',
  18. 'timestamp': 1676581615,
  19. 'duration': 3076,
  20. 'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
  21. 'ext': 'mp4',
  22. },
  23. }, {
  24. 'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
  25. 'md5': '319c662943dd777bab835cae1e2d73a5',
  26. 'info_dict': {
  27. 'id': 'LeAgybyc',
  28. 'title': 'Intelligence artificielle : faut-il s’en méfier ?',
  29. 'description': 'md5:249d136e3e5934a67c8cb704f8abf4d2',
  30. 'upload_date': '20230124',
  31. 'timestamp': 1674584477,
  32. 'duration': 860,
  33. 'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
  34. 'ext': 'mp4',
  35. },
  36. }]
  37. _WEBPAGE_TESTS = [{
  38. 'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
  39. 'md5': '6289f9489efb969e38245f31721596fe',
  40. 'info_dict': {
  41. 'id': 'QChnbPYA',
  42. 'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
  43. 'description': 'md5:6f47235b7e7c93b366fd8ebfa10572ac',
  44. 'upload_date': '20230123',
  45. 'timestamp': 1674503575,
  46. 'duration': 3153,
  47. 'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
  48. 'age_limit': 0,
  49. 'ext': 'mp4',
  50. },
  51. }, {
  52. 'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
  53. 'md5': 'f6df814cae53e85937621599d2967520',
  54. 'info_dict': {
  55. 'id': 'QJzqoNbf',
  56. 'title': 'La philosophe Nathalie Sarthou-Lajus est l’invitée du Figaro Live',
  57. 'description': 'md5:c586793bb72e726c83aa257f99a8c8c4',
  58. 'upload_date': '20230217',
  59. 'timestamp': 1676661986,
  60. 'duration': 1558,
  61. 'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
  62. 'age_limit': 0,
  63. 'ext': 'mp4',
  64. },
  65. }]
  66. def _real_extract(self, url):
  67. display_id = self._match_id(url)
  68. webpage = self._download_webpage(url, display_id)
  69. player_data = self._search_nextjs_data(
  70. webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']
  71. return self.url_result(
  72. f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),
  73. description=player_data.get('description'), thumbnail=player_data.get('poster'))
  74. class LeFigaroVideoSectionIE(InfoExtractor):
  75. _VALID_URL = r'https?://video\.lefigaro\.fr/figaro/(?P<id>[\w-]+)/?(?:[#?]|$)'
  76. _TESTS = [{
  77. 'url': 'https://video.lefigaro.fr/figaro/le-club-le-figaro-idees/',
  78. 'info_dict': {
  79. 'id': 'le-club-le-figaro-idees',
  80. 'title': 'Le Club Le Figaro Idées',
  81. },
  82. 'playlist_mincount': 14,
  83. }, {
  84. 'url': 'https://video.lefigaro.fr/figaro/factu/',
  85. 'info_dict': {
  86. 'id': 'factu',
  87. 'title': 'Factu',
  88. },
  89. 'playlist_mincount': 519,
  90. }]
  91. _PAGE_SIZE = 20
  92. def _get_api_response(self, display_id, page_num, note=None):
  93. return self._download_json(
  94. 'https://api-graphql.lefigaro.fr/graphql', display_id, note=note,
  95. query={
  96. 'id': 'flive-website_UpdateListPage_1fb260f996bca2d78960805ac382544186b3225f5bedb43ad08b9b8abef79af6',
  97. 'variables': json.dumps({
  98. 'slug': display_id,
  99. 'videosLimit': self._PAGE_SIZE,
  100. 'sort': 'DESC',
  101. 'order': 'PUBLISHED_AT',
  102. 'page': page_num,
  103. }).encode(),
  104. })
  105. def _real_extract(self, url):
  106. display_id = self._match_id(url)
  107. initial_response = self._get_api_response(display_id, page_num=1)['data']['playlist']
  108. def page_func(page_num):
  109. api_response = self._get_api_response(display_id, page_num + 1, note=f'Downloading page {page_num + 1}')
  110. return [self.url_result(
  111. video['embedUrl'], LeFigaroVideoEmbedIE, **traverse_obj(video, {
  112. 'title': 'name',
  113. 'description': 'description',
  114. 'thumbnail': 'thumbnailUrl',
  115. })) for video in api_response['data']['playlist']['jsonLd'][0]['itemListElement']]
  116. entries = InAdvancePagedList(
  117. page_func, math.ceil(initial_response['videoCount'] / self._PAGE_SIZE), self._PAGE_SIZE)
  118. return self.playlist_result(entries, playlist_id=display_id, playlist_title=initial_response.get('title'))