aljazeera.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import json
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. try_get,
  5. )
  6. class AlJazeeraIE(InfoExtractor):
  7. _VALID_URL = r'https?://(?P<base>\w+\.aljazeera\.\w+)/(?P<type>programs?/[^/]+|(?:feature|video|new)s)?/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
  8. _TESTS = [{
  9. 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/pojedini-domovi-u-sarajevu-jos-pod-vodom-mjestanima-se-dostavlja-hrana',
  10. 'info_dict': {
  11. 'id': '6280641530001',
  12. 'ext': 'mp4',
  13. 'title': 'Pojedini domovi u Sarajevu još pod vodom, mještanima se dostavlja hrana',
  14. 'timestamp': 1636219149,
  15. 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
  16. 'upload_date': '20211106',
  17. },
  18. }, {
  19. 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
  20. 'info_dict': {
  21. 'id': '6280654936001',
  22. 'ext': 'mp4',
  23. 'title': 'Đoković ušao u finale Mastersa u Parizu',
  24. 'timestamp': 1636221686,
  25. 'description': 'Novak Đoković je u polufinalu Mastersa u Parizu nakon preokreta pobijedio Poljaka Huberta Hurkacza.',
  26. 'upload_date': '20211106',
  27. },
  28. }]
  29. BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
  30. def _real_extract(self, url):
  31. base, post_type, display_id = self._match_valid_url(url).groups()
  32. wp = {
  33. 'balkans.aljazeera.net': 'ajb',
  34. 'chinese.aljazeera.net': 'chinese',
  35. 'mubasher.aljazeera.net': 'ajm',
  36. }.get(base) or 'aje'
  37. post_type = {
  38. 'features': 'post',
  39. 'program': 'episode',
  40. 'programs': 'episode',
  41. 'videos': 'video',
  42. 'news': 'news',
  43. }[post_type.split('/')[0]]
  44. video = self._download_json(
  45. f'https://{base}/graphql', display_id, query={
  46. 'wp-site': wp,
  47. 'operationName': 'ArchipelagoSingleArticleQuery',
  48. 'variables': json.dumps({
  49. 'name': display_id,
  50. 'postType': post_type,
  51. }),
  52. }, headers={
  53. 'wp-site': wp,
  54. })
  55. video = try_get(video, lambda x: x['data']['article']['video']) or {}
  56. video_id = video.get('id')
  57. account = video.get('accountId') or '911432371001'
  58. player_id = video.get('playerId') or 'csvTfAlKW'
  59. embed = 'default'
  60. if video_id is None:
  61. webpage = self._download_webpage(url, display_id)
  62. account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
  63. group=(1, 2, 3, 4), default=(None, None, None, None))
  64. if video_id is None:
  65. return {
  66. '_type': 'url_transparent',
  67. 'url': url,
  68. 'ie_key': 'Generic',
  69. }
  70. return {
  71. '_type': 'url_transparent',
  72. 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
  73. 'ie_key': 'BrightcoveNew',
  74. }