viously.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import base64
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. extract_attributes,
  6. int_or_none,
  7. parse_iso8601,
  8. )
  9. from ..utils.traversal import traverse_obj
  10. class ViouslyIE(InfoExtractor):
  11. _VALID_URL = False
  12. _WEBPAGE_TESTS = [{
  13. 'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
  14. 'md5': '37a6c3381599381ff53a7e1e0575c0bc',
  15. 'info_dict': {
  16. 'id': 'F_xQzS2jwb3',
  17. 'ext': 'mp4',
  18. 'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
  19. 'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
  20. 'age_limit': 0,
  21. 'upload_date': '20230328',
  22. 'timestamp': 1680037507,
  23. 'duration': 3716,
  24. 'categories': ['motors'],
  25. },
  26. }]
  27. def _extract_from_webpage(self, url, webpage):
  28. viously_players = re.findall(r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>', webpage)
  29. if not viously_players:
  30. return
  31. def custom_decode(text):
  32. STANDARD_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
  33. CUSTOM_ALPHABET = 'VIOUSLYABCDEFGHJKMNPQRTWXZviouslyabcdefghjkmnpqrtwxz9876543210+/='
  34. data = base64.b64decode(text.translate(str.maketrans(CUSTOM_ALPHABET, STANDARD_ALPHABET)))
  35. return data.decode('utf-8').strip('\x00')
  36. for video_id in traverse_obj(viously_players, (..., {extract_attributes}, 'id')):
  37. formats = self._extract_m3u8_formats(
  38. f'https://www.viously.com/video/hls/{video_id}/index.m3u8', video_id, fatal=False)
  39. if not formats:
  40. continue
  41. data = self._download_json(
  42. f'https://www.viously.com/export/json/{video_id}', video_id,
  43. transform_source=custom_decode, fatal=False)
  44. yield {
  45. 'id': video_id,
  46. 'formats': formats,
  47. **traverse_obj(data, ('video', {
  48. 'title': ('title', {str}),
  49. 'description': ('description', {str}),
  50. 'duration': ('duration', {int_or_none}),
  51. 'timestamp': ('iso_date', {parse_iso8601}),
  52. 'categories': ('category', 'name', {str}, {lambda x: [x] if x else None}),
  53. })),
  54. }