# chaturbate.py (3.7 KB)
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. lowercase_escape,
  6. url_or_none,
  7. )
  8. class ChaturbateIE(InfoExtractor):
  9. _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
  10. _TESTS = [{
  11. 'url': 'https://www.chaturbate.com/siswet19/',
  12. 'info_dict': {
  13. 'id': 'siswet19',
  14. 'ext': 'mp4',
  15. 'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  16. 'age_limit': 18,
  17. 'is_live': True,
  18. },
  19. 'params': {
  20. 'skip_download': True,
  21. },
  22. 'skip': 'Room is offline',
  23. }, {
  24. 'url': 'https://chaturbate.com/fullvideo/?b=caylin',
  25. 'only_matching': True,
  26. }, {
  27. 'url': 'https://en.chaturbate.com/siswet19/',
  28. 'only_matching': True,
  29. }]
  30. _ROOM_OFFLINE = 'Room is currently offline'
  31. def _real_extract(self, url):
  32. video_id = self._match_id(url)
  33. webpage = self._download_webpage(
  34. f'https://chaturbate.com/{video_id}/', video_id,
  35. headers=self.geo_verification_headers())
  36. found_m3u8_urls = []
  37. data = self._parse_json(
  38. self._search_regex(
  39. r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
  40. webpage, 'data', default='{}', group='value'),
  41. video_id, transform_source=lowercase_escape, fatal=False)
  42. if data:
  43. m3u8_url = url_or_none(data.get('hls_source'))
  44. if m3u8_url:
  45. found_m3u8_urls.append(m3u8_url)
  46. if not found_m3u8_urls:
  47. for m in re.finditer(
  48. r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage):
  49. found_m3u8_urls.append(lowercase_escape(m.group('url')))
  50. if not found_m3u8_urls:
  51. for m in re.finditer(
  52. r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
  53. found_m3u8_urls.append(m.group('url'))
  54. m3u8_urls = []
  55. for found_m3u8_url in found_m3u8_urls:
  56. m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '')
  57. for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
  58. if m3u8_url not in m3u8_urls:
  59. m3u8_urls.append(m3u8_url)
  60. if not m3u8_urls:
  61. error = self._search_regex(
  62. [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
  63. r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
  64. webpage, 'error', group='error', default=None)
  65. if not error:
  66. if any(p in webpage for p in (
  67. self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
  68. error = self._ROOM_OFFLINE
  69. if error:
  70. raise ExtractorError(error, expected=True)
  71. raise ExtractorError('Unable to find stream URL')
  72. formats = []
  73. for m3u8_url in m3u8_urls:
  74. for known_id in ('fast', 'slow'):
  75. if f'_{known_id}' in m3u8_url:
  76. m3u8_id = known_id
  77. break
  78. else:
  79. m3u8_id = None
  80. formats.extend(self._extract_m3u8_formats(
  81. m3u8_url, video_id, ext='mp4',
  82. # ffmpeg skips segments for fast m3u8
  83. preference=-10 if m3u8_id == 'fast' else None,
  84. m3u8_id=m3u8_id, fatal=False, live=True))
  85. return {
  86. 'id': video_id,
  87. 'title': video_id,
  88. 'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
  89. 'age_limit': self._rta_search(webpage),
  90. 'is_live': True,
  91. 'formats': formats,
  92. }