xnxx.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. NO_DEFAULT,
  5. determine_ext,
  6. int_or_none,
  7. str_to_int,
  8. )
  9. class XNXXIE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:video|www)\.xnxx3?\.com/video-?(?P<id>[0-9a-z]+)/'
  11. _TESTS = [{
  12. 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
  13. 'md5': '7583e96c15c0f21e9da3453d9920fbba',
  14. 'info_dict': {
  15. 'id': '55awb78',
  16. 'ext': 'mp4',
  17. 'title': 'Skyrim Test Video',
  18. 'thumbnail': r're:^https?://.*\.jpg',
  19. 'duration': 469,
  20. 'view_count': int,
  21. 'age_limit': 18,
  22. },
  23. }, {
  24. 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
  25. 'only_matching': True,
  26. }, {
  27. 'url': 'http://www.xnxx.com/video-55awb78/',
  28. 'only_matching': True,
  29. }, {
  30. 'url': 'http://www.xnxx3.com/video-55awb78/',
  31. 'only_matching': True,
  32. }]
  33. def _real_extract(self, url):
  34. video_id = self._match_id(url)
  35. webpage = self._download_webpage(url, video_id)
  36. def get(meta, default=NO_DEFAULT, fatal=True):
  37. return self._search_regex(
  38. rf'set{meta}\s*\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
  39. webpage, meta, default=default, fatal=fatal, group='value')
  40. title = self._og_search_title(
  41. webpage, default=None) or get('VideoTitle')
  42. formats = []
  43. for mobj in re.finditer(
  44. r'setVideo(?:Url(?P<id>Low|High)|HLS)\s*\(\s*(?P<q>["\'])(?P<url>(?:https?:)?//.+?)(?P=q)', webpage):
  45. format_url = mobj.group('url')
  46. if determine_ext(format_url) == 'm3u8':
  47. formats.extend(self._extract_m3u8_formats(
  48. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  49. quality=1, m3u8_id='hls', fatal=False))
  50. else:
  51. format_id = mobj.group('id')
  52. if format_id:
  53. format_id = format_id.lower()
  54. formats.append({
  55. 'url': format_url,
  56. 'format_id': format_id,
  57. 'quality': -1 if format_id == 'low' else 0,
  58. })
  59. thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
  60. 'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
  61. duration = int_or_none(self._og_search_property('duration', webpage))
  62. view_count = str_to_int(self._search_regex(
  63. r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count',
  64. default=None))
  65. return {
  66. 'id': video_id,
  67. 'title': title,
  68. 'thumbnail': thumbnail,
  69. 'duration': duration,
  70. 'view_count': view_count,
  71. 'age_limit': 18,
  72. 'formats': formats,
  73. }