# eroprofile.py (4.3 KB)
  1. import re
  2. import urllib.parse
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. merge_dicts,
  7. )
  8. class EroProfileIE(InfoExtractor):
  9. _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
  10. _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
  11. _NETRC_MACHINE = 'eroprofile'
  12. _TESTS = [{
  13. 'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
  14. 'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
  15. 'info_dict': {
  16. 'id': '3733775',
  17. 'display_id': 'sexy-babe-softcore',
  18. 'ext': 'm4v',
  19. 'title': 'sexy babe softcore',
  20. 'thumbnail': r're:https?://.*\.jpg',
  21. 'age_limit': 18,
  22. },
  23. 'skip': 'Video not found',
  24. }, {
  25. 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
  26. 'md5': '1baa9602ede46ce904c431f5418d8916',
  27. 'info_dict': {
  28. 'id': '1133519',
  29. 'ext': 'm4v',
  30. 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
  31. 'thumbnail': r're:https?://.*\.jpg',
  32. 'age_limit': 18,
  33. },
  34. 'skip': 'Requires login',
  35. }]
  36. def _perform_login(self, username, password):
  37. query = urllib.parse.urlencode({
  38. 'username': username,
  39. 'password': password,
  40. 'url': 'http://www.eroprofile.com/',
  41. })
  42. login_url = self._LOGIN_URL + query
  43. login_page = self._download_webpage(login_url, None, False)
  44. m = re.search(r'Your username or password was incorrect\.', login_page)
  45. if m:
  46. raise ExtractorError(
  47. 'Wrong username and/or password.', expected=True)
  48. self.report_login()
  49. redirect_url = self._search_regex(
  50. r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
  51. self._download_webpage(redirect_url, None, False)
  52. def _real_extract(self, url):
  53. display_id = self._match_id(url)
  54. webpage = self._download_webpage(url, display_id)
  55. m = re.search(r'You must be logged in to view this video\.', webpage)
  56. if m:
  57. self.raise_login_required('This video requires login')
  58. video_id = self._search_regex(
  59. [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
  60. webpage, 'video id', default=None)
  61. title = self._html_search_regex(
  62. (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
  63. webpage, 'title')
  64. info = self._parse_html5_media_entries(url, webpage, video_id)[0]
  65. return merge_dicts(info, {
  66. 'id': video_id,
  67. 'display_id': display_id,
  68. 'title': title,
  69. 'age_limit': 18,
  70. })
  71. class EroProfileAlbumIE(InfoExtractor):
  72. _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
  73. IE_NAME = 'EroProfile:album'
  74. _TESTS = [{
  75. 'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
  76. 'info_dict': {
  77. 'id': 'BBW-2-893',
  78. 'title': 'BBW 2',
  79. },
  80. 'playlist_mincount': 486,
  81. },
  82. ]
  83. def _extract_from_page(self, page):
  84. for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
  85. yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())
  86. def _entries(self, playlist_id, first_page):
  87. yield from self._extract_from_page(first_page)
  88. page_urls = re.findall(rf'href=".*?(/m/videos/album/{playlist_id}\?pnum=(\d+))"', first_page)
  89. max_page = max(int(n) for _, n in page_urls)
  90. for n in range(2, max_page + 1):
  91. url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
  92. yield from self._extract_from_page(
  93. self._download_webpage(url, playlist_id,
  94. note=f'Downloading playlist page {int(n) - 1}'))
  95. def _real_extract(self, url):
  96. playlist_id = self._match_id(url)
  97. first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
  98. playlist_title = self._search_regex(
  99. r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')
  100. return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)