# noodlemagazine.py (3.4 KB)
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. int_or_none,
  4. parse_count,
  5. parse_duration,
  6. unified_strdate,
  7. urljoin,
  8. )
  9. from ..utils.traversal import traverse_obj
  class NoodleMagazineIE(InfoExtractor):
      """Information extractor for noodlemagazine.com ``/watch/<id>`` pages."""

      # The host may be bare or prefixed with "www." / "adult.". The dot is part
      # of the optional group as a whole, so "www.noodlemagazine.com" matches
      # too (previously only "adult." carried the dot and "www." URLs failed).
      _VALID_URL = r'https?://(?:(?:www|adult)\.)?noodlemagazine\.com/watch/(?P<id>[0-9_-]+)'
      _TEST = {
          'url': 'https://adult.noodlemagazine.com/watch/-67421364_456239604',
          'md5': '9e02aa763612929d0b4b850591a9248b',
          'info_dict': {
              'id': '-67421364_456239604',
              'title': 'Aria alexander manojob',
              'thumbnail': r're:^https://.*\.jpg',
              'ext': 'mp4',
              'duration': 903,
              'view_count': int,
              'like_count': int,
              'description': 'Aria alexander manojob',
              'tags': ['aria', 'alexander', 'manojob'],
              'upload_date': '20190218',
              'age_limit': 18,
          },
      }

      def _real_extract(self, url):
          """Build the info dict for a single watch page.

          Steps performed:
            1. Download the watch page and read title, duration, description,
               tags, view/like counts and upload date from its meta tags.
            2. Locate the ``<iframe id="iplayer">`` player, download it and
               read the ``window.playlistUrl`` assignment from it.
            3. Download the playlist JSON and turn each entry of its
               ``sources`` list that has a ``file`` value into one or more
               formats (HLS manifests are expanded, anything else is used
               as a direct URL).

          Args:
              url: The watch page URL; also sent as the ``Referer`` header
                  for the iframe and playlist requests.

          Returns:
              dict with the keys ``id``, ``formats``, ``title``,
              ``thumbnail``, ``duration``, ``description``, ``tags``,
              ``view_count``, ``like_count``, ``upload_date`` and
              ``age_limit``.
          """
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)

          title = self._og_search_title(webpage)
          duration = parse_duration(
              self._html_search_meta('video:duration', webpage, 'duration', default=None))

          # Strip the literal suffix found in the og:description text. The
          # spelling ("hight") must stay as-is since it is matched verbatim.
          # An empty result is reported as None rather than as ''.
          description = self._og_search_property(
              'description', webpage, default='',
          ).replace(' watch online hight quality video', '') or None

          # Tags come as one comma-separated meta value. Drop empty entries so
          # that a missing meta tag yields [] instead of [''].
          tags_meta = self._html_search_meta('video:tag', webpage, default='') or ''
          tags = [tag.strip() for tag in tags_meta.split(',') if tag.strip()]

          view_count = parse_count(
              self._html_search_meta('ya:ovs:views_total', webpage, default=None))
          like_count = parse_count(
              self._html_search_meta('ya:ovs:likes', webpage, default=None))
          upload_date = unified_strdate(
              self._html_search_meta('ya:ovs:upload_date', webpage, default=None))

          def build_url(url_or_path):
              # Resolve player/playlist/media paths against the adult subdomain.
              return urljoin('https://adult.noodlemagazine.com', url_or_path)

          headers = {'Referer': url}

          player_path = self._html_search_regex(
              r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path')
          player_iframe = self._download_webpage(
              build_url(player_path), video_id, 'Downloading iframe page', headers=headers)
          playlist_url = self._search_regex(
              r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
          playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers)

          formats = []
          # Only sources that carry a truthy 'file' value are considered.
          for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):
              if source.get('type') == 'hls':
                  formats.extend(self._extract_m3u8_formats(
                      build_url(source['file']), video_id, 'mp4', fatal=False, m3u8_id='hls'))
              else:
                  # 'label' is used both as the format id and, when it parses
                  # as an integer, as the height.
                  formats.append(traverse_obj(source, {
                      'url': ('file', {build_url}),
                      'format_id': 'label',
                      'height': ('label', {int_or_none}),
                      'ext': 'type',
                  }))

          return {
              'id': video_id,
              'formats': formats,
              'title': title,
              # Prefer the page's og:image; fall back to the playlist's image.
              'thumbnail': self._og_search_property('image', webpage, default=None) or playlist_info.get('image'),
              'duration': duration,
              'description': description,
              'tags': tags,
              'view_count': view_count,
              'like_count': like_count,
              'upload_date': upload_date,
              'age_limit': 18,
          }