appleconnect.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. from .common import InfoExtractor
  2. from ..utils import ExtractorError, str_to_int
  3. class AppleConnectIE(InfoExtractor):
  4. _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
  5. _TESTS = [{
  6. 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
  7. 'md5': 'c1d41f72c8bcaf222e089434619316e4',
  8. 'info_dict': {
  9. 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
  10. 'ext': 'm4v',
  11. 'title': 'Energy',
  12. 'uploader': 'Drake',
  13. 'thumbnail': r're:^https?://.*\.jpg$',
  14. 'upload_date': '20150710',
  15. 'timestamp': 1436545535,
  16. },
  17. }, {
  18. 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
  19. 'only_matching': True,
  20. }]
  21. def _real_extract(self, url):
  22. video_id = self._match_id(url)
  23. webpage = self._download_webpage(url, video_id)
  24. try:
  25. video_json = self._html_search_regex(
  26. r'class="auc-video-data">(\{.*?\})', webpage, 'json')
  27. except ExtractorError:
  28. raise ExtractorError('This post doesn\'t contain a video', expected=True)
  29. video_data = self._parse_json(video_json, video_id)
  30. timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
  31. like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
  32. return {
  33. 'id': video_id,
  34. 'url': video_data['sslSrc'],
  35. 'title': video_data['title'],
  36. 'description': video_data['description'],
  37. 'uploader': video_data['artistName'],
  38. 'thumbnail': video_data['artworkUrl'],
  39. 'timestamp': timestamp,
  40. 'like_count': like_count,
  41. }