clippit.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. parse_iso8601,
  5. qualities,
  6. )
  7. class ClippitIE(InfoExtractor):
  8. _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
  9. _TEST = {
  10. 'url': 'https://www.clippituser.tv/c/evmgm',
  11. 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
  12. 'info_dict': {
  13. 'id': 'evmgm',
  14. 'ext': 'mp4',
  15. 'title': 'Bye bye Brutus. #BattleBots - Clippit',
  16. 'uploader': 'lizllove',
  17. 'uploader_url': 'https://www.clippituser.tv/p/lizllove',
  18. 'timestamp': 1472183818,
  19. 'upload_date': '20160826',
  20. 'description': 'BattleBots | ABC',
  21. 'thumbnail': r're:^https?://.*\.jpg$',
  22. },
  23. }
  24. def _real_extract(self, url):
  25. video_id = self._match_id(url)
  26. webpage = self._download_webpage(url, video_id)
  27. title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')
  28. FORMATS = ('sd', 'hd')
  29. quality = qualities(FORMATS)
  30. formats = []
  31. for format_id in FORMATS:
  32. url = self._html_search_regex(rf'data-{format_id}-file="(.+?)"',
  33. webpage, 'url', fatal=False)
  34. if not url:
  35. continue
  36. match = re.search(r'/(?P<height>\d+)\.mp4', url)
  37. formats.append({
  38. 'url': url,
  39. 'format_id': format_id,
  40. 'quality': quality(format_id),
  41. 'height': int(match.group('height')) if match else None,
  42. })
  43. uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n',
  44. webpage, 'uploader', fatal=False)
  45. uploader_url = ('https://www.clippituser.tv/p/' + uploader
  46. if uploader else None)
  47. timestamp = self._html_search_regex(r'datetime="(.+?)"',
  48. webpage, 'date', fatal=False)
  49. thumbnail = self._html_search_regex(r'data-image="(.+?)"',
  50. webpage, 'thumbnail', fatal=False)
  51. return {
  52. 'id': video_id,
  53. 'title': title,
  54. 'formats': formats,
  55. 'uploader': uploader,
  56. 'uploader_url': uploader_url,
  57. 'timestamp': parse_iso8601(timestamp),
  58. 'description': self._og_search_description(webpage),
  59. 'thumbnail': thumbnail,
  60. }