snotr.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. parse_duration,
  4. parse_filesize,
  5. str_to_int,
  6. )
  7. class SnotrIE(InfoExtractor):
  8. _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
  9. _TESTS = [{
  10. 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
  11. 'info_dict': {
  12. 'id': '13708',
  13. 'ext': 'mp4',
  14. 'title': 'Drone flying through fireworks!',
  15. 'duration': 248,
  16. 'filesize_approx': 40700000,
  17. 'description': 'A drone flying through Fourth of July Fireworks',
  18. 'thumbnail': r're:^https?://.*\.jpg$',
  19. },
  20. 'expected_warnings': ['description'],
  21. }, {
  22. 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
  23. 'info_dict': {
  24. 'id': '530',
  25. 'ext': 'mp4',
  26. 'title': 'David Letteman - George W. Bush Top 10',
  27. 'duration': 126,
  28. 'filesize_approx': 8500000,
  29. 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
  30. 'thumbnail': r're:^https?://.*\.jpg$',
  31. },
  32. }]
  33. def _real_extract(self, url):
  34. mobj = self._match_valid_url(url)
  35. video_id = mobj.group('id')
  36. webpage = self._download_webpage(url, video_id)
  37. title = self._og_search_title(webpage)
  38. description = self._og_search_description(webpage)
  39. info_dict = self._parse_html5_media_entries(
  40. url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0]
  41. view_count = str_to_int(self._html_search_regex(
  42. r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',
  43. webpage, 'view count', fatal=False))
  44. duration = parse_duration(self._html_search_regex(
  45. r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',
  46. webpage, 'duration', fatal=False))
  47. filesize_approx = parse_filesize(self._html_search_regex(
  48. r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)',
  49. webpage, 'filesize', fatal=False))
  50. info_dict.update({
  51. 'id': video_id,
  52. 'description': description,
  53. 'title': title,
  54. 'view_count': view_count,
  55. 'duration': duration,
  56. 'filesize_approx': filesize_approx,
  57. })
  58. return info_dict