viddler.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. float_or_none,
  4. int_or_none,
  5. )
  class ViddlerIE(InfoExtractor):
      """Extractor for videos hosted on viddler.com.

      Handles ``/v/``, ``/embed/`` and ``/player/`` URLs, including videos
      that are protected by a ``secret`` URL parameter.
      """

      # The named group ``id`` is the video id; the optional second group
      # captures the numeric ``secret`` of protected videos.
      _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
      # Matches <iframe src=...> / <param value=...> attributes that point at
      # a viddler embed or player URL.
      _EMBED_REGEX = [r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1']
      _TESTS = [{
          'url': 'http://www.viddler.com/v/43903784',
          'md5': '9eee21161d2c7f5b39690c3e325fab2f',
          'info_dict': {
              'id': '43903784',
              'ext': 'mov',
              'title': 'Video Made Easy',
              'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
              'uploader': 'viddler',
              'timestamp': 1335371429,
              'upload_date': '20120425',
              'duration': 100.89,
              'thumbnail': r're:^https?://.*\.jpg$',
              'view_count': int,
              'comment_count': int,
              'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
          },
      }, {
          'url': 'http://www.viddler.com/v/4d03aad9/',
          'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
          'info_dict': {
              'id': '4d03aad9',
              'ext': 'ts',
              'title': 'WALL-TO-GORTAT',
              'upload_date': '20150126',
              'uploader': 'deadspin',
              'timestamp': 1422285291,
              'view_count': int,
              'comment_count': int,
          },
      }, {
          'url': 'http://www.viddler.com/player/221ebbbd/0/',
          'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
          'info_dict': {
              'id': '221ebbbd',
              'ext': 'mov',
              'title': 'LETeens-Grammar-snack-third-conditional',
              'description': ' ',
              'upload_date': '20140929',
              'uploader': 'BCLETeens',
              'timestamp': 1411997190,
              'view_count': int,
              'comment_count': int,
          },
      }, {
          # secret protected
          'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
          'info_dict': {
              'id': '890c0985',
              'ext': 'mp4',
              'title': 'Complete Property Training - Traineeships',
              'description': ' ',
              'upload_date': '20130606',
              'uploader': 'TiffanyBowtell',
              'timestamp': 1370496993,
              'view_count': int,
              'comment_count': int,
          },
          'params': {
              'skip_download': True,
          },
      }]

      def _real_extract(self, url):
          """Return the info dict for the viddler video at *url*.

          Queries the ``viddler.videos.getPlaybackDetails`` API endpoint and
          emits up to three formats per ready file: the direct URL, plus a CDN
          and an HTML5 variant when the response provides them.

          Mandatory response fields (``video``, ``files``, ``title`` and each
          file's ``url`` / ``profile_name``) are read by subscript, so a
          response lacking one of them raises ``KeyError``.
          """
          video_id, secret = self._match_valid_url(url).groups()

          query = {
              'video_id': video_id,
              'key': 'v0vhrt7bg2xq1vyxhkct',
          }
          # The secret is only forwarded when the URL actually carried one.
          if secret:
              query['secret'] = secret

          data = self._download_json(
              'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
              video_id, headers={'Referer': url}, query=query)['video']

          formats = []
          for filed in data['files']:
              # A file without a status is treated as ready.
              if filed.get('status', 'ready') != 'ready':
                  continue
              format_id = filed.get('profile_id') or filed['profile_name']
              base = {
                  'format_id': format_id,
                  'format_note': filed['profile_name'],
                  'url': self._proto_relative_url(filed['url']),
                  'width': int_or_none(filed.get('width')),
                  'height': int_or_none(filed.get('height')),
                  'filesize': int_or_none(filed.get('size')),
                  'ext': filed.get('ext'),
                  'source_preference': -1,
              }
              formats.append(base)

              # Each variant is derived from the base entry and differs only
              # in URL, format id suffix and source_preference. The suffix is
              # formatted rather than concatenated so that a non-string
              # profile_id does not raise TypeError.
              if filed.get('cdn_url'):
                  formats.append({
                      **base,
                      'url': self._proto_relative_url(filed['cdn_url'], 'http:'),
                      'format_id': f'{format_id}-cdn',
                      'source_preference': 1,
                  })

              if filed.get('html5_video_source'):
                  formats.append({
                      **base,
                      'url': self._proto_relative_url(filed['html5_video_source']),
                      'format_id': f'{format_id}-html5',
                      'source_preference': 0,
                  })

          # 'tags' is optional metadata: tolerate a missing or null list and
          # skip entries that are not dicts or that carry no text, so a
          # malformed tag cannot abort an otherwise successful extraction.
          categories = [
              tag['text'] for tag in data.get('tags') or []
              if isinstance(tag, dict) and tag.get('text')]

          return {
              'id': video_id,
              'title': data['title'],
              'formats': formats,
              'description': data.get('description'),
              'timestamp': int_or_none(data.get('upload_time')),
              'thumbnail': self._proto_relative_url(data.get('thumbnail_url')),
              'uploader': data.get('author'),
              'duration': float_or_none(data.get('length')),
              'view_count': int_or_none(data.get('view_count')),
              'comment_count': int_or_none(data.get('comment_count')),
              'categories': categories,
          }