closertotruth.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. import re
  2. from .common import InfoExtractor
  3. class CloserToTruthIE(InfoExtractor):
  4. _WORKING = False
  5. _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
  6. _TESTS = [{
  7. 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
  8. 'info_dict': {
  9. 'id': '0_zof1ktre',
  10. 'display_id': 'solutions-the-mind-body-problem',
  11. 'ext': 'mov',
  12. 'title': 'Solutions to the Mind-Body Problem?',
  13. 'upload_date': '20140221',
  14. 'timestamp': 1392956007,
  15. 'uploader_id': 'CTTXML',
  16. },
  17. 'params': {
  18. 'skip_download': True,
  19. },
  20. }, {
  21. 'url': 'http://closertotruth.com/episodes/how-do-brains-work',
  22. 'info_dict': {
  23. 'id': '0_iuxai6g6',
  24. 'display_id': 'how-do-brains-work',
  25. 'ext': 'mov',
  26. 'title': 'How do Brains Work?',
  27. 'upload_date': '20140221',
  28. 'timestamp': 1392956024,
  29. 'uploader_id': 'CTTXML',
  30. },
  31. 'params': {
  32. 'skip_download': True,
  33. },
  34. }, {
  35. 'url': 'http://closertotruth.com/interviews/1725',
  36. 'info_dict': {
  37. 'id': '1725',
  38. 'title': 'AyaFr-002',
  39. },
  40. 'playlist_mincount': 2,
  41. }]
  42. def _real_extract(self, url):
  43. display_id = self._match_id(url)
  44. webpage = self._download_webpage(url, display_id)
  45. partner_id = self._search_regex(
  46. r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
  47. webpage, 'kaltura partner_id')
  48. title = self._html_extract_title(webpage, 'video title')
  49. select = self._search_regex(
  50. r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
  51. webpage, 'select version', default=None)
  52. if select:
  53. entry_ids = set()
  54. entries = []
  55. for mobj in re.finditer(
  56. r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
  57. webpage):
  58. entry_id = mobj.group('id')
  59. if entry_id in entry_ids:
  60. continue
  61. entry_ids.add(entry_id)
  62. entries.append({
  63. '_type': 'url_transparent',
  64. 'url': f'kaltura:{partner_id}:{entry_id}',
  65. 'ie_key': 'Kaltura',
  66. 'title': mobj.group('title'),
  67. })
  68. if entries:
  69. return self.playlist_result(entries, display_id, title)
  70. entry_id = self._search_regex(
  71. r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
  72. webpage, 'kaltura entry_id', group='id')
  73. return {
  74. '_type': 'url_transparent',
  75. 'display_id': display_id,
  76. 'url': f'kaltura:{partner_id}:{entry_id}',
  77. 'ie_key': 'Kaltura',
  78. 'title': title,
  79. }