camtasia.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. import os
  2. import urllib.parse
  3. from .common import InfoExtractor
  4. from ..utils import float_or_none
  5. class CamtasiaEmbedIE(InfoExtractor):
  6. _VALID_URL = False
  7. _WEBPAGE_TESTS = [
  8. {
  9. 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
  10. 'playlist': [{
  11. 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
  12. 'info_dict': {
  13. 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
  14. 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
  15. 'ext': 'flv',
  16. 'duration': 2235.90,
  17. },
  18. }, {
  19. 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
  20. 'info_dict': {
  21. 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
  22. 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
  23. 'ext': 'flv',
  24. 'duration': 2235.93,
  25. },
  26. }],
  27. 'info_dict': {
  28. 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
  29. },
  30. 'skip': 'webpage dead',
  31. },
  32. ]
  33. def _extract_from_webpage(self, url, webpage):
  34. camtasia_cfg = self._search_regex(
  35. r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
  36. webpage, 'camtasia configuration file', default=None)
  37. if camtasia_cfg is None:
  38. return None
  39. title = self._html_search_meta('DC.title', webpage, fatal=True)
  40. camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
  41. camtasia_cfg = self._download_xml(
  42. camtasia_url, self._generic_id(url),
  43. note='Downloading camtasia configuration',
  44. errnote='Failed to download camtasia configuration')
  45. fileset_node = camtasia_cfg.find('./playlist/array/fileset')
  46. entries = []
  47. for n in fileset_node.getchildren():
  48. url_n = n.find('./uri')
  49. if url_n is None:
  50. continue
  51. entries.append({
  52. 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
  53. 'title': f'{title} - {n.tag}',
  54. 'url': urllib.parse.urljoin(url, url_n.text),
  55. 'duration': float_or_none(n.find('./duration').text),
  56. })
  57. return {
  58. '_type': 'playlist',
  59. 'entries': entries,
  60. 'title': title,
  61. }