laracasts.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
import json

from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    get_element_html_by_id,
    int_or_none,
    parse_duration,
    str_or_none,
    unified_strdate,
    url_or_none,
    urljoin,
)
from ..utils.traversal import traverse_obj
  16. class LaracastsBaseIE(InfoExtractor):
  17. def _get_prop_data(self, url, display_id):
  18. webpage = self._download_webpage(url, display_id)
  19. return traverse_obj(
  20. get_element_html_by_id('app', webpage),
  21. ({extract_attributes}, 'data-page', {json.loads}, 'props'))
  22. def _parse_episode(self, episode):
  23. if not traverse_obj(episode, 'vimeoId'):
  24. self.raise_login_required('This video is only available for subscribers.')
  25. return self.url_result(
  26. VimeoIE._smuggle_referrer(
  27. f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/'),
  28. VimeoIE, url_transparent=True,
  29. **traverse_obj(episode, {
  30. 'id': ('id', {int}, {str_or_none}),
  31. 'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
  32. 'title': ('title', {clean_html}),
  33. 'season_number': ('chapter', {int_or_none}),
  34. 'episode_number': ('position', {int_or_none}),
  35. 'description': ('body', {clean_html}),
  36. 'thumbnail': ('largeThumbnail', {url_or_none}),
  37. 'duration': ('length', {int_or_none}),
  38. 'date': ('dateSegments', 'published', {unified_strdate}),
  39. }))
  40. class LaracastsIE(LaracastsBaseIE):
  41. IE_NAME = 'laracasts'
  42. _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)'
  43. _TESTS = [{
  44. 'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
  45. 'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
  46. 'info_dict': {
  47. 'id': '922040563',
  48. 'title': 'Hello, Laravel',
  49. 'ext': 'mp4',
  50. 'duration': 519,
  51. 'date': '20240312',
  52. 'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
  53. 'description': 'md5:ddd658bb241975871d236555657e1dd1',
  54. 'season_number': 1,
  55. 'season': 'Season 1',
  56. 'episode_number': 1,
  57. 'episode': 'Episode 1',
  58. 'uploader': 'Laracasts',
  59. 'uploader_id': 'user20182673',
  60. 'uploader_url': 'https://vimeo.com/user20182673',
  61. },
  62. 'expected_warnings': ['Failed to parse XML'], # TODO: Remove when vimeo extractor is fixed
  63. }]
  64. def _real_extract(self, url):
  65. display_id = self._match_id(url)
  66. return self._parse_episode(self._get_prop_data(url, display_id)['lesson'])
  67. class LaracastsPlaylistIE(LaracastsBaseIE):
  68. IE_NAME = 'laracasts:series'
  69. _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)'
  70. _TESTS = [{
  71. 'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
  72. 'info_dict': {
  73. 'title': '30 Days to Learn Laravel',
  74. 'id': '210',
  75. 'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2',
  76. 'duration': 30600.0,
  77. 'modified_date': '20240511',
  78. 'description': 'md5:27c260a1668a450984e8f901579912dd',
  79. 'categories': ['Frameworks'],
  80. 'tags': ['Laravel'],
  81. 'display_id': '30-days-to-learn-laravel-11',
  82. },
  83. 'playlist_count': 30,
  84. }]
  85. def _real_extract(self, url):
  86. display_id = self._match_id(url)
  87. series = self._get_prop_data(url, display_id)['series']
  88. metadata = {
  89. 'display_id': display_id,
  90. **traverse_obj(series, {
  91. 'title': ('title', {str}),
  92. 'id': ('id', {int}, {str_or_none}),
  93. 'description': ('body', {clean_html}),
  94. 'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
  95. 'duration': ('runTime', {parse_duration}),
  96. 'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
  97. 'tags': ('topics', ..., 'name', {str}),
  98. 'modified_date': ('lastUpdated', {unified_strdate}),
  99. }),
  100. }
  101. return self.playlist_result(traverse_obj(
  102. series, ('chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode})), **metadata)