raywenderlich.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. import re
  2. from .common import InfoExtractor
  3. from .vimeo import VimeoIE
  4. from ..utils import (
  5. ExtractorError,
  6. int_or_none,
  7. merge_dicts,
  8. try_get,
  9. unescapeHTML,
  10. unified_timestamp,
  11. urljoin,
  12. )
  13. class RayWenderlichIE(InfoExtractor):
  14. _VALID_URL = r'''(?x)
  15. https?://
  16. (?:
  17. videos\.raywenderlich\.com/courses|
  18. (?:www\.)?raywenderlich\.com
  19. )/
  20. (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
  21. '''
  22. _TESTS = [{
  23. 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
  24. 'info_dict': {
  25. 'id': '248377018',
  26. 'ext': 'mp4',
  27. 'title': 'Introduction',
  28. 'description': 'md5:804d031b3efa9fcb49777d512d74f722',
  29. 'timestamp': 1513906277,
  30. 'upload_date': '20171222',
  31. 'duration': 133,
  32. 'uploader': 'Ray Wenderlich',
  33. 'uploader_id': 'user3304672',
  34. },
  35. 'params': {
  36. 'noplaylist': True,
  37. 'skip_download': True,
  38. },
  39. 'add_ie': [VimeoIE.ie_key()],
  40. 'expected_warnings': ['HTTP Error 403: Forbidden'],
  41. }, {
  42. 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
  43. 'only_matching': True,
  44. }]
  45. @staticmethod
  46. def _extract_video_id(data, lesson_id):
  47. if not data:
  48. return
  49. groups = try_get(data, lambda x: x['groups'], list) or []
  50. if not groups:
  51. return
  52. for group in groups:
  53. if not isinstance(group, dict):
  54. continue
  55. contents = try_get(data, lambda x: x['contents'], list) or []
  56. for content in contents:
  57. if not isinstance(content, dict):
  58. continue
  59. ordinal = int_or_none(content.get('ordinal'))
  60. if ordinal != lesson_id:
  61. continue
  62. video_id = content.get('identifier')
  63. if video_id:
  64. return str(video_id)
  65. def _real_extract(self, url):
  66. mobj = self._match_valid_url(url)
  67. course_id, lesson_id = mobj.group('course_id', 'id')
  68. display_id = f'{course_id}/{lesson_id}'
  69. webpage = self._download_webpage(url, display_id)
  70. thumbnail = self._og_search_thumbnail(
  71. webpage, default=None) or self._html_search_meta(
  72. 'twitter:image', webpage, 'thumbnail')
  73. if '>Subscribe to unlock' in webpage:
  74. raise ExtractorError(
  75. 'This content is only available for subscribers',
  76. expected=True)
  77. info = {
  78. 'thumbnail': thumbnail,
  79. }
  80. vimeo_id = self._search_regex(
  81. r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
  82. if not vimeo_id:
  83. data = self._parse_json(
  84. self._search_regex(
  85. r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
  86. 'data collection', default='{}', group='data'),
  87. display_id, transform_source=unescapeHTML, fatal=False)
  88. video_id = self._extract_video_id(
  89. data, lesson_id) or self._search_regex(
  90. r'/videos/(\d+)/', thumbnail, 'video id')
  91. headers = {
  92. 'Referer': url,
  93. 'X-Requested-With': 'XMLHttpRequest',
  94. }
  95. csrf_token = self._html_search_meta(
  96. 'csrf-token', webpage, 'csrf token', default=None)
  97. if csrf_token:
  98. headers['X-CSRF-Token'] = csrf_token
  99. video = self._download_json(
  100. f'https://videos.raywenderlich.com/api/v1/videos/{video_id}.json',
  101. display_id, headers=headers)['video']
  102. vimeo_id = video['clips'][0]['provider_id']
  103. info.update({
  104. '_type': 'url_transparent',
  105. 'title': video.get('name'),
  106. 'description': video.get('description') or video.get(
  107. 'meta_description'),
  108. 'duration': int_or_none(video.get('duration')),
  109. 'timestamp': unified_timestamp(video.get('created_at')),
  110. })
  111. return merge_dicts(info, self.url_result(
  112. VimeoIE._smuggle_referrer(
  113. f'https://player.vimeo.com/video/{vimeo_id}', url),
  114. ie=VimeoIE.ie_key(), video_id=vimeo_id))
  115. class RayWenderlichCourseIE(InfoExtractor):
  116. _VALID_URL = r'''(?x)
  117. https?://
  118. (?:
  119. videos\.raywenderlich\.com/courses|
  120. (?:www\.)?raywenderlich\.com
  121. )/
  122. (?P<id>[^/]+)
  123. '''
  124. _TEST = {
  125. 'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
  126. 'info_dict': {
  127. 'title': 'Testing in iOS',
  128. 'id': '3530-testing-in-ios',
  129. },
  130. 'params': {
  131. 'noplaylist': False,
  132. },
  133. 'playlist_count': 29,
  134. }
  135. @classmethod
  136. def suitable(cls, url):
  137. return False if RayWenderlichIE.suitable(url) else super().suitable(url)
  138. def _real_extract(self, url):
  139. course_id = self._match_id(url)
  140. webpage = self._download_webpage(url, course_id)
  141. entries = []
  142. lesson_urls = set()
  143. for lesson_url in re.findall(
  144. rf'<a[^>]+\bhref=["\'](/{course_id}/lessons/\d+)', webpage):
  145. if lesson_url in lesson_urls:
  146. continue
  147. lesson_urls.add(lesson_url)
  148. entries.append(self.url_result(
  149. urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
  150. title = self._og_search_title(
  151. webpage, default=None) or self._html_search_meta(
  152. 'twitter:title', webpage, 'title', default=None)
  153. return self.playlist_result(entries, course_id, title)