getcourseru.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. import re
  2. import time
  3. import urllib.parse
  4. from .common import InfoExtractor
  5. from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
  6. from ..utils.traversal import traverse_obj
  7. class GetCourseRuPlayerIE(InfoExtractor):
  8. _VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
  9. _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
  10. _TESTS = [{
  11. 'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
  12. 'info_dict': {
  13. 'id': '513573381',
  14. 'title': '190bdf93f1b29735309853a7a19e24b3',
  15. 'ext': 'mp4',
  16. 'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
  17. 'duration': 1693,
  18. },
  19. 'skip': 'JWT expired',
  20. }]
  21. def _real_extract(self, url):
  22. webpage = self._download_webpage(url, None, 'Downloading player page')
  23. window_configs = self._search_json(
  24. r'window\.configs\s*=', webpage, 'config', None)
  25. video_id = str(window_configs['gcFileId'])
  26. formats, subtitles = self._extract_m3u8_formats_and_subtitles(
  27. window_configs['masterPlaylistUrl'], video_id)
  28. return {
  29. **traverse_obj(window_configs, {
  30. 'title': ('videoHash', {str}),
  31. 'thumbnail': ('previewUrl', {url_or_none}),
  32. 'duration': ('videoDuration', {int_or_none}),
  33. }),
  34. 'id': video_id,
  35. 'formats': formats,
  36. 'subtitles': subtitles,
  37. }
  38. class GetCourseRuIE(InfoExtractor):
  39. _NETRC_MACHINE = 'getcourseru'
  40. _DOMAINS = [
  41. 'academymel.online',
  42. 'marafon.mani-beauty.com',
  43. 'on.psbook.ru',
  44. ]
  45. _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
  46. _VALID_URL = [
  47. rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
  48. rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
  49. ]
  50. _TESTS = [{
  51. 'url': 'http://academymel.online/3video_1',
  52. 'info_dict': {
  53. 'id': '3059742',
  54. 'display_id': '3video_1',
  55. 'title': 'Промоуроки Академии МЕЛ',
  56. },
  57. 'playlist_count': 1,
  58. 'playlist': [{
  59. 'info_dict': {
  60. 'id': '513573381',
  61. 'ext': 'mp4',
  62. 'title': 'Промоуроки Академии МЕЛ',
  63. 'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
  64. 'duration': 1693,
  65. },
  66. }],
  67. }, {
  68. 'url': 'https://academymel.getcourse.ru/3video_1',
  69. 'info_dict': {
  70. 'id': '3059742',
  71. 'display_id': '3video_1',
  72. 'title': 'Промоуроки Академии МЕЛ',
  73. },
  74. 'playlist_count': 1,
  75. 'playlist': [{
  76. 'info_dict': {
  77. 'id': '513573381',
  78. 'ext': 'mp4',
  79. 'title': 'Промоуроки Академии МЕЛ',
  80. 'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
  81. 'duration': 1693,
  82. },
  83. }],
  84. }, {
  85. 'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
  86. 'info_dict': {
  87. 'id': '319141781',
  88. 'title': '1. Разминка у стены',
  89. },
  90. 'playlist_count': 1,
  91. 'playlist': [{
  92. 'info_dict': {
  93. 'id': '4919601',
  94. 'ext': 'mp4',
  95. 'title': '1. Разминка у стены',
  96. 'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
  97. 'duration': 704,
  98. },
  99. }],
  100. 'skip': 'paid lesson',
  101. }, {
  102. 'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
  103. 'info_dict': {
  104. 'id': '272499894',
  105. 'title': 'Мотивация к тренировкам',
  106. },
  107. 'playlist_count': 1,
  108. 'playlist': [{
  109. 'info_dict': {
  110. 'id': '447479687',
  111. 'ext': 'mp4',
  112. 'title': 'Мотивация к тренировкам',
  113. 'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
  114. 'duration': 30,
  115. },
  116. }],
  117. 'skip': 'paid lesson',
  118. }, {
  119. 'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
  120. 'only_matching': True,
  121. }]
  122. _LOGIN_URL_PATH = '/cms/system/login'
  123. def _login(self, hostname, username, password):
  124. if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
  125. return
  126. login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
  127. webpage = self._download_webpage(login_url, None)
  128. self._request_webpage(
  129. login_url, None, 'Logging in', 'Failed to log in',
  130. data=urlencode_postdata({
  131. 'action': 'processXdget',
  132. 'xdgetId': self._html_search_regex(
  133. r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
  134. webpage, 'xdgetId'),
  135. 'params[action]': 'login',
  136. 'params[url]': login_url,
  137. 'params[object_type]': 'cms_page',
  138. 'params[object_id]': -1,
  139. 'params[email]': username,
  140. 'params[password]': password,
  141. 'requestTime': int(time.time()),
  142. 'requestSimpleSign': self._html_search_regex(
  143. r'window.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
  144. }))
  145. def _real_extract(self, url):
  146. hostname = urllib.parse.urlparse(url).hostname
  147. username, password = self._get_login_info(netrc_machine=hostname)
  148. if username:
  149. self._login(hostname, username, password)
  150. display_id = self._match_id(url)
  151. webpage, urlh = self._download_webpage_handle(url, display_id)
  152. if self._LOGIN_URL_PATH in urlh.url:
  153. raise ExtractorError(
  154. f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
  155. expected=True)
  156. playlist_id = self._search_regex(
  157. r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
  158. title = self._og_search_title(webpage) or self._html_extract_title(webpage)
  159. return self.playlist_from_matches(
  160. re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
  161. playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
  162. 'url_transparent': True,
  163. 'title': title,
  164. })