# brainpop.py (13 KB)
  1. import json
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. classproperty,
  6. int_or_none,
  7. traverse_obj,
  8. urljoin,
  9. )
  10. class BrainPOPBaseIE(InfoExtractor):
  11. _NETRC_MACHINE = 'brainpop'
  12. _ORIGIN = '' # So that _VALID_URL doesn't crash
  13. _LOGIN_ERRORS = {
  14. 1502: 'The username and password you entered did not match.', # LOGIN_FAILED
  15. 1503: 'Payment method is expired.', # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
  16. 1506: 'Your BrainPOP plan has expired.', # LOGIN_FAILED_ACCOUNT_EXPIRED
  17. 1507: 'Terms not accepted.', # LOGIN_FAILED_TERMS_NOT_ACCEPTED
  18. 1508: 'Account not activated.', # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
  19. 1512: 'The maximum number of devices permitted are logged in with your account right now.', # LOGIN_FAILED_LOGIN_LIMIT_REACHED
  20. 1513: 'You are trying to access your account from outside of its allowed IP range.', # LOGIN_FAILED_INVALID_IP
  21. 1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.', # LOGIN_FAILED_MBP_DISABLED
  22. 1515: 'Account not activated.', # LOGIN_FAILED_TEACHER_NOT_ACTIVE
  23. 1523: 'That username and password won\'t work on this BrainPOP site.', # LOGIN_FAILED_NO_ACCESS
  24. 1524: 'You\'ll need to join a class before you can login.', # LOGIN_FAILED_STUDENT_NO_PERIOD
  25. 1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.', # LOGIN_FAILED_ACCOUNT_LOCKED
  26. }
  27. @classproperty
  28. def _VALID_URL(cls):
  29. root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
  30. return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'
  31. def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields={}):
  32. formats = []
  33. formats = self._extract_m3u8_formats(
  34. f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
  35. display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
  36. formats.append({
  37. 'format_id': format_id,
  38. 'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
  39. })
  40. for f in formats:
  41. f.update(extra_fields)
  42. return formats
  43. def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields={}):
  44. formats = []
  45. additional_key_formats = {
  46. '%s': {},
  47. 'ad_%s': {
  48. 'format_note': 'Audio description',
  49. 'source_preference': -2,
  50. },
  51. }
  52. for additional_key_format, additional_key_fields in additional_key_formats.items():
  53. for key_quality, key_index in enumerate(('high', 'low')):
  54. full_key_index = additional_key_format % (key_format % key_index)
  55. if data.get(full_key_index):
  56. formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
  57. 'quality': -1 - key_quality,
  58. **additional_key_fields,
  59. **extra_fields,
  60. }))
  61. return formats
  62. def _perform_login(self, username, password):
  63. login_res = self._download_json(
  64. 'https://api.brainpop.com/api/login', None,
  65. data=json.dumps({'username': username, 'password': password}).encode(),
  66. headers={
  67. 'Content-Type': 'application/json',
  68. 'Referer': self._ORIGIN,
  69. }, note='Logging in', errnote='Unable to log in', expected_status=400)
  70. status_code = int_or_none(login_res['status_code'])
  71. if status_code != 1505:
  72. self.report_warning(
  73. f'Unable to login: {self._LOGIN_ERRORS.get(status_code) or login_res.get("message")}'
  74. or f'Got status code {status_code}')
  75. class BrainPOPIE(BrainPOPBaseIE):
  76. _ORIGIN = 'https://www.brainpop.com'
  77. _VIDEO_URL = 'https://svideos.brainpop.com'
  78. _HLS_URL = 'https://hls.brainpop.com'
  79. _CDN_URL = 'https://cdn.brainpop.com'
  80. _TESTS = [{
  81. 'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
  82. 'md5': '3ead374233ae74c7f1b0029a01c972f0',
  83. 'info_dict': {
  84. 'id': '1f3259fa457292b4',
  85. 'ext': 'mp4',
  86. 'title': 'Martin Luther King, Jr.',
  87. 'display_id': 'martinlutherkingjr',
  88. 'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
  89. },
  90. }, {
  91. 'url': 'https://www.brainpop.com/science/space/bigbang/',
  92. 'md5': '9a1ff0e77444dd9e437354eb669c87ec',
  93. 'info_dict': {
  94. 'id': 'acae52cd48c99acf',
  95. 'ext': 'mp4',
  96. 'title': 'Big Bang',
  97. 'display_id': 'bigbang',
  98. 'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
  99. },
  100. 'skip': 'Requires login',
  101. }]
  102. def _real_extract(self, url):
  103. slug, display_id = self._match_valid_url(url).group('slug', 'id')
  104. movie_data = self._download_json(
  105. f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
  106. 'Downloading movie data JSON', 'Unable to download movie data')['data']
  107. topic_data = traverse_obj(self._download_json(
  108. f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
  109. 'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
  110. ('data', 'topic'), expected_type=dict) or movie_data['topic']
  111. if not traverse_obj(movie_data, ('access', 'allow')):
  112. reason = traverse_obj(movie_data, ('access', 'reason'))
  113. if 'logged' in reason:
  114. self.raise_login_required(reason, metadata_available=True)
  115. else:
  116. self.raise_no_formats(reason, video_id=display_id)
  117. movie_feature = movie_data['feature']
  118. movie_feature_data = movie_feature['data']
  119. formats, subtitles = [], {}
  120. formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
  121. 'language': movie_feature.get('language') or 'en',
  122. 'language_preference': 10,
  123. }))
  124. for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
  125. formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
  126. 'language': lang,
  127. 'language_preference': -10,
  128. }))
  129. # TODO: Do localization fields also have subtitles?
  130. for name, url in movie_feature_data.items():
  131. lang = self._search_regex(
  132. r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
  133. if lang and url:
  134. subtitles.setdefault(lang, []).append({
  135. 'url': urljoin(self._CDN_URL, url),
  136. })
  137. return {
  138. 'id': topic_data['topic_id'],
  139. 'display_id': display_id,
  140. 'title': topic_data.get('name'),
  141. 'description': topic_data.get('synopsis'),
  142. 'formats': formats,
  143. 'subtitles': subtitles,
  144. }
  145. class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
  146. def _parse_js_topic_data(self, topic_data, display_id, token):
  147. movie_data = topic_data['movies']
  148. # TODO: Are there non-burned subtitles?
  149. formats = self._extract_adaptive_formats(movie_data, token, display_id)
  150. return {
  151. 'id': topic_data['EntryID'],
  152. 'display_id': display_id,
  153. 'title': topic_data.get('name'),
  154. 'alt_title': topic_data.get('title'),
  155. 'description': topic_data.get('synopsis'),
  156. 'formats': formats,
  157. }
  158. def _real_extract(self, url):
  159. slug, display_id = self._match_valid_url(url).group('slug', 'id')
  160. webpage = self._download_webpage(url, display_id)
  161. topic_data = self._search_json(
  162. r'var\s+content\s*=\s*', webpage, 'content data',
  163. display_id, end_pattern=';')['category']['unit']['topic']
  164. token = self._search_regex(r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
  165. return self._parse_js_topic_data(topic_data, display_id, token)
  166. class BrainPOPJrIE(BrainPOPLegacyBaseIE):
  167. _ORIGIN = 'https://jr.brainpop.com'
  168. _VIDEO_URL = 'https://svideos-jr.brainpop.com'
  169. _HLS_URL = 'https://hls-jr.brainpop.com'
  170. _CDN_URL = 'https://cdn-jr.brainpop.com'
  171. _TESTS = [{
  172. 'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
  173. 'md5': '04e0561bb21770f305a0ce6cf0d869ab',
  174. 'info_dict': {
  175. 'id': '347',
  176. 'ext': 'mp4',
  177. 'title': 'Emotions',
  178. 'display_id': 'emotions',
  179. },
  180. }, {
  181. 'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
  182. 'md5': 'b0ed063bbd1910df00220ee29340f5d6',
  183. 'info_dict': {
  184. 'id': '29',
  185. 'ext': 'mp4',
  186. 'title': 'Arctic Habitats',
  187. 'display_id': 'arctichabitats',
  188. },
  189. 'skip': 'Requires login',
  190. }]
  191. class BrainPOPELLIE(BrainPOPLegacyBaseIE):
  192. _ORIGIN = 'https://ell.brainpop.com'
  193. _VIDEO_URL = 'https://svideos-esl.brainpop.com'
  194. _HLS_URL = 'https://hls-esl.brainpop.com'
  195. _CDN_URL = 'https://cdn-esl.brainpop.com'
  196. _TESTS = [{
  197. 'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
  198. 'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
  199. 'info_dict': {
  200. 'id': '1',
  201. 'ext': 'mp4',
  202. 'title': 'Lesson 1',
  203. 'display_id': 'lesson1',
  204. 'alt_title': 'Personal Pronouns',
  205. },
  206. }, {
  207. 'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
  208. 'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
  209. 'info_dict': {
  210. 'id': '101',
  211. 'ext': 'mp4',
  212. 'title': 'Lesson 5',
  213. 'display_id': 'lesson5',
  214. 'alt_title': 'Review: Unit 6',
  215. },
  216. 'skip': 'Requires login',
  217. }]
  218. class BrainPOPEspIE(BrainPOPLegacyBaseIE):
  219. IE_DESC = 'BrainPOP Español'
  220. _ORIGIN = 'https://esp.brainpop.com'
  221. _VIDEO_URL = 'https://svideos.brainpop.com'
  222. _HLS_URL = 'https://hls.brainpop.com'
  223. _CDN_URL = 'https://cdn.brainpop.com/mx'
  224. _TESTS = [{
  225. 'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
  226. 'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
  227. 'info_dict': {
  228. 'id': '3893',
  229. 'ext': 'mp4',
  230. 'title': 'Ecosistemas',
  231. 'display_id': 'ecosistemas',
  232. 'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
  233. },
  234. }, {
  235. 'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
  236. 'md5': '98c1b9559e0e33777209c425cda7dac4',
  237. 'info_dict': {
  238. 'id': '7146',
  239. 'ext': 'mp4',
  240. 'title': 'Emily Dickinson',
  241. 'display_id': 'emily_dickinson',
  242. 'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
  243. },
  244. 'skip': 'Requires login',
  245. }]
  246. class BrainPOPFrIE(BrainPOPLegacyBaseIE):
  247. IE_DESC = 'BrainPOP Français'
  248. _ORIGIN = 'https://fr.brainpop.com'
  249. _VIDEO_URL = 'https://svideos.brainpop.com'
  250. _HLS_URL = 'https://hls.brainpop.com'
  251. _CDN_URL = 'https://cdn.brainpop.com/fr'
  252. _TESTS = [{
  253. 'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
  254. 'md5': '97e7f48af8af93f8a2be11709f239371',
  255. 'info_dict': {
  256. 'id': '1651',
  257. 'ext': 'mp4',
  258. 'title': 'Sources d\'énergie',
  259. 'display_id': 'sourcesdenergie',
  260. 'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
  261. },
  262. }, {
  263. 'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
  264. 'md5': '0cf2b4f89804d0dd4a360a51310d445a',
  265. 'info_dict': {
  266. 'id': '5803',
  267. 'ext': 'mp4',
  268. 'title': 'Plagiat',
  269. 'display_id': 'plagiat',
  270. 'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
  271. },
  272. 'skip': 'Requires login',
  273. }]
  274. class BrainPOPIlIE(BrainPOPLegacyBaseIE):
  275. IE_DESC = 'BrainPOP Hebrew'
  276. _ORIGIN = 'https://il.brainpop.com'
  277. _VIDEO_URL = 'https://svideos.brainpop.com'
  278. _HLS_URL = 'https://hls.brainpop.com'
  279. _CDN_URL = 'https://cdn.brainpop.com/he'
  280. _TESTS = [{
  281. 'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
  282. 'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
  283. 'info_dict': {
  284. 'id': '3782',
  285. 'ext': 'mp4',
  286. 'title': 'md5:e993632fcda0545d9205602ec314ad67',
  287. 'display_id': 'subjects_3782',
  288. 'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
  289. },
  290. }]