amazonminitv.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. import json
  2. from .common import InfoExtractor
  3. from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
  4. class AmazonMiniTVBaseIE(InfoExtractor):
  5. def _real_initialize(self):
  6. self._download_webpage(
  7. 'https://www.amazon.in/minitv', None,
  8. note='Fetching guest session cookies')
  9. AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
  10. def _call_api(self, asin, data=None, note=None):
  11. device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
  12. if data:
  13. data['variables'].update({
  14. 'contentType': 'VOD',
  15. 'sessionIdToken': self.session_id,
  16. **device,
  17. })
  18. resp = self._download_json(
  19. f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
  20. asin, note=note, headers={
  21. 'Content-Type': 'application/json',
  22. 'currentpageurl': '/',
  23. 'currentplatform': 'dWeb',
  24. }, data=json.dumps(data).encode() if data else None,
  25. query=None if data else {
  26. 'deviceType': 'A1WMMUXPCUJL4N',
  27. 'contentId': asin,
  28. **device,
  29. })
  30. if resp.get('errors'):
  31. raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
  32. elif not data:
  33. return resp
  34. return resp['data'][data['operationName']]
  35. class AmazonMiniTVIE(AmazonMiniTVBaseIE):
  36. _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
  37. _TESTS = [{
  38. 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
  39. 'info_dict': {
  40. 'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
  41. 'ext': 'mp4',
  42. 'title': 'May I Kiss You?',
  43. 'language': 'Hindi',
  44. 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
  45. 'description': 'md5:a549bfc747973e04feb707833474e59d',
  46. 'release_timestamp': 1644710400,
  47. 'release_date': '20220213',
  48. 'duration': 846,
  49. 'chapters': 'count:2',
  50. 'series': 'Couple Goals',
  51. 'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
  52. 'season': 'Season 3',
  53. 'season_number': 3,
  54. 'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36',
  55. 'episode': 'May I Kiss You?',
  56. 'episode_number': 2,
  57. 'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
  58. },
  59. }, {
  60. 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
  61. 'info_dict': {
  62. 'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
  63. 'ext': 'mp4',
  64. 'title': 'Jahaan',
  65. 'language': 'Hindi',
  66. 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
  67. 'description': 'md5:05eb765a77bf703f322f120ec6867339',
  68. 'release_timestamp': 1647475200,
  69. 'release_date': '20220317',
  70. 'duration': 783,
  71. 'chapters': [],
  72. },
  73. }, {
  74. 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab',
  75. 'only_matching': True,
  76. }, {
  77. 'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
  78. 'only_matching': True,
  79. }, {
  80. 'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
  81. 'only_matching': True,
  82. }]
  83. _GRAPHQL_QUERY_CONTENT = '''
  84. query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
  85. content(
  86. applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
  87. contentId: $contentId
  88. contentType: $contentType
  89. ) {
  90. contentId
  91. name
  92. ... on Episode {
  93. contentId
  94. vodType
  95. name
  96. images
  97. description {
  98. synopsis
  99. contentLengthInSeconds
  100. }
  101. publicReleaseDateUTC
  102. audioTracks
  103. seasonId
  104. seriesId
  105. seriesName
  106. seasonNumber
  107. episodeNumber
  108. timecode {
  109. endCreditsTime
  110. }
  111. }
  112. ... on MovieContent {
  113. contentId
  114. vodType
  115. name
  116. description {
  117. synopsis
  118. contentLengthInSeconds
  119. }
  120. images
  121. publicReleaseDateUTC
  122. audioTracks
  123. }
  124. }
  125. }'''
  126. def _real_extract(self, url):
  127. asin = f'amzn1.dv.gti.{self._match_id(url)}'
  128. prs = self._call_api(asin, note='Downloading playback info')
  129. formats, subtitles = [], {}
  130. for type_, asset in prs['playbackAssets'].items():
  131. if not traverse_obj(asset, 'manifestUrl'):
  132. continue
  133. if type_ == 'hls':
  134. m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
  135. asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native',
  136. m3u8_id=type_, fatal=False)
  137. formats.extend(m3u8_fmts)
  138. subtitles = self._merge_subtitles(subtitles, m3u8_subs)
  139. elif type_ == 'dash':
  140. mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
  141. asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
  142. formats.extend(mpd_fmts)
  143. subtitles = self._merge_subtitles(subtitles, mpd_subs)
  144. else:
  145. self.report_warning(f'Unknown asset type: {type_}')
  146. title_info = self._call_api(
  147. asin, note='Downloading title info', data={
  148. 'operationName': 'content',
  149. 'variables': {'contentId': asin},
  150. 'query': self._GRAPHQL_QUERY_CONTENT,
  151. })
  152. credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
  153. is_episode = title_info.get('vodType') == 'EPISODE'
  154. return {
  155. 'id': asin,
  156. 'title': title_info.get('name'),
  157. 'formats': formats,
  158. 'subtitles': subtitles,
  159. 'language': traverse_obj(title_info, ('audioTracks', 0)),
  160. 'thumbnails': [{
  161. 'id': type_,
  162. 'url': url,
  163. } for type_, url in (title_info.get('images') or {}).items()],
  164. 'description': traverse_obj(title_info, ('description', 'synopsis')),
  165. 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
  166. 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
  167. 'chapters': [{
  168. 'start_time': credits_time,
  169. 'title': 'End Credits',
  170. }] if credits_time else [],
  171. 'series': title_info.get('seriesName'),
  172. 'series_id': title_info.get('seriesId'),
  173. 'season_number': title_info.get('seasonNumber'),
  174. 'season_id': title_info.get('seasonId'),
  175. 'episode': title_info.get('name') if is_episode else None,
  176. 'episode_number': title_info.get('episodeNumber'),
  177. 'episode_id': asin if is_episode else None,
  178. }
  179. class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
  180. IE_NAME = 'amazonminitv:season'
  181. _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
  182. IE_DESC = 'Amazon MiniTV Season, "minitv:season:" prefix'
  183. _TESTS = [{
  184. 'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
  185. 'playlist_mincount': 6,
  186. 'info_dict': {
  187. 'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
  188. },
  189. }, {
  190. 'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
  191. 'only_matching': True,
  192. }]
  193. _GRAPHQL_QUERY = '''
  194. query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
  195. getEpisodes(
  196. applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
  197. episodeOrSeasonId: $episodeOrSeasonId
  198. ) {
  199. episodes {
  200. ... on Episode {
  201. contentId
  202. name
  203. images
  204. seriesName
  205. seasonId
  206. seriesId
  207. seasonNumber
  208. episodeNumber
  209. description {
  210. synopsis
  211. contentLengthInSeconds
  212. }
  213. publicReleaseDateUTC
  214. }
  215. }
  216. }
  217. }
  218. '''
  219. def _entries(self, asin):
  220. season_info = self._call_api(
  221. asin, note='Downloading season info', data={
  222. 'operationName': 'getEpisodes',
  223. 'variables': {'episodeOrSeasonId': asin},
  224. 'query': self._GRAPHQL_QUERY,
  225. })
  226. for episode in season_info['episodes']:
  227. yield self.url_result(
  228. f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])
  229. def _real_extract(self, url):
  230. asin = f'amzn1.dv.gti.{self._match_id(url)}'
  231. return self.playlist_result(self._entries(asin), asin)
  232. class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
  233. IE_NAME = 'amazonminitv:series'
  234. _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
  235. IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix'
  236. _TESTS = [{
  237. 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
  238. 'playlist_mincount': 3,
  239. 'info_dict': {
  240. 'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
  241. },
  242. }, {
  243. 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
  244. 'only_matching': True,
  245. }]
  246. _GRAPHQL_QUERY = '''
  247. query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
  248. getSeasons(
  249. applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
  250. episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId
  251. ) {
  252. seasons {
  253. seasonId
  254. }
  255. }
  256. }
  257. '''
  258. def _entries(self, asin):
  259. season_info = self._call_api(
  260. asin, note='Downloading series info', data={
  261. 'operationName': 'getSeasons',
  262. 'variables': {'episodeOrSeasonOrSeriesId': asin},
  263. 'query': self._GRAPHQL_QUERY,
  264. })
  265. for season in season_info['seasons']:
  266. yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])
  267. def _real_extract(self, url):
  268. asin = f'amzn1.dv.gti.{self._match_id(url)}'
  269. return self.playlist_result(self._entries(asin), asin)