# netverse.py
  1. import itertools
  2. from .common import InfoExtractor, SearchInfoExtractor
  3. from .dailymotion import DailymotionIE
  4. from ..utils import smuggle_url, traverse_obj
  5. class NetverseBaseIE(InfoExtractor):
  6. _ENDPOINTS = {
  7. 'watch': 'watchvideo',
  8. 'video': 'watchvideo',
  9. 'webseries': 'webseries',
  10. 'season': 'webseason_videos',
  11. }
  12. def _call_api(self, slug, endpoint, query={}, season_id='', display_id=None):
  13. return self._download_json(
  14. f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}',
  15. display_id or slug, query=query)
  16. def _get_comments(self, video_id):
  17. last_page_number = None
  18. for i in itertools.count(1):
  19. comment_data = self._download_json(
  20. f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}',
  21. video_id, data=b'', fatal=False, query={'page': i},
  22. note=f'Downloading JSON comment metadata page {i}') or {}
  23. yield from traverse_obj(comment_data, ('response', 'comments', 'data', ..., {
  24. 'id': '_id',
  25. 'text': 'comment',
  26. 'author_id': 'customer_id',
  27. 'author': ('customer', 'name'),
  28. 'author_thumbnail': ('customer', 'profile_picture'),
  29. }))
  30. if not last_page_number:
  31. last_page_number = traverse_obj(comment_data, ('response', 'comments', 'last_page'))
  32. if i >= (last_page_number or 0):
  33. break
  34. class NetverseIE(NetverseBaseIE):
  35. _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
  36. _TESTS = [{
  37. # Watch video
  38. 'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
  39. 'info_dict': {
  40. 'id': 'k4yhqUwINAGtmHx3NkL',
  41. 'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016',
  42. 'ext': 'mp4',
  43. 'season': 'Season 2016',
  44. 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  45. 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
  46. 'episode_number': 22,
  47. 'episode': 'Episode 22',
  48. 'uploader_id': 'x2ir3vq',
  49. 'age_limit': 0,
  50. 'tags': [],
  51. 'view_count': int,
  52. 'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
  53. 'duration': 2990,
  54. 'upload_date': '20210722',
  55. 'timestamp': 1626919804,
  56. 'like_count': int,
  57. 'uploader': 'Net Prime',
  58. },
  59. }, {
  60. # series
  61. 'url': 'https://www.netverse.id/watch/jadoo-seorang-model',
  62. 'info_dict': {
  63. 'id': 'x88izwc',
  64. 'title': 'Jadoo Seorang Model',
  65. 'ext': 'mp4',
  66. 'season': 'Season 2',
  67. 'description': 'md5:8a74f70812cca267e19ee0635f0af835',
  68. 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
  69. 'episode_number': 2,
  70. 'episode': 'Episode 2',
  71. 'view_count': int,
  72. 'like_count': int,
  73. 'display_id': 'jadoo-seorang-model',
  74. 'uploader_id': 'x2ir3vq',
  75. 'duration': 635,
  76. 'timestamp': 1646372927,
  77. 'tags': ['PG069497-hellojadooseason2eps2'],
  78. 'upload_date': '20220304',
  79. 'uploader': 'Net Prime',
  80. 'age_limit': 0,
  81. },
  82. 'skip': 'video get Geo-blocked for some country',
  83. }, {
  84. # non www host
  85. 'url': 'https://netverse.id/watch/tetangga-baru',
  86. 'info_dict': {
  87. 'id': 'k4CNGz7V0HJ7vfwZbXy',
  88. 'ext': 'mp4',
  89. 'title': 'Tetangga Baru',
  90. 'season': 'Season 1',
  91. 'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9',
  92. 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
  93. 'episode_number': 1,
  94. 'episode': 'Episode 1',
  95. 'timestamp': 1624538169,
  96. 'view_count': int,
  97. 'upload_date': '20210624',
  98. 'age_limit': 0,
  99. 'uploader_id': 'x2ir3vq',
  100. 'like_count': int,
  101. 'uploader': 'Net Prime',
  102. 'tags': ['PG008534', 'tetangga', 'Baru'],
  103. 'display_id': 'tetangga-baru',
  104. 'duration': 1406,
  105. },
  106. }, {
  107. # /video url
  108. 'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1',
  109. 'title': 'Namaku Choi Jadoo',
  110. 'info_dict': {
  111. 'id': 'x887jzz',
  112. 'ext': 'mp4',
  113. 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
  114. 'season': 'Season 1',
  115. 'episode_number': 1,
  116. 'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5',
  117. 'title': 'Namaku Choi Jadoo',
  118. 'episode': 'Episode 1',
  119. 'age_limit': 0,
  120. 'like_count': int,
  121. 'view_count': int,
  122. 'tags': ['PG067482', 'PG067482-HelloJadoo-season1'],
  123. 'duration': 780,
  124. 'display_id': 'pg067482-hellojadoo-season1',
  125. 'uploader_id': 'x2ir3vq',
  126. 'uploader': 'Net Prime',
  127. 'timestamp': 1645764984,
  128. 'upload_date': '20220225',
  129. },
  130. 'skip': 'This video get Geo-blocked for some country',
  131. }, {
  132. # video with comments
  133. 'url': 'https://netverse.id/video/episode-1-season-2016-ok-food',
  134. 'info_dict': {
  135. 'id': 'k6hetBPiQMljSxxvAy7',
  136. 'ext': 'mp4',
  137. 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
  138. 'display_id': 'episode-1-season-2016-ok-food',
  139. 'like_count': int,
  140. 'description': '',
  141. 'duration': 1471,
  142. 'age_limit': 0,
  143. 'timestamp': 1642405848,
  144. 'episode_number': 1,
  145. 'season': 'Season 2016',
  146. 'uploader_id': 'x2ir3vq',
  147. 'title': 'Episode 1 - Season 2016 - Ok Food',
  148. 'upload_date': '20220117',
  149. 'tags': [],
  150. 'view_count': int,
  151. 'episode': 'Episode 1',
  152. 'uploader': 'Net Prime',
  153. 'comment_count': int,
  154. },
  155. 'params': {
  156. 'getcomments': True,
  157. },
  158. }, {
  159. # video with multiple page comment
  160. 'url': 'https://netverse.id/video/match-island-eps-1-fix',
  161. 'info_dict': {
  162. 'id': 'x8aznjc',
  163. 'ext': 'mp4',
  164. 'like_count': int,
  165. 'tags': ['Match-Island', 'Pd00111'],
  166. 'display_id': 'match-island-eps-1-fix',
  167. 'view_count': int,
  168. 'episode': 'Episode 1',
  169. 'uploader': 'Net Prime',
  170. 'duration': 4070,
  171. 'timestamp': 1653068165,
  172. 'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f',
  173. 'age_limit': 0,
  174. 'title': 'Welcome To Match Island',
  175. 'upload_date': '20220520',
  176. 'episode_number': 1,
  177. 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
  178. 'uploader_id': 'x2ir3vq',
  179. 'season': 'Season 1',
  180. 'comment_count': int,
  181. },
  182. 'params': {
  183. 'getcomments': True,
  184. },
  185. }]
  186. def _real_extract(self, url):
  187. display_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
  188. program_json = self._call_api(display_id, sites_type)
  189. videos = program_json['response']['videos']
  190. return {
  191. '_type': 'url_transparent',
  192. 'ie_key': DailymotionIE.ie_key(),
  193. 'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}),
  194. 'display_id': display_id,
  195. 'title': videos.get('title'),
  196. 'season': videos.get('season_name'),
  197. 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
  198. 'description': traverse_obj(videos, ('program_detail', 'description')),
  199. 'episode_number': videos.get('episode_order'),
  200. '__post_extractor': self.extract_comments(display_id),
  201. }
  202. class NetversePlaylistIE(NetverseBaseIE):
  203. _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)'
  204. _TESTS = [{
  205. # multiple season
  206. 'url': 'https://netverse.id/webseries/tetangga-masa-gitu',
  207. 'info_dict': {
  208. 'id': 'tetangga-masa-gitu',
  209. 'title': 'Tetangga Masa Gitu',
  210. },
  211. 'playlist_count': 519,
  212. }, {
  213. # single season
  214. 'url': 'https://netverse.id/webseries/kelas-internasional',
  215. 'info_dict': {
  216. 'id': 'kelas-internasional',
  217. 'title': 'Kelas Internasional',
  218. },
  219. 'playlist_count': 203,
  220. }]
  221. def parse_playlist(self, json_data, playlist_id):
  222. slug_sample = traverse_obj(json_data, ('related', 'data', ..., 'slug'))[0]
  223. for season in traverse_obj(json_data, ('seasons', ..., 'id')):
  224. playlist_json = self._call_api(
  225. slug_sample, 'season', display_id=playlist_id, season_id=season)
  226. for current_page in range(playlist_json['response']['season_list']['last_page']):
  227. playlist_json = self._call_api(slug_sample, 'season', query={'page': current_page + 1},
  228. season_id=season, display_id=playlist_id)
  229. for slug in traverse_obj(playlist_json, ('response', ..., 'data', ..., 'slug')):
  230. yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE)
  231. def _real_extract(self, url):
  232. playlist_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
  233. playlist_data = self._call_api(playlist_id, sites_type)
  234. return self.playlist_result(
  235. self.parse_playlist(playlist_data['response'], playlist_id),
  236. traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')),
  237. traverse_obj(playlist_data, ('response', 'webseries_info', 'title')))
  238. class NetverseSearchIE(SearchInfoExtractor):
  239. _SEARCH_KEY = 'netsearch'
  240. _TESTS = [{
  241. 'url': 'netsearch10:tetangga',
  242. 'info_dict': {
  243. 'id': 'tetangga',
  244. 'title': 'tetangga',
  245. },
  246. 'playlist_count': 10,
  247. }]
  248. def _search_results(self, query):
  249. last_page = None
  250. for i in itertools.count(1):
  251. search_data = self._download_json(
  252. 'https://api.netverse.id/search/elastic/search', query,
  253. query={'q': query, 'page': i}, note=f'Downloading page {i}')
  254. videos = traverse_obj(search_data, ('response', 'data', ...))
  255. for video in videos:
  256. yield self.url_result(f'https://netverse.id/video/{video["slug"]}', NetverseIE)
  257. last_page = last_page or traverse_obj(search_data, ('response', 'lastpage'))
  258. if not videos or i >= (last_page or 0):
  259. break