newgrounds.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. import functools
  2. import re
  3. from .common import InfoExtractor
  4. from ..networking.exceptions import HTTPError
  5. from ..utils import (
  6. ExtractorError,
  7. OnDemandPagedList,
  8. clean_html,
  9. extract_attributes,
  10. get_element_by_id,
  11. int_or_none,
  12. parse_count,
  13. parse_duration,
  14. unified_timestamp,
  15. url_or_none,
  16. urlencode_postdata,
  17. urljoin,
  18. )
  19. from ..utils.traversal import traverse_obj
  20. class NewgroundsIE(InfoExtractor):
  21. _NETRC_MACHINE = 'newgrounds'
  22. _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>\d+)(?:/format/flash)?'
  23. _TESTS = [{
  24. 'url': 'https://www.newgrounds.com/audio/listen/549479',
  25. 'md5': 'fe6033d297591288fa1c1f780386f07a',
  26. 'info_dict': {
  27. 'id': '549479',
  28. 'ext': 'mp3',
  29. 'title': 'B7 - BusMode',
  30. 'uploader': 'Burn7',
  31. 'timestamp': 1378892945,
  32. 'upload_date': '20130911',
  33. 'duration': 143,
  34. 'view_count': int,
  35. 'description': 'md5:b8b3c2958875189f07d8e313462e8c4f',
  36. 'age_limit': 0,
  37. 'thumbnail': r're:^https://aicon\.ngfiles\.com/549/549479\.png',
  38. },
  39. }, {
  40. 'url': 'https://www.newgrounds.com/portal/view/1',
  41. 'md5': 'fbfb40e2dc765a7e830cb251d370d981',
  42. 'info_dict': {
  43. 'id': '1',
  44. 'ext': 'mp4',
  45. 'title': 'Scrotum 1',
  46. 'uploader': 'Brian-Beaton',
  47. 'timestamp': 955078533,
  48. 'upload_date': '20000407',
  49. 'view_count': int,
  50. 'description': 'Scrotum plays "catch."',
  51. 'age_limit': 17,
  52. 'thumbnail': r're:^https://picon\.ngfiles\.com/0/flash_1_card\.png',
  53. },
  54. }, {
  55. # source format unavailable, additional mp4 formats
  56. 'url': 'http://www.newgrounds.com/portal/view/689400',
  57. 'info_dict': {
  58. 'id': '689400',
  59. 'ext': 'mp4',
  60. 'title': 'ZTV News Episode 8',
  61. 'uploader': 'ZONE-SAMA',
  62. 'timestamp': 1487983183,
  63. 'upload_date': '20170225',
  64. 'view_count': int,
  65. 'description': 'md5:aff9b330ec2e78ed93b1ad6d017accc6',
  66. 'age_limit': 17,
  67. 'thumbnail': r're:^https://picon\.ngfiles\.com/689000/flash_689400_card\.png',
  68. },
  69. 'params': {
  70. 'skip_download': True,
  71. },
  72. }, {
  73. 'url': 'https://www.newgrounds.com/portal/view/297383',
  74. 'md5': '2c11f5fd8cb6b433a63c89ba3141436c',
  75. 'info_dict': {
  76. 'id': '297383',
  77. 'ext': 'mp4',
  78. 'title': 'Metal Gear Awesome',
  79. 'uploader': 'Egoraptor',
  80. 'timestamp': 1140681292,
  81. 'upload_date': '20060223',
  82. 'view_count': int,
  83. 'description': 'md5:9246c181614e23754571995104da92e0',
  84. 'age_limit': 13,
  85. 'thumbnail': r're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png',
  86. },
  87. }, {
  88. 'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
  89. 'md5': '5d05585a9a0caca059f5abfbd3865524',
  90. 'info_dict': {
  91. 'id': '297383',
  92. 'ext': 'swf',
  93. 'title': 'Metal Gear Awesome',
  94. 'description': 'Metal Gear Awesome',
  95. 'uploader': 'Egoraptor',
  96. 'upload_date': '20060223',
  97. 'timestamp': 1140681292,
  98. 'view_count': int,
  99. 'age_limit': 13,
  100. 'thumbnail': r're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png',
  101. },
  102. }, {
  103. 'url': 'https://www.newgrounds.com/portal/view/823109',
  104. 'info_dict': {
  105. 'id': '823109',
  106. 'ext': 'mp4',
  107. 'title': 'Rouge Futa Fleshlight Fuck',
  108. 'description': 'I made a fleshlight model and I wanted to use it in an animation. Based on a video by CDNaturally.',
  109. 'uploader': 'DefaultUser12',
  110. 'upload_date': '20211122',
  111. 'timestamp': 1637611540,
  112. 'view_count': int,
  113. 'age_limit': 18,
  114. 'thumbnail': r're:^https://picon\.ngfiles\.com/823000/flash_823109_card\.png',
  115. },
  116. }]
  117. _AGE_LIMIT = {
  118. 'e': 0,
  119. 't': 13,
  120. 'm': 17,
  121. 'a': 18,
  122. }
  123. _LOGIN_URL = 'https://www.newgrounds.com/passport'
  124. def _perform_login(self, username, password):
  125. login_webpage = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
  126. login_url = urljoin(self._LOGIN_URL, self._search_regex(
  127. r'<form action="([^"]+)"', login_webpage, 'login endpoint', default=None))
  128. result = self._download_json(login_url, None, 'Logging in', headers={
  129. 'Accept': 'application/json',
  130. 'Referer': self._LOGIN_URL,
  131. 'X-Requested-With': 'XMLHttpRequest',
  132. }, data=urlencode_postdata({
  133. **self._hidden_inputs(login_webpage),
  134. 'username': username,
  135. 'password': password,
  136. }))
  137. if errors := traverse_obj(result, ('errors', ..., {str})):
  138. raise ExtractorError(', '.join(errors) or 'Unknown Error', expected=True)
  139. def _real_extract(self, url):
  140. media_id = self._match_id(url)
  141. try:
  142. webpage = self._download_webpage(url, media_id)
  143. except ExtractorError as error:
  144. if isinstance(error.cause, HTTPError) and error.cause.status == 401:
  145. self.raise_login_required()
  146. raise
  147. media_url_string = self._search_regex(
  148. r'embedController\(\[{"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)
  149. if media_url_string:
  150. uploader = None
  151. formats = [{
  152. 'url': self._parse_json(media_url_string, media_id),
  153. 'format_id': 'source',
  154. 'quality': 1,
  155. }]
  156. else:
  157. json_video = self._download_json(f'https://www.newgrounds.com/portal/video/{media_id}', media_id, headers={
  158. 'Accept': 'application/json',
  159. 'Referer': url,
  160. 'X-Requested-With': 'XMLHttpRequest',
  161. })
  162. formats = []
  163. uploader = traverse_obj(json_video, ('author', {str}))
  164. for format_id, sources in traverse_obj(json_video, ('sources', {dict.items}, ...)):
  165. quality = int_or_none(format_id[:-1])
  166. formats.extend({
  167. 'format_id': format_id,
  168. 'quality': quality,
  169. 'url': url,
  170. } for url in traverse_obj(sources, (..., 'src', {url_or_none})))
  171. if not uploader:
  172. uploader = self._html_search_regex(
  173. (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
  174. r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
  175. fatal=False)
  176. if len(formats) == 1:
  177. formats[0]['filesize'] = int_or_none(self._html_search_regex(
  178. r'"filesize"\s*:\s*["\']?([\d]+)["\']?,', webpage, 'filesize', default=None))
  179. video_type_description = self._html_search_regex(
  180. r'"description"\s*:\s*["\']?([^"\']+)["\']?,', webpage, 'media type', default=None)
  181. if video_type_description == 'Audio File':
  182. formats[0]['vcodec'] = 'none'
  183. self._check_formats(formats, media_id)
  184. return {
  185. 'id': media_id,
  186. 'title': self._html_extract_title(webpage),
  187. 'uploader': uploader,
  188. 'timestamp': unified_timestamp(self._search_regex(
  189. r'itemprop="(?:uploadDate|datePublished)"\s+content="([^"]+)"',
  190. webpage, 'timestamp', default=None)),
  191. 'duration': parse_duration(self._html_search_regex(
  192. r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage, 'duration', default=None)),
  193. 'formats': formats,
  194. 'thumbnail': self._og_search_thumbnail(webpage),
  195. 'description': (
  196. clean_html(get_element_by_id('author_comments', webpage))
  197. or self._og_search_description(webpage)),
  198. 'age_limit': self._AGE_LIMIT.get(self._html_search_regex(
  199. r'<h2\s+class=["\']rated-([etma])["\']', webpage, 'age_limit', default='e')),
  200. 'view_count': parse_count(self._html_search_regex(
  201. r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>',
  202. webpage, 'view count', default=None)),
  203. }
  204. class NewgroundsPlaylistIE(InfoExtractor):
  205. IE_NAME = 'Newgrounds:playlist'
  206. _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
  207. _TESTS = [{
  208. 'url': 'https://www.newgrounds.com/collection/cats',
  209. 'info_dict': {
  210. 'id': 'cats',
  211. 'title': 'Cats',
  212. },
  213. 'playlist_mincount': 45,
  214. }, {
  215. 'url': 'https://www.newgrounds.com/collection/dogs',
  216. 'info_dict': {
  217. 'id': 'dogs',
  218. 'title': 'Dogs',
  219. },
  220. 'playlist_mincount': 26,
  221. }, {
  222. 'url': 'http://www.newgrounds.com/audio/search/title/cats',
  223. 'only_matching': True,
  224. }]
  225. def _real_extract(self, url):
  226. playlist_id = self._match_id(url)
  227. webpage = self._download_webpage(url, playlist_id)
  228. title = self._html_extract_title(webpage, default=None)
  229. # cut left menu
  230. webpage = self._search_regex(
  231. r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
  232. webpage, 'wide column', default=webpage)
  233. entries = []
  234. for a, path, media_id in re.findall(
  235. r'(<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>)',
  236. webpage):
  237. a_class = extract_attributes(a).get('class')
  238. if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
  239. continue
  240. entries.append(
  241. self.url_result(
  242. f'https://www.newgrounds.com/{path}',
  243. ie=NewgroundsIE.ie_key(), video_id=media_id))
  244. return self.playlist_result(entries, playlist_id, title)
  245. class NewgroundsUserIE(InfoExtractor):
  246. IE_NAME = 'Newgrounds:user'
  247. _VALID_URL = r'https?://(?P<id>[^\.]+)\.newgrounds\.com/(?:movies|audio)/?(?:[#?]|$)'
  248. _TESTS = [{
  249. 'url': 'https://burn7.newgrounds.com/audio',
  250. 'info_dict': {
  251. 'id': 'burn7',
  252. },
  253. 'playlist_mincount': 150,
  254. }, {
  255. 'url': 'https://burn7.newgrounds.com/movies',
  256. 'info_dict': {
  257. 'id': 'burn7',
  258. },
  259. 'playlist_mincount': 2,
  260. }, {
  261. 'url': 'https://brian-beaton.newgrounds.com/movies',
  262. 'info_dict': {
  263. 'id': 'brian-beaton',
  264. },
  265. 'playlist_mincount': 10,
  266. }]
  267. _PAGE_SIZE = 30
  268. def _fetch_page(self, channel_id, url, page):
  269. page += 1
  270. posts_info = self._download_json(
  271. f'{url}?page={page}', channel_id,
  272. note=f'Downloading page {page}', headers={
  273. 'Accept': 'application/json, text/javascript, */*; q = 0.01',
  274. 'X-Requested-With': 'XMLHttpRequest',
  275. })
  276. for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
  277. path, media_id = self._search_regex(
  278. r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
  279. post, 'url', group=(1, 2))
  280. yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
  281. def _real_extract(self, url):
  282. channel_id = self._match_id(url)
  283. entries = OnDemandPagedList(functools.partial(
  284. self._fetch_page, channel_id, url), self._PAGE_SIZE)
  285. return self.playlist_result(entries, channel_id)