floatplane.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. import functools
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. OnDemandPagedList,
  6. clean_html,
  7. determine_ext,
  8. format_field,
  9. int_or_none,
  10. join_nonempty,
  11. parse_codecs,
  12. parse_iso8601,
  13. url_or_none,
  14. urljoin,
  15. )
  16. from ..utils.traversal import traverse_obj
  17. class FloatplaneIE(InfoExtractor):
  18. _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P<id>\w+)'
  19. _TESTS = [{
  20. 'url': 'https://www.floatplane.com/post/2Yf3UedF7C',
  21. 'info_dict': {
  22. 'id': 'yuleLogLTT',
  23. 'ext': 'mp4',
  24. 'display_id': '2Yf3UedF7C',
  25. 'title': '8K Yule Log Fireplace with Crackling Fire Sounds - 10 Hours',
  26. 'description': 'md5:adf2970e0de1c5e3df447818bb0309f6',
  27. 'thumbnail': r're:^https?://.*\.jpe?g$',
  28. 'duration': 36035,
  29. 'comment_count': int,
  30. 'like_count': int,
  31. 'dislike_count': int,
  32. 'release_date': '20191206',
  33. 'release_timestamp': 1575657000,
  34. 'uploader': 'LinusTechTips',
  35. 'uploader_id': '59f94c0bdd241b70349eb72b',
  36. 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
  37. 'channel': 'Linus Tech Tips',
  38. 'channel_id': '63fe42c309e691e4e36de93d',
  39. 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/main',
  40. 'availability': 'subscriber_only',
  41. },
  42. 'params': {'skip_download': 'm3u8'},
  43. }, {
  44. 'url': 'https://www.floatplane.com/post/j2jqG3JmgJ',
  45. 'info_dict': {
  46. 'id': 'j2jqG3JmgJ',
  47. 'title': 'TJM: Does Anyone Care About Avatar: The Way of Water?',
  48. 'description': 'md5:00bf17dc5733e4031e99b7fd6489f274',
  49. 'thumbnail': r're:^https?://.*\.jpe?g$',
  50. 'comment_count': int,
  51. 'like_count': int,
  52. 'dislike_count': int,
  53. 'release_timestamp': 1671915900,
  54. 'release_date': '20221224',
  55. 'uploader': 'LinusTechTips',
  56. 'uploader_id': '59f94c0bdd241b70349eb72b',
  57. 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
  58. 'channel': "They're Just Movies",
  59. 'channel_id': '64135f82fc76ab7f9fbdc876',
  60. 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/tajm',
  61. 'availability': 'subscriber_only',
  62. },
  63. 'playlist_count': 2,
  64. }, {
  65. 'url': 'https://www.floatplane.com/post/3tK2tInhoN',
  66. 'info_dict': {
  67. 'id': '3tK2tInhoN',
  68. 'title': 'Extras - How Linus Communicates with Editors (Compensator 4)',
  69. 'description': 'md5:83cd40aae1ce124df33769600c80ca5b',
  70. 'thumbnail': r're:^https?://.*\.jpe?g$',
  71. 'comment_count': int,
  72. 'like_count': int,
  73. 'dislike_count': int,
  74. 'release_timestamp': 1700529120,
  75. 'release_date': '20231121',
  76. 'uploader': 'LinusTechTips',
  77. 'uploader_id': '59f94c0bdd241b70349eb72b',
  78. 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
  79. 'channel': 'FP Exclusives',
  80. 'channel_id': '6413623f5b12cca228a28e78',
  81. 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/fpexclusive',
  82. 'availability': 'subscriber_only',
  83. },
  84. 'playlist_count': 2,
  85. }, {
  86. 'url': 'https://beta.floatplane.com/post/d870PEFXS1',
  87. 'info_dict': {
  88. 'id': 'bg9SuYKEww',
  89. 'ext': 'mp4',
  90. 'display_id': 'd870PEFXS1',
  91. 'title': 'LCS Drama, TLOU 2 Remaster, Destiny 2 Player Count Drops, + More!',
  92. 'description': 'md5:80d612dcabf41b17487afcbe303ec57d',
  93. 'thumbnail': r're:^https?://.*\.jpe?g$',
  94. 'release_timestamp': 1700622000,
  95. 'release_date': '20231122',
  96. 'duration': 513,
  97. 'like_count': int,
  98. 'dislike_count': int,
  99. 'comment_count': int,
  100. 'uploader': 'LinusTechTips',
  101. 'uploader_id': '59f94c0bdd241b70349eb72b',
  102. 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
  103. 'channel': 'GameLinked',
  104. 'channel_id': '649dbade3540dbc3945eeda7',
  105. 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/gamelinked',
  106. 'availability': 'subscriber_only',
  107. },
  108. 'params': {'skip_download': 'm3u8'},
  109. }, {
  110. 'url': 'https://www.floatplane.com/post/65B5PNoBtf',
  111. 'info_dict': {
  112. 'id': '65B5PNoBtf',
  113. 'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
  114. 'display_id': '65B5PNoBtf',
  115. 'like_count': int,
  116. 'release_timestamp': 1701249480,
  117. 'uploader': 'The Trash Network',
  118. 'availability': 'subscriber_only',
  119. 'uploader_id': '61bc20c9a131fb692bf2a513',
  120. 'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
  121. 'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
  122. 'comment_count': int,
  123. 'title': 'The $50 electronic drum kit.',
  124. 'channel_id': '64424fe73cd58cbcf8d8e131',
  125. 'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
  126. 'dislike_count': int,
  127. 'channel': 'The Drum Thing',
  128. 'release_date': '20231129',
  129. },
  130. 'playlist_count': 2,
  131. 'playlist': [{
  132. 'info_dict': {
  133. 'id': 'ISPJjexylS',
  134. 'ext': 'mp4',
  135. 'release_date': '20231129',
  136. 'release_timestamp': 1701249480,
  137. 'title': 'The $50 electronic drum kit. .mov',
  138. 'channel_id': '64424fe73cd58cbcf8d8e131',
  139. 'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
  140. 'availability': 'subscriber_only',
  141. 'uploader': 'The Trash Network',
  142. 'duration': 622,
  143. 'channel': 'The Drum Thing',
  144. 'uploader_id': '61bc20c9a131fb692bf2a513',
  145. 'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
  146. 'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
  147. },
  148. }, {
  149. 'info_dict': {
  150. 'id': 'qKfxu6fEpu',
  151. 'ext': 'aac',
  152. 'release_date': '20231129',
  153. 'release_timestamp': 1701249480,
  154. 'title': 'Roland TD-7 Demo.m4a',
  155. 'channel_id': '64424fe73cd58cbcf8d8e131',
  156. 'availability': 'subscriber_only',
  157. 'uploader': 'The Trash Network',
  158. 'duration': 114,
  159. 'channel': 'The Drum Thing',
  160. 'uploader_id': '61bc20c9a131fb692bf2a513',
  161. 'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
  162. 'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
  163. },
  164. }],
  165. 'skip': 'requires subscription: "The Trash Network"',
  166. 'params': {'skip_download': 'm3u8'},
  167. }]
  168. def _real_initialize(self):
  169. if not self._get_cookies('https://www.floatplane.com').get('sails.sid'):
  170. self.raise_login_required()
  171. def _real_extract(self, url):
  172. post_id = self._match_id(url)
  173. post_data = self._download_json(
  174. 'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id},
  175. note='Downloading post data', errnote='Unable to download post data')
  176. if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
  177. raise ExtractorError('Post does not contain a video or audio track', expected=True)
  178. uploader_url = format_field(
  179. post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
  180. common_info = {
  181. 'uploader_url': uploader_url,
  182. 'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
  183. 'availability': self._availability(needs_subscription=True),
  184. **traverse_obj(post_data, {
  185. 'uploader': ('creator', 'title', {str}),
  186. 'uploader_id': ('creator', 'id', {str}),
  187. 'channel': ('channel', 'title', {str}),
  188. 'channel_id': ('channel', 'id', {str}),
  189. 'release_timestamp': ('releaseDate', {parse_iso8601}),
  190. }),
  191. }
  192. items = []
  193. for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
  194. media_id = media['id']
  195. media_typ = media.get('type') or 'video'
  196. metadata = self._download_json(
  197. f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id},
  198. note=f'Downloading {media_typ} metadata')
  199. stream = self._download_json(
  200. 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={
  201. 'type': 'vod' if media_typ == 'video' else 'aod',
  202. 'guid': metadata['guid'],
  203. }, note=f'Downloading {media_typ} stream data')
  204. path_template = traverse_obj(stream, ('resource', 'uri', {str}))
  205. def format_path(params):
  206. path = path_template
  207. for i, val in (params or {}).items():
  208. path = path.replace(f'{{qualityLevelParams.{i}}}', val)
  209. return path
  210. formats = []
  211. for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
  212. url = urljoin(stream['cdn'], format_path(traverse_obj(
  213. stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
  214. formats.append({
  215. **traverse_obj(quality, {
  216. 'format_id': ('name', {str}),
  217. 'format_note': ('label', {str}),
  218. 'width': ('width', {int}),
  219. 'height': ('height', {int}),
  220. }),
  221. **parse_codecs(quality.get('codecs')),
  222. 'url': url,
  223. 'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'),
  224. })
  225. items.append({
  226. **common_info,
  227. 'id': media_id,
  228. **traverse_obj(metadata, {
  229. 'title': ('title', {str}),
  230. 'duration': ('duration', {int_or_none}),
  231. 'thumbnail': ('thumbnail', 'path', {url_or_none}),
  232. }),
  233. 'formats': formats,
  234. })
  235. post_info = {
  236. **common_info,
  237. 'id': post_id,
  238. 'display_id': post_id,
  239. **traverse_obj(post_data, {
  240. 'title': ('title', {str}),
  241. 'description': ('text', {clean_html}),
  242. 'like_count': ('likes', {int_or_none}),
  243. 'dislike_count': ('dislikes', {int_or_none}),
  244. 'comment_count': ('comments', {int_or_none}),
  245. 'thumbnail': ('thumbnail', 'path', {url_or_none}),
  246. }),
  247. }
  248. if len(items) > 1:
  249. return self.playlist_result(items, **post_info)
  250. post_info.update(items[0])
  251. return post_info
  252. class FloatplaneChannelIE(InfoExtractor):
  253. _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
  254. _PAGE_SIZE = 20
  255. _TESTS = [{
  256. 'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo',
  257. 'info_dict': {
  258. 'id': 'linustechtips/ltxexpo',
  259. 'title': 'LTX Expo',
  260. 'description': 'md5:9819002f9ebe7fd7c75a3a1d38a59149',
  261. },
  262. 'playlist_mincount': 51,
  263. }, {
  264. 'url': 'https://www.floatplane.com/channel/ShankMods/home',
  265. 'info_dict': {
  266. 'id': 'ShankMods',
  267. 'title': 'Shank Mods',
  268. 'description': 'md5:6dff1bb07cad8e5448e04daad9be1b30',
  269. },
  270. 'playlist_mincount': 14,
  271. }, {
  272. 'url': 'https://beta.floatplane.com/channel/bitwit_ultra/home',
  273. 'info_dict': {
  274. 'id': 'bitwit_ultra',
  275. 'title': 'Bitwit Ultra',
  276. 'description': 'md5:1452f280bb45962976d4789200f676dd',
  277. },
  278. 'playlist_mincount': 200,
  279. }]
  280. def _fetch_page(self, display_id, creator_id, channel_id, page):
  281. query = {
  282. 'id': creator_id,
  283. 'limit': self._PAGE_SIZE,
  284. 'fetchAfter': page * self._PAGE_SIZE,
  285. }
  286. if channel_id:
  287. query['channel'] = channel_id
  288. page_data = self._download_json(
  289. 'https://www.floatplane.com/api/v3/content/creator', display_id,
  290. query=query, note=f'Downloading page {page + 1}')
  291. for post in page_data or []:
  292. yield self.url_result(
  293. f'https://www.floatplane.com/post/{post["id"]}',
  294. FloatplaneIE, id=post['id'], title=post.get('title'),
  295. release_timestamp=parse_iso8601(post.get('releaseDate')))
  296. def _real_extract(self, url):
  297. creator, channel = self._match_valid_url(url).group('id', 'channel')
  298. display_id = join_nonempty(creator, channel, delim='/')
  299. creator_data = self._download_json(
  300. 'https://www.floatplane.com/api/v3/creator/named',
  301. display_id, query={'creatorURL[0]': creator})[0]
  302. channel_data = traverse_obj(
  303. creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}
  304. return self.playlist_result(OnDemandPagedList(functools.partial(
  305. self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE),
  306. display_id, title=channel_data.get('title') or creator_data.get('title'),
  307. description=channel_data.get('about') or creator_data.get('about'))