taptap.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. import re
  2. import uuid
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. clean_html,
  6. int_or_none,
  7. join_nonempty,
  8. str_or_none,
  9. url_or_none,
  10. )
  11. from ..utils.traversal import traverse_obj
  12. class TapTapBaseIE(InfoExtractor):
  13. _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC'
  14. _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get'
  15. _INFO_API = None
  16. _INFO_QUERY_KEY = 'id'
  17. _DATA_PATH = None
  18. _ID_PATH = None
  19. _META_PATH = None
  20. def _get_api(self, url, video_id, query, **kwargs):
  21. query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())}
  22. return self._download_json(url, video_id, query=query, **kwargs)['data']
  23. def _extract_video(self, video_id):
  24. video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0]
  25. # h265 playlist contains both h265 and h264 formats
  26. video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any))
  27. formats = self._extract_m3u8_formats(video_url, video_id, fatal=False)
  28. for fmt in formats:
  29. if re.search(r'^(hev|hvc|hvt)\d', fmt.get('vcodec', '')):
  30. fmt['format_id'] = join_nonempty(fmt.get('format_id'), 'h265', delim='_')
  31. return {
  32. 'id': str(video_id),
  33. 'formats': formats,
  34. **traverse_obj(video_data, ({
  35. 'duration': ('info', 'duration', {int_or_none}),
  36. 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}),
  37. }), get_all=False),
  38. }
  39. def _real_extract(self, url):
  40. video_id = self._match_id(url)
  41. query = {self._INFO_QUERY_KEY: video_id}
  42. data = traverse_obj(
  43. self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH)
  44. metainfo = traverse_obj(data, self._META_PATH)
  45. entries = [{
  46. **metainfo,
  47. **self._extract_video(id_),
  48. } for id_ in set(traverse_obj(data, self._ID_PATH))]
  49. return self.playlist_result(entries, **metainfo, id=video_id)
  50. class TapTapMomentIE(TapTapBaseIE):
  51. _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P<id>\d+)'
  52. _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail'
  53. _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id')
  54. _META_PATH = ('moment', {
  55. 'timestamp': ('created_time', {int_or_none}),
  56. 'modified_timestamp': ('edited_time', {int_or_none}),
  57. 'uploader': ('author', 'user', 'name', {str}),
  58. 'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}),
  59. 'title': ('topic', 'title', {str}),
  60. 'description': ('topic', 'summary', {str}),
  61. })
  62. _TESTS = [{
  63. 'url': 'https://www.taptap.cn/moment/194618230982052443',
  64. 'info_dict': {
  65. 'id': '194618230982052443',
  66. 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
  67. 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
  68. 'timestamp': 1633453402,
  69. 'upload_date': '20211005',
  70. 'modified_timestamp': 1633453402,
  71. 'modified_date': '20211005',
  72. 'uploader': '乌酱',
  73. 'uploader_id': '532896',
  74. },
  75. 'playlist_count': 1,
  76. 'playlist': [{
  77. 'info_dict': {
  78. 'id': '2202584',
  79. 'ext': 'mp4',
  80. 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
  81. 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
  82. 'duration': 66,
  83. 'timestamp': 1633453402,
  84. 'upload_date': '20211005',
  85. 'modified_timestamp': 1633453402,
  86. 'modified_date': '20211005',
  87. 'uploader': '乌酱',
  88. 'uploader_id': '532896',
  89. 'thumbnail': r're:^https?://.*\.(png|jpg)',
  90. },
  91. }],
  92. 'params': {'skip_download': 'm3u8'},
  93. }, {
  94. 'url': 'https://www.taptap.cn/moment/521630629209573493',
  95. 'info_dict': {
  96. 'id': '521630629209573493',
  97. 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
  98. 'description': 'md5:2c81245da864428c904d53ae4ad2182b',
  99. 'timestamp': 1711425600,
  100. 'upload_date': '20240326',
  101. 'modified_timestamp': 1711425600,
  102. 'modified_date': '20240326',
  103. 'uploader': '崩坏:星穹铁道',
  104. 'uploader_id': '414732580',
  105. },
  106. 'playlist_count': 1,
  107. 'playlist': [{
  108. 'info_dict': {
  109. 'id': '4006511',
  110. 'ext': 'mp4',
  111. 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
  112. 'description': 'md5:2c81245da864428c904d53ae4ad2182b',
  113. 'duration': 173,
  114. 'timestamp': 1711425600,
  115. 'upload_date': '20240326',
  116. 'modified_timestamp': 1711425600,
  117. 'modified_date': '20240326',
  118. 'uploader': '崩坏:星穹铁道',
  119. 'uploader_id': '414732580',
  120. 'thumbnail': r're:^https?://.*\.(png|jpg)',
  121. },
  122. }],
  123. 'params': {'skip_download': 'm3u8'},
  124. }, {
  125. 'url': 'https://www.taptap.cn/moment/540493587511511299',
  126. 'playlist_count': 2,
  127. 'info_dict': {
  128. 'id': '540493587511511299',
  129. 'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!',
  130. 'description': 'md5:d60842350e686ddb242291ddfb8e39c9',
  131. 'timestamp': 1715920200,
  132. 'upload_date': '20240517',
  133. 'modified_timestamp': 1715942225,
  134. 'modified_date': '20240517',
  135. 'uploader': 'TapTap 编辑',
  136. 'uploader_id': '7159244',
  137. },
  138. 'params': {'skip_download': 'm3u8'},
  139. }]
  140. class TapTapAppIE(TapTapBaseIE):
  141. _VALID_URL = r'https?://www\.taptap\.cn/app/(?P<id>\d+)'
  142. _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail'
  143. _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
  144. _META_PATH = {
  145. 'title': ('title', {str}),
  146. 'description': ('description', 'text', {str}, {clean_html}),
  147. }
  148. _TESTS = [{
  149. 'url': 'https://www.taptap.cn/app/168332',
  150. 'info_dict': {
  151. 'id': '168332',
  152. 'title': '原神',
  153. 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
  154. },
  155. 'playlist_count': 2,
  156. 'playlist': [{
  157. 'info_dict': {
  158. 'id': '4058443',
  159. 'ext': 'mp4',
  160. 'title': '原神',
  161. 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
  162. 'duration': 26,
  163. 'thumbnail': r're:^https?://.*\.(png|jpg)',
  164. },
  165. }, {
  166. 'info_dict': {
  167. 'id': '4058462',
  168. 'ext': 'mp4',
  169. 'title': '原神',
  170. 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
  171. 'duration': 295,
  172. 'thumbnail': r're:^https?://.*\.(png|jpg)',
  173. },
  174. }],
  175. 'params': {'skip_download': 'm3u8'},
  176. }]
  177. class TapTapIntlBase(TapTapBaseIE):
  178. _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0'
  179. _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get'
  180. class TapTapAppIntlIE(TapTapIntlBase):
  181. _VALID_URL = r'https?://www\.taptap\.io/app/(?P<id>\d+)'
  182. _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail'
  183. _DATA_PATH = 'app'
  184. _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
  185. _META_PATH = {
  186. 'title': ('title', {str}),
  187. 'description': ('description', 'text', {str}, {clean_html}),
  188. }
  189. _TESTS = [{
  190. 'url': 'https://www.taptap.io/app/233287',
  191. 'info_dict': {
  192. 'id': '233287',
  193. 'title': '《虹彩六號 M》',
  194. 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
  195. },
  196. 'playlist_count': 1,
  197. 'playlist': [{
  198. 'info_dict': {
  199. 'id': '2149708997',
  200. 'ext': 'mp4',
  201. 'title': '《虹彩六號 M》',
  202. 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
  203. 'duration': 78,
  204. 'thumbnail': r're:^https?://.*\.(png|jpg)',
  205. },
  206. }],
  207. 'params': {'skip_download': 'm3u8'},
  208. }]
  209. class TapTapPostIntlIE(TapTapIntlBase):
  210. _VALID_URL = r'https?://www\.taptap\.io/post/(?P<id>\d+)'
  211. _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail'
  212. _INFO_QUERY_KEY = 'id_str'
  213. _DATA_PATH = 'post'
  214. _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id')
  215. _META_PATH = {
  216. 'timestamp': ('published_time', {int_or_none}),
  217. 'modified_timestamp': ('edited_time', {int_or_none}),
  218. 'uploader': ('user', 'name', {str}),
  219. 'uploader_id': ('user', 'id', {int}, {str_or_none}),
  220. 'title': ('title', {str}),
  221. 'description': ('list_fields', 'summary', {str}),
  222. }
  223. _TESTS = [{
  224. 'url': 'https://www.taptap.io/post/571785',
  225. 'info_dict': {
  226. 'id': '571785',
  227. 'title': 'Arknights x Rainbow Six Siege | Event PV',
  228. 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
  229. 'timestamp': 1614664951,
  230. 'upload_date': '20210302',
  231. 'modified_timestamp': 1614664951,
  232. 'modified_date': '20210302',
  233. 'uploader': 'TapTap Editor',
  234. 'uploader_id': '80224473',
  235. },
  236. 'playlist_count': 1,
  237. 'playlist': [{
  238. 'info_dict': {
  239. 'id': '2149491903',
  240. 'ext': 'mp4',
  241. 'title': 'Arknights x Rainbow Six Siege | Event PV',
  242. 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
  243. 'duration': 122,
  244. 'timestamp': 1614664951,
  245. 'upload_date': '20210302',
  246. 'modified_timestamp': 1614664951,
  247. 'modified_date': '20210302',
  248. 'uploader': 'TapTap Editor',
  249. 'uploader_id': '80224473',
  250. 'thumbnail': r're:^https?://.*\.(png|jpg)',
  251. },
  252. }],
  253. 'params': {'skip_download': 'm3u8'},
  254. }]