kankanews.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. import hashlib
  2. import random
  3. import string
  4. import time
  5. import urllib.parse
  6. from .common import InfoExtractor
  7. class KankaNewsIE(InfoExtractor):
  8. _WORKING = False
  9. _VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml'
  10. _TESTS = [{
  11. 'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',
  12. 'md5': '05e126513c74b1258d657452a6f4eef9',
  13. 'info_dict': {
  14. 'id': '4485057',
  15. 'url': 'http://mediaplay.kksmg.com/2022/11/08/h264_450k_mp4_1a388ad771e0e4cc28b0da44d245054e_ncm.mp4',
  16. 'ext': 'mp4',
  17. 'title': '视频|第23个中国记者节,我们在进博切蛋糕',
  18. 'thumbnail': r're:^https?://.*\.jpg*',
  19. },
  20. }]
  21. def _real_extract(self, url):
  22. display_id = self._match_id(url)
  23. webpage = self._download_webpage(url, display_id)
  24. video_id = self._search_regex(r'omsid\s*=\s*"(\d+)"', webpage, 'video id')
  25. params = {
  26. 'nonce': ''.join(random.choices(string.ascii_lowercase + string.digits, k=8)),
  27. 'omsid': video_id,
  28. 'platform': 'pc',
  29. 'timestamp': int(time.time()),
  30. 'version': '1.0',
  31. }
  32. params['sign'] = hashlib.md5((hashlib.md5((
  33. urllib.parse.urlencode(params) + '&28c8edde3d61a0411511d3b1866f0636'
  34. ).encode()).hexdigest()).encode()).hexdigest()
  35. meta = self._download_json('https://api-app.kankanews.com/kankan/pc/getvideo',
  36. video_id, query=params)['result']['video']
  37. return {
  38. 'id': video_id,
  39. 'url': meta['videourl'],
  40. 'title': self._search_regex(r'g\.title\s*=\s*"([^"]+)"', webpage, 'title'),
  41. 'thumbnail': meta.get('titlepic'),
  42. }