mocha.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. from .common import InfoExtractor
  2. from ..utils import int_or_none, traverse_obj
  3. class MochaVideoIE(InfoExtractor):
  4. _VALID_URL = r'https?://video\.mocha\.com\.vn/(?P<video_slug>[\w-]+)'
  5. _TESTS = [{
  6. 'url': 'http://video.mocha.com.vn/chuyen-meo-gia-su-tu-thong-diep-cuoc-song-v18694039',
  7. 'info_dict': {
  8. 'id': '18694039',
  9. 'title': 'Chuyện mèo giả sư tử | Thông điệp cuộc sống',
  10. 'ext': 'mp4',
  11. 'view_count': int,
  12. 'like_count': int,
  13. 'dislike_count': int,
  14. 'display_id': 'chuyen-meo-gia-su-tu-thong-diep-cuoc-song',
  15. 'thumbnail': 'http://mcvideomd1fr.keeng.net/playnow/images/20220505/ad0a055d-2f69-42ca-b888-4790041fe6bc_640x480.jpg',
  16. 'description': '',
  17. 'duration': 70,
  18. 'timestamp': 1652254203,
  19. 'upload_date': '20220511',
  20. 'comment_count': int,
  21. 'categories': ['Kids'],
  22. },
  23. }]
  24. def _real_extract(self, url):
  25. video_slug = self._match_valid_url(url).group('video_slug')
  26. json_data = self._download_json(
  27. 'http://apivideo.mocha.com.vn:8081/onMediaBackendBiz/mochavideo/getVideoDetail',
  28. video_slug, query={'url': url, 'token': ''})['data']['videoDetail']
  29. video_id = str(json_data['id'])
  30. video_urls = (json_data.get('list_resolution') or []) + [json_data.get('original_path')]
  31. formats, subtitles = [], {}
  32. for video in video_urls:
  33. if isinstance(video, str):
  34. formats.extend([{'url': video, 'ext': 'mp4'}])
  35. else:
  36. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  37. video.get('video_path'), video_id, ext='mp4')
  38. formats.extend(fmts)
  39. self._merge_subtitles(subs, target=subtitles)
  40. return {
  41. 'id': video_id,
  42. 'display_id': json_data.get('slug') or video_slug,
  43. 'title': json_data.get('name'),
  44. 'formats': formats,
  45. 'subtitles': subtitles,
  46. 'description': json_data.get('description'),
  47. 'duration': json_data.get('durationS'),
  48. 'view_count': json_data.get('total_view'),
  49. 'like_count': json_data.get('total_like'),
  50. 'dislike_count': json_data.get('total_unlike'),
  51. 'thumbnail': json_data.get('image_path_thumb'),
  52. 'timestamp': int_or_none(json_data.get('publish_time'), scale=1000),
  53. 'is_live': json_data.get('isLive'),
  54. 'channel': traverse_obj(json_data, ('channels', '0', 'name')),
  55. 'channel_id': traverse_obj(json_data, ('channels', '0', 'id')),
  56. 'channel_follower_count': traverse_obj(json_data, ('channels', '0', 'numfollow')),
  57. 'categories': traverse_obj(json_data, ('categories', ..., 'categoryname')),
  58. 'comment_count': json_data.get('total_comment'),
  59. }