puhutv.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. from .common import InfoExtractor
  2. from ..networking.exceptions import HTTPError
  3. from ..utils import (
  4. ExtractorError,
  5. float_or_none,
  6. int_or_none,
  7. parse_resolution,
  8. str_or_none,
  9. try_get,
  10. unified_timestamp,
  11. url_or_none,
  12. urljoin,
  13. )
  14. class PuhuTVIE(InfoExtractor):
  15. _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
  16. IE_NAME = 'puhutv'
  17. _TESTS = [{
  18. # film
  19. 'url': 'https://puhutv.com/sut-kardesler-izle',
  20. 'md5': 'a347470371d56e1585d1b2c8dab01c96',
  21. 'info_dict': {
  22. 'id': '5085',
  23. 'display_id': 'sut-kardesler',
  24. 'ext': 'mp4',
  25. 'title': 'Süt Kardeşler',
  26. 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
  27. 'thumbnail': r're:^https?://.*\.jpg$',
  28. 'duration': 4832.44,
  29. 'creator': 'Arzu Film',
  30. 'timestamp': 1561062602,
  31. 'upload_date': '20190620',
  32. 'release_year': 1976,
  33. 'view_count': int,
  34. 'tags': list,
  35. },
  36. }, {
  37. # episode, geo restricted, bypassable with --geo-verification-proxy
  38. 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
  39. 'only_matching': True,
  40. }, {
  41. # 4k, with subtitles
  42. 'url': 'https://puhutv.com/dip-1-bolum-izle',
  43. 'only_matching': True,
  44. }]
  45. _SUBTITLE_LANGS = {
  46. 'English': 'en',
  47. 'Deutsch': 'de',
  48. 'عربى': 'ar',
  49. }
  50. def _real_extract(self, url):
  51. display_id = self._match_id(url)
  52. info = self._download_json(
  53. urljoin(url, f'/api/slug/{display_id}-izle'),
  54. display_id)['data']
  55. video_id = str(info['id'])
  56. show = info.get('title') or {}
  57. title = info.get('name') or show['name']
  58. if info.get('display_name'):
  59. title = '{} {}'.format(title, info['display_name'])
  60. try:
  61. videos = self._download_json(
  62. f'https://puhutv.com/api/assets/{video_id}/videos',
  63. display_id, 'Downloading video JSON',
  64. headers=self.geo_verification_headers())
  65. except ExtractorError as e:
  66. if isinstance(e.cause, HTTPError) and e.cause.status == 403:
  67. self.raise_geo_restricted()
  68. raise
  69. urls = []
  70. formats = []
  71. for video in videos['data']['videos']:
  72. media_url = url_or_none(video.get('url'))
  73. if not media_url or media_url in urls:
  74. continue
  75. urls.append(media_url)
  76. playlist = video.get('is_playlist')
  77. if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
  78. formats.extend(self._extract_m3u8_formats(
  79. media_url, video_id, 'mp4', entry_protocol='m3u8_native',
  80. m3u8_id='hls', fatal=False))
  81. continue
  82. quality = int_or_none(video.get('quality'))
  83. f = {
  84. 'url': media_url,
  85. 'ext': 'mp4',
  86. 'height': quality,
  87. }
  88. video_format = video.get('video_format')
  89. is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
  90. if is_hls:
  91. format_id = 'hls'
  92. f['protocol'] = 'm3u8_native'
  93. elif video_format == 'mp4':
  94. format_id = 'http'
  95. else:
  96. continue
  97. if quality:
  98. format_id += f'-{quality}p'
  99. f['format_id'] = format_id
  100. formats.append(f)
  101. creator = try_get(
  102. show, lambda x: x['producer']['name'], str)
  103. content = info.get('content') or {}
  104. images = try_get(
  105. content, lambda x: x['images']['wide'], dict) or {}
  106. thumbnails = []
  107. for image_id, image_url in images.items():
  108. if not isinstance(image_url, str):
  109. continue
  110. if not image_url.startswith(('http', '//')):
  111. image_url = f'https://{image_url}'
  112. t = parse_resolution(image_id)
  113. t.update({
  114. 'id': image_id,
  115. 'url': image_url,
  116. })
  117. thumbnails.append(t)
  118. tags = []
  119. for genre in show.get('genres') or []:
  120. if not isinstance(genre, dict):
  121. continue
  122. genre_name = genre.get('name')
  123. if genre_name and isinstance(genre_name, str):
  124. tags.append(genre_name)
  125. subtitles = {}
  126. for subtitle in content.get('subtitles') or []:
  127. if not isinstance(subtitle, dict):
  128. continue
  129. lang = subtitle.get('language')
  130. sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
  131. if not lang or not isinstance(lang, str) or not sub_url:
  132. continue
  133. subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
  134. 'url': sub_url,
  135. }]
  136. return {
  137. 'id': video_id,
  138. 'display_id': display_id,
  139. 'title': title,
  140. 'description': info.get('description') or show.get('description'),
  141. 'season_id': str_or_none(info.get('season_id')),
  142. 'season_number': int_or_none(info.get('season_number')),
  143. 'episode_number': int_or_none(info.get('episode_number')),
  144. 'release_year': int_or_none(show.get('released_at')),
  145. 'timestamp': unified_timestamp(info.get('created_at')),
  146. 'creator': creator,
  147. 'view_count': int_or_none(content.get('watch_count')),
  148. 'duration': float_or_none(content.get('duration_in_ms'), 1000),
  149. 'tags': tags,
  150. 'subtitles': subtitles,
  151. 'thumbnails': thumbnails,
  152. 'formats': formats,
  153. }
  154. class PuhuTVSerieIE(InfoExtractor):
  155. _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
  156. IE_NAME = 'puhutv:serie'
  157. _TESTS = [{
  158. 'url': 'https://puhutv.com/deniz-yildizi-detay',
  159. 'info_dict': {
  160. 'title': 'Deniz Yıldızı',
  161. 'id': 'deniz-yildizi',
  162. },
  163. 'playlist_mincount': 205,
  164. }, {
  165. # a film detail page which is using same url with serie page
  166. 'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
  167. 'only_matching': True,
  168. }]
  169. def _extract_entries(self, seasons):
  170. for season in seasons:
  171. season_id = season.get('id')
  172. if not season_id:
  173. continue
  174. page = 1
  175. has_more = True
  176. while has_more is True:
  177. season = self._download_json(
  178. f'https://galadriel.puhutv.com/seasons/{season_id}',
  179. season_id, f'Downloading page {page}', query={
  180. 'page': page,
  181. 'per': 40,
  182. })
  183. episodes = season.get('episodes')
  184. if isinstance(episodes, list):
  185. for ep in episodes:
  186. slug_path = str_or_none(ep.get('slugPath'))
  187. if not slug_path:
  188. continue
  189. video_id = str_or_none(int_or_none(ep.get('id')))
  190. yield self.url_result(
  191. f'https://puhutv.com/{slug_path}',
  192. ie=PuhuTVIE.ie_key(), video_id=video_id,
  193. video_title=ep.get('name') or ep.get('eventLabel'))
  194. page += 1
  195. has_more = season.get('hasMore')
  196. def _real_extract(self, url):
  197. playlist_id = self._match_id(url)
  198. info = self._download_json(
  199. urljoin(url, f'/api/slug/{playlist_id}-detay'),
  200. playlist_id)['data']
  201. seasons = info.get('seasons')
  202. if seasons:
  203. return self.playlist_result(
  204. self._extract_entries(seasons), playlist_id, info.get('name'))
  205. # For films, these are using same url with series
  206. video_id = info.get('slug') or info['assets'][0]['slug']
  207. return self.url_result(
  208. f'https://puhutv.com/{video_id}-izle',
  209. PuhuTVIE.ie_key(), video_id)