hls.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. from __future__ import unicode_literals
  2. import re
  3. import binascii
  4. try:
  5. from Crypto.Cipher import AES
  6. can_decrypt_frag = True
  7. except ImportError:
  8. can_decrypt_frag = False
  9. try:
  10. import concurrent.futures
  11. can_threaded_download = True
  12. except ImportError:
  13. can_threaded_download = False
  14. from ..downloader import _get_real_downloader
  15. from .fragment import FragmentFD
  16. from .external import FFmpegFD
  17. from ..compat import (
  18. compat_urllib_error,
  19. compat_urlparse,
  20. compat_struct_pack,
  21. )
  22. from ..utils import (
  23. parse_m3u8_attributes,
  24. sanitize_open,
  25. update_url_query,
  26. )
  27. class HlsFD(FragmentFD):
  28. """
  29. Download segments in a m3u8 manifest. External downloaders can take over
  30. the fragment downloads by supporting the 'frag_urls' protocol and
  31. re-defining 'supports_manifest' function
  32. """
  33. FD_NAME = 'hlsnative'
  34. @staticmethod
  35. def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag):
  36. UNSUPPORTED_FEATURES = [
  37. # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
  38. # Live streams heuristic does not always work (e.g. geo restricted to Germany
  39. # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
  40. # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
  41. # This heuristic also is not correct since segments may not be appended as well.
  42. # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
  43. # no segments will definitely be appended to the end of the playlist.
  44. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
  45. # # event media playlists [4]
  46. # r'#EXT-X-MAP:', # media initialization [5]
  47. # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
  48. # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
  49. # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
  50. # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
  51. # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
  52. ]
  53. if not allow_unplayable_formats:
  54. UNSUPPORTED_FEATURES += [
  55. r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
  56. ]
  57. def check_results():
  58. yield not info_dict.get('is_live')
  59. is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
  60. yield with_crypto or not is_aes128_enc
  61. yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
  62. for feature in UNSUPPORTED_FEATURES:
  63. yield not re.search(feature, manifest)
  64. return all(check_results())
  65. def real_download(self, filename, info_dict):
  66. man_url = info_dict['url']
  67. self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
  68. urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
  69. man_url = urlh.geturl()
  70. s = urlh.read().decode('utf-8', 'ignore')
  71. if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
  72. if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
  73. self.report_error('pycryptodome not found. Please install it.')
  74. return False
  75. if self.can_download(s, info_dict, with_crypto=True):
  76. self.report_warning('pycryptodome is needed to download this file with hlsnative')
  77. self.report_warning(
  78. 'hlsnative has detected features it does not support, '
  79. 'extraction will be delegated to ffmpeg')
  80. fd = FFmpegFD(self.ydl, self.params)
  81. # TODO: Make progress updates work without hooking twice
  82. # for ph in self._progress_hooks:
  83. # fd.add_progress_hook(ph)
  84. return fd.real_download(filename, info_dict)
  85. real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
  86. if real_downloader and not real_downloader.supports_manifest(s):
  87. real_downloader = None
  88. def is_ad_fragment_start(s):
  89. return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
  90. or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
  91. def is_ad_fragment_end(s):
  92. return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
  93. or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
  94. fragments = []
  95. media_frags = 0
  96. ad_frags = 0
  97. ad_frag_next = False
  98. for line in s.splitlines():
  99. line = line.strip()
  100. if not line:
  101. continue
  102. if line.startswith('#'):
  103. if is_ad_fragment_start(line):
  104. ad_frag_next = True
  105. elif is_ad_fragment_end(line):
  106. ad_frag_next = False
  107. continue
  108. if ad_frag_next:
  109. ad_frags += 1
  110. continue
  111. media_frags += 1
  112. ctx = {
  113. 'filename': filename,
  114. 'total_frags': media_frags,
  115. 'ad_frags': ad_frags,
  116. }
  117. if real_downloader:
  118. self._prepare_external_frag_download(ctx)
  119. else:
  120. self._prepare_and_start_frag_download(ctx)
  121. fragment_retries = self.params.get('fragment_retries', 0)
  122. skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
  123. test = self.params.get('test', False)
  124. format_index = info_dict.get('format_index')
  125. extra_query = None
  126. extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
  127. if extra_param_to_segment_url:
  128. extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
  129. i = 0
  130. media_sequence = 0
  131. decrypt_info = {'METHOD': 'NONE'}
  132. byte_range = {}
  133. discontinuity_count = 0
  134. frag_index = 0
  135. ad_frag_next = False
  136. for line in s.splitlines():
  137. line = line.strip()
  138. if line:
  139. if not line.startswith('#'):
  140. if format_index and discontinuity_count != format_index:
  141. continue
  142. if ad_frag_next:
  143. continue
  144. frag_index += 1
  145. if frag_index <= ctx['fragment_index']:
  146. continue
  147. frag_url = (
  148. line
  149. if re.match(r'^https?://', line)
  150. else compat_urlparse.urljoin(man_url, line))
  151. if extra_query:
  152. frag_url = update_url_query(frag_url, extra_query)
  153. fragments.append({
  154. 'frag_index': frag_index,
  155. 'url': frag_url,
  156. 'decrypt_info': decrypt_info,
  157. 'byte_range': byte_range,
  158. 'media_sequence': media_sequence,
  159. })
  160. elif line.startswith('#EXT-X-MAP'):
  161. if format_index and discontinuity_count != format_index:
  162. continue
  163. if frag_index > 0:
  164. self.report_error(
  165. 'initialization fragment found after media fragments, unable to download')
  166. return False
  167. frag_index += 1
  168. map_info = parse_m3u8_attributes(line[11:])
  169. frag_url = (
  170. map_info.get('URI')
  171. if re.match(r'^https?://', map_info.get('URI'))
  172. else compat_urlparse.urljoin(man_url, map_info.get('URI')))
  173. if extra_query:
  174. frag_url = update_url_query(frag_url, extra_query)
  175. fragments.append({
  176. 'frag_index': frag_index,
  177. 'url': frag_url,
  178. 'decrypt_info': decrypt_info,
  179. 'byte_range': byte_range,
  180. 'media_sequence': media_sequence
  181. })
  182. if map_info.get('BYTERANGE'):
  183. splitted_byte_range = map_info.get('BYTERANGE').split('@')
  184. sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
  185. byte_range = {
  186. 'start': sub_range_start,
  187. 'end': sub_range_start + int(splitted_byte_range[0]),
  188. }
  189. elif line.startswith('#EXT-X-KEY'):
  190. decrypt_url = decrypt_info.get('URI')
  191. decrypt_info = parse_m3u8_attributes(line[11:])
  192. if decrypt_info['METHOD'] == 'AES-128':
  193. if 'IV' in decrypt_info:
  194. decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
  195. if not re.match(r'^https?://', decrypt_info['URI']):
  196. decrypt_info['URI'] = compat_urlparse.urljoin(
  197. man_url, decrypt_info['URI'])
  198. if extra_query:
  199. decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
  200. if decrypt_url != decrypt_info['URI']:
  201. decrypt_info['KEY'] = None
  202. elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
  203. media_sequence = int(line[22:])
  204. elif line.startswith('#EXT-X-BYTERANGE'):
  205. splitted_byte_range = line[17:].split('@')
  206. sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
  207. byte_range = {
  208. 'start': sub_range_start,
  209. 'end': sub_range_start + int(splitted_byte_range[0]),
  210. }
  211. elif is_ad_fragment_start(line):
  212. ad_frag_next = True
  213. elif is_ad_fragment_end(line):
  214. ad_frag_next = False
  215. elif line.startswith('#EXT-X-DISCONTINUITY'):
  216. discontinuity_count += 1
  217. i += 1
  218. media_sequence += 1
  219. # We only download the first fragment during the test
  220. if test:
  221. fragments = [fragments[0] if fragments else None]
  222. if real_downloader:
  223. info_copy = info_dict.copy()
  224. info_copy['fragments'] = fragments
  225. fd = real_downloader(self.ydl, self.params)
  226. # TODO: Make progress updates work without hooking twice
  227. # for ph in self._progress_hooks:
  228. # fd.add_progress_hook(ph)
  229. success = fd.real_download(filename, info_copy)
  230. if not success:
  231. return False
  232. else:
  233. def download_fragment(fragment):
  234. frag_index = fragment['frag_index']
  235. frag_url = fragment['url']
  236. decrypt_info = fragment['decrypt_info']
  237. byte_range = fragment['byte_range']
  238. media_sequence = fragment['media_sequence']
  239. ctx['fragment_index'] = frag_index
  240. count = 0
  241. headers = info_dict.get('http_headers', {})
  242. if byte_range:
  243. headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
  244. while count <= fragment_retries:
  245. try:
  246. success, frag_content = self._download_fragment(
  247. ctx, frag_url, info_dict, headers)
  248. if not success:
  249. return False, frag_index
  250. break
  251. except compat_urllib_error.HTTPError as err:
  252. # Unavailable (possibly temporary) fragments may be served.
  253. # First we try to retry then either skip or abort.
  254. # See https://github.com/ytdl-org/youtube-dl/issues/10165,
  255. # https://github.com/ytdl-org/youtube-dl/issues/10448).
  256. count += 1
  257. if count <= fragment_retries:
  258. self.report_retry_fragment(err, frag_index, count, fragment_retries)
  259. if count > fragment_retries:
  260. return False, frag_index
  261. if decrypt_info['METHOD'] == 'AES-128':
  262. iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
  263. decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
  264. self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
  265. # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
  266. # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
  267. # not what it decrypts to.
  268. if not test:
  269. frag_content = AES.new(
  270. decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
  271. return frag_content, frag_index
  272. def append_fragment(frag_content, frag_index):
  273. if frag_content:
  274. fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
  275. try:
  276. file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
  277. ctx['fragment_filename_sanitized'] = frag_sanitized
  278. file.close()
  279. self._append_fragment(ctx, frag_content)
  280. return True
  281. except FileNotFoundError:
  282. if skip_unavailable_fragments:
  283. self.report_skip_fragment(frag_index)
  284. return True
  285. else:
  286. self.report_error(
  287. 'fragment %s not found, unable to continue' % frag_index)
  288. return False
  289. else:
  290. if skip_unavailable_fragments:
  291. self.report_skip_fragment(frag_index)
  292. return True
  293. else:
  294. self.report_error(
  295. 'fragment %s not found, unable to continue' % frag_index)
  296. return False
  297. max_workers = self.params.get('concurrent_fragment_downloads', 1)
  298. if can_threaded_download and max_workers > 1:
  299. self.report_warning('The download speed shown is only of one thread. This is a known issue')
  300. with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
  301. futures = [pool.submit(download_fragment, fragment) for fragment in fragments]
  302. # timeout must be 0 to return instantly
  303. done, not_done = concurrent.futures.wait(futures, timeout=0)
  304. try:
  305. while not_done:
  306. # Check every 1 second for KeyboardInterrupt
  307. freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
  308. done |= freshly_done
  309. except KeyboardInterrupt:
  310. for future in not_done:
  311. future.cancel()
  312. # timeout must be none to cancel
  313. concurrent.futures.wait(not_done, timeout=None)
  314. raise KeyboardInterrupt
  315. results = [future.result() for future in futures]
  316. for frag_content, frag_index in results:
  317. result = append_fragment(frag_content, frag_index)
  318. if not result:
  319. return False
  320. else:
  321. for fragment in fragments:
  322. frag_content, frag_index = download_fragment(fragment)
  323. result = append_fragment(frag_content, frag_index)
  324. if not result:
  325. return False
  326. self._finish_frag_download(ctx)
  327. return True