lsm.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. import re
  2. import urllib.parse
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. determine_ext,
  7. int_or_none,
  8. js_to_json,
  9. parse_iso8601,
  10. parse_qs,
  11. str_or_none,
  12. url_or_none,
  13. urljoin,
  14. )
  15. from ..utils.traversal import traverse_obj
  16. class LSMLREmbedIE(InfoExtractor):
  17. _VALID_URL = r'''(?x)
  18. https?://(?:
  19. (?:latvijasradio|lr1|lr2|klasika|lr4|naba|radioteatris)\.lsm|
  20. pieci
  21. )\.lv/[^/?#]+/(?:
  22. pleijeris|embed
  23. )/?\?(?:[^#]+&)?(?:show|id)=(?P<id>\d+)'''
  24. _TESTS = [{
  25. 'url': 'https://latvijasradio.lsm.lv/lv/embed/?theme=black&size=16x9&showCaptions=0&id=183522',
  26. 'md5': '719b33875cd1429846eeeaeec6df2830',
  27. 'info_dict': {
  28. 'id': 'a342781',
  29. 'ext': 'mp3',
  30. 'duration': 1823,
  31. 'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
  32. 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg',
  33. },
  34. }, {
  35. 'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9',
  36. 'info_dict': {
  37. 'id': '1270',
  38. },
  39. 'playlist_count': 3,
  40. 'playlist': [{
  41. 'md5': '2e61b6eceff00d14d57fdbbe6ab24cac',
  42. 'info_dict': {
  43. 'id': 'a297397',
  44. 'ext': 'mp3',
  45. 'title': 'Eriks Emanuels Šmits "Pilāta evaņģēlijs". 1. daļa',
  46. 'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f131ae81e3c.jpg',
  47. 'duration': 3300,
  48. },
  49. }],
  50. }, {
  51. 'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1269&theme=white&size=16x9',
  52. 'md5': '24810d4a961da2295d9860afdcaf4f5a',
  53. 'info_dict': {
  54. 'id': 'a230690',
  55. 'ext': 'mp3',
  56. 'title': 'Jens Ahlboms "Spārni". Radioizrāde ar Mārtiņa Freimaņa mūziku',
  57. 'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg',
  58. 'duration': 1788,
  59. },
  60. }, {
  61. 'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9',
  62. 'info_dict': {
  63. 'id': '166557',
  64. },
  65. 'playlist_count': 2,
  66. 'playlist': [{
  67. 'md5': '6a8b0927572f443f09c6e50a3ad65f2d',
  68. 'info_dict': {
  69. 'id': 'a303104',
  70. 'ext': 'mp3',
  71. 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
  72. 'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits',
  73. 'duration': 3222,
  74. },
  75. }, {
  76. 'md5': '5d5e191e718b7644e5118b7b4e093a6d',
  77. 'info_dict': {
  78. 'id': 'v303104',
  79. 'ext': 'mp4',
  80. 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
  81. 'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits - Video Version',
  82. 'duration': 3222,
  83. },
  84. }],
  85. }, {
  86. 'url': 'https://lr1.lsm.lv/lv/embed/?id=183522&show=0&theme=white&size=16x9',
  87. 'only_matching': True,
  88. }, {
  89. 'url': 'https://lr2.lsm.lv/lv/embed/?id=182126&show=0&theme=white&size=16x9',
  90. 'only_matching': True,
  91. }, {
  92. 'url': 'https://klasika.lsm.lv/lv/embed/?id=110806&show=0&theme=white&size=16x9',
  93. 'only_matching': True,
  94. }, {
  95. 'url': 'https://lr4.lsm.lv/lv/embed/?id=184282&show=0&theme=white&size=16x9',
  96. 'only_matching': True,
  97. }, {
  98. 'url': 'https://pieci.lv/lv/embed/?id=168896&show=0&theme=white&size=16x9',
  99. 'only_matching': True,
  100. }, {
  101. 'url': 'https://naba.lsm.lv/lv/embed/?id=182901&show=0&theme=white&size=16x9',
  102. 'only_matching': True,
  103. }, {
  104. 'url': 'https://radioteatris.lsm.lv/lv/embed/?id=176439&show=0&theme=white&size=16x9',
  105. 'only_matching': True,
  106. }, {
  107. 'url': 'https://lr1.lsm.lv/lv/pleijeris/?embed=0&id=48205&time=00%3A00&idx=0',
  108. 'only_matching': True,
  109. }]
  110. def _real_extract(self, url):
  111. query = parse_qs(url)
  112. video_id = traverse_obj(query, (
  113. ('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
  114. webpage = self._download_webpage(url, video_id)
  115. player_data, media_data = self._search_regex(
  116. r'LR\.audio\.Player\s*\([^{]*(?P<player>\{.*?\}),(?P<media>\{.*\})\);',
  117. webpage, 'player json', group=('player', 'media'))
  118. player_json = self._parse_json(
  119. player_data, video_id, transform_source=js_to_json, fatal=False) or {}
  120. media_json = self._parse_json(media_data, video_id, transform_source=js_to_json)
  121. entries = []
  122. for item in traverse_obj(media_json, (('audio', 'video'), lambda _, v: v['id'])):
  123. formats = []
  124. for source_url in traverse_obj(item, ('sources', ..., 'file', {url_or_none})):
  125. if determine_ext(source_url) == 'm3u8':
  126. formats.extend(self._extract_m3u8_formats(source_url, video_id, fatal=False))
  127. else:
  128. formats.append({'url': source_url})
  129. id_ = item['id']
  130. title = item.get('title')
  131. if id_.startswith('v') and not title:
  132. title = traverse_obj(
  133. media_json, ('audio', lambda _, v: v['id'][1:] == id_[1:], 'title',
  134. {lambda x: x and f'{x} - Video Version'}), get_all=False)
  135. entries.append({
  136. 'formats': formats,
  137. 'thumbnail': urljoin(url, player_json.get('poster')),
  138. 'id': id_,
  139. 'title': title,
  140. 'duration': traverse_obj(item, ('duration', {int_or_none})),
  141. })
  142. if len(entries) == 1:
  143. return entries[0]
  144. return self.playlist_result(entries, video_id)
  145. class LSMLTVEmbedIE(InfoExtractor):
  146. _VALID_URL = r'https?://ltv\.lsm\.lv/embed\?(?:[^#]+&)?c=(?P<id>[^#&]+)'
  147. _TESTS = [{
  148. 'url': 'https://ltv.lsm.lv/embed?c=eyJpdiI6IjQzbHVUeHAyaDJiamFjcjdSUUFKdnc9PSIsInZhbHVlIjoiMHl3SnJNRmd2TmFIdnZwOGtGUUpzODFzUEZ4SVVsN2xoRjliSW9vckUyMWZIWG8vbWVzaFFkY0lhNmRjbjRpaCIsIm1hYyI6ImMzNjdhMzFhNTFhZmY1ZmE0NWI5YmFjZGI1YmJiNGEyNjgzNDM4MjUzMWEwM2FmMDMyZDMwYWM1MDFjZmM5MGIiLCJ0YWciOiIifQ==',
  149. 'md5': '64f72a360ca530d5ed89c77646c9eee5',
  150. 'info_dict': {
  151. 'id': '46k_d23-6000-105',
  152. 'ext': 'mp4',
  153. 'timestamp': 1700589151,
  154. 'duration': 1442,
  155. 'upload_date': '20231121',
  156. 'title': 'D23-6000-105_cetstud',
  157. 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
  158. },
  159. }, {
  160. 'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
  161. 'md5': 'a1711e190fe680fdb68fd8413b378e87',
  162. 'info_dict': {
  163. 'id': 'wUnFArIPDSY',
  164. 'ext': 'mp4',
  165. 'uploader': 'LTV_16plus',
  166. 'release_date': '20220514',
  167. 'channel_url': 'https://www.youtube.com/channel/UCNMrnafwXD2XKeeQOyfkFCw',
  168. 'view_count': int,
  169. 'availability': 'public',
  170. 'thumbnail': 'https://i.ytimg.com/vi/wUnFArIPDSY/maxresdefault.jpg',
  171. 'release_timestamp': 1652544074,
  172. 'title': 'EIROVĪZIJA SALĀTOS',
  173. 'live_status': 'was_live',
  174. 'uploader_id': '@LTV16plus',
  175. 'comment_count': int,
  176. 'channel_id': 'UCNMrnafwXD2XKeeQOyfkFCw',
  177. 'channel_follower_count': int,
  178. 'categories': ['Entertainment'],
  179. 'duration': 5269,
  180. 'upload_date': '20220514',
  181. 'age_limit': 0,
  182. 'channel': 'LTV_16plus',
  183. 'playable_in_embed': True,
  184. 'tags': [],
  185. 'uploader_url': 'https://www.youtube.com/@LTV16plus',
  186. 'like_count': int,
  187. 'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
  188. },
  189. }]
  190. def _real_extract(self, url):
  191. video_id = urllib.parse.unquote(self._match_id(url))
  192. webpage = self._download_webpage(url, video_id)
  193. data = self._search_json(
  194. r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
  195. embed_type = traverse_obj(data, ('source', 'name', {str}))
  196. if embed_type == 'telia':
  197. ie_key = 'CloudyCDN'
  198. embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
  199. elif embed_type == 'youtube':
  200. ie_key = 'Youtube'
  201. embed_url = traverse_obj(data, ('source', 'id', {str}))
  202. else:
  203. raise ExtractorError(f'Unsupported embed type {embed_type!r}')
  204. return self.url_result(
  205. embed_url, ie_key, video_id, **traverse_obj(data, {
  206. 'title': ('parentInfo', 'title'),
  207. 'duration': ('parentInfo', 'duration', {int_or_none}),
  208. 'thumbnail': ('source', 'poster', {url_or_none}),
  209. }))
  210. class LSMReplayIE(InfoExtractor):
  211. _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
  212. _TESTS = [{
  213. 'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
  214. 'md5': '64f72a360ca530d5ed89c77646c9eee5',
  215. 'info_dict': {
  216. 'id': '46k_d23-6000-105',
  217. 'ext': 'mp4',
  218. 'timestamp': 1700586300,
  219. 'description': 'md5:0f1b14798cc39e1ae578bd0eb268f759',
  220. 'duration': 1442,
  221. 'upload_date': '20231121',
  222. 'title': '4. studija. Zolitūdes traģēdija un Inčupes stacija',
  223. 'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
  224. },
  225. }, {
  226. 'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
  227. 'md5': '719b33875cd1429846eeeaeec6df2830',
  228. 'info_dict': {
  229. 'id': 'a342781',
  230. 'ext': 'mp3',
  231. 'duration': 1823,
  232. 'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
  233. 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
  234. 'upload_date': '20231102',
  235. 'timestamp': 1698921060,
  236. 'description': 'md5:7bac3b2dd41e44325032943251c357b1',
  237. },
  238. }, {
  239. 'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
  240. 'only_matching': True,
  241. }]
  242. def _fix_nuxt_data(self, webpage):
  243. return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)', lambda m: m.group(1) or 'null', webpage)
  244. def _real_extract(self, url):
  245. video_id = self._match_id(url)
  246. webpage = self._download_webpage(url, video_id)
  247. data = self._search_nuxt_data(
  248. self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__')
  249. return {
  250. '_type': 'url_transparent',
  251. 'id': video_id,
  252. **traverse_obj(data, {
  253. 'url': ('playback', 'service', 'url', {url_or_none}),
  254. 'title': ('mediaItem', 'title'),
  255. 'description': ('mediaItem', ('lead', 'body')),
  256. 'duration': ('mediaItem', 'duration', {int_or_none}),
  257. 'timestamp': ('mediaItem', 'aired_at', {parse_iso8601}),
  258. 'thumbnail': ('mediaItem', 'largeThumbnail', {url_or_none}),
  259. }, get_all=False),
  260. }