From c2e7e9cdb2261adde01048d161914b156a3bad51 Mon Sep 17 00:00:00 2001 From: sepro Date: Thu, 20 Nov 2025 16:22:45 +0100 Subject: [PATCH 1/3] [ie/URPlay] Fix extractor (#15120) Closes #13028 Authored by: seproDev --- yt_dlp/extractor/urplay.py | 48 ++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index a0ac2a0bc6..ad48e350ef 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -7,15 +7,15 @@ from ..utils import ( parse_age_limit, try_get, unified_timestamp, + url_or_none, ) -from ..utils.traversal import traverse_obj +from ..utils.traversal import require, traverse_obj class URPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand', - 'md5': '5ba36643c77cc3d34ffeadad89937d1e', 'info_dict': { 'id': '203704', 'ext': 'mp4', @@ -31,6 +31,7 @@ class URPlayIE(InfoExtractor): 'episode': 'Om vetenskap, kritiskt tänkande och motstånd', 'age_limit': 15, }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://urplay.se/program/222967-en-foralders-dagbok-mitt-barn-skadar-sig-sjalv', 'info_dict': { @@ -49,6 +50,7 @@ class URPlayIE(InfoExtractor): 'tags': 'count:7', 'episode': 'Mitt barn skadar sig själv', }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 'info_dict': { @@ -68,6 +70,27 @@ class URPlayIE(InfoExtractor): 'episode': 'Sovkudde', 'season': 'Säsong 1', }, + 'params': {'skip_download': 'm3u8'}, + }, { + # Only accessible through new media api + 'url': 'https://urplay.se/program/242932-vulkanernas-krafter-fran-kraftfull-till-forgorande', + 'info_dict': { + 'id': '242932', + 'ext': 'mp4', + 'title': 'Vulkanernas krafter : Från kraftfull till förgörande', 
'description': 'md5:742bb87048e7d5a7f209d28f9bb70ab1', + 'age_limit': 15, + 'duration': 2613, + 'thumbnail': 'https://assets.ur.se/id/242932/images/1_hd.jpg', + 'categories': ['Vetenskap & teknik'], + 'tags': ['Geofysik', 'Naturvetenskap', 'Vulkaner', 'Vulkanutbrott'], + 'series': 'Vulkanernas krafter', + 'episode': 'Från kraftfull till förgörande', + 'episode_number': 2, + 'timestamp': 1763514000, + 'upload_date': '20251119', + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden', 'only_matching': True, @@ -88,21 +111,12 @@ class URPlayIE(InfoExtractor): webpage, 'urplayer data'), video_id)['accessibleEpisodes'] urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id)) episode = urplayer_data['title'] - - host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] - formats = [] - urplayer_streams = urplayer_data.get('streamingInfo', {}) - - for k, v in urplayer_streams.get('raw', {}).items(): - if not (k in ('sd', 'hd', 'mp3', 'm4a') and isinstance(v, dict)): - continue - file_http = v.get('location') - if file_http: - formats.extend(self._extract_wowza_formats( - f'http://{host}/{file_http}playlist.m3u8', - video_id, skip_protocols=['f4m', 'rtmp', 'rtsp'])) - - subtitles = {} + sources = self._download_json( + f'https://media-api.urplay.se/config-streaming/v1/urplay/sources/{video_id}', video_id, + note='Downloading streaming information') + hls_url = traverse_obj(sources, ('sources', 'hls', {url_or_none}, {require('HLS URL')})) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, 'mp4', m3u8_id='hls') def parse_lang_code(code): "3-character language code or None (utils candidate)" From 20f83f208eae863250b35e2761adad88e91d85a1 Mon Sep 17 00:00:00 2001 From: "Michael D." 
Date: Thu, 20 Nov 2025 19:56:25 +0100 Subject: [PATCH 2/3] [ie/netapp] Add extractors (#15122) Closes #14902 Authored by: darkstar --- yt_dlp/extractor/_extractors.py | 4 ++ yt_dlp/extractor/netapp.py | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 yt_dlp/extractor/netapp.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fc77804692..5f82ad54e9 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1284,6 +1284,10 @@ from .nest import ( NestClipIE, NestIE, ) +from .netapp import ( + NetAppCollectionIE, + NetAppVideoIE, +) from .neteasemusic import ( NetEaseMusicAlbumIE, NetEaseMusicDjRadioIE, diff --git a/yt_dlp/extractor/netapp.py b/yt_dlp/extractor/netapp.py new file mode 100644 index 0000000000..a665472094 --- /dev/null +++ b/yt_dlp/extractor/netapp.py @@ -0,0 +1,79 @@ +from .brightcove import BrightcoveNewIE +from .common import InfoExtractor +from ..utils import parse_iso8601 +from ..utils.traversal import require, traverse_obj + + +class NetAppBaseIE(InfoExtractor): + _BC_URL = 'https://players.brightcove.net/6255154784001/default_default/index.html?videoId={}' + + @staticmethod + def _parse_metadata(item): + return traverse_obj(item, { + 'title': ('name', {str}), + 'description': ('description', {str}), + 'timestamp': ('createdAt', {parse_iso8601}), + }) + + +class NetAppVideoIE(NetAppBaseIE): + _VALID_URL = r'https?://media\.netapp\.com/video-detail/(?P<id>[0-9a-f-]+)' + + _TESTS = [{ + 'url': 'https://media.netapp.com/video-detail/da25fc01-82ad-5284-95bc-26920200a222/seamless-storage-for-modern-kubernetes-deployments', + 'info_dict': { + 'id': '1843620950167202073', + 'ext': 'mp4', + 'title': 'Seamless storage for modern Kubernetes deployments', + 'description': 'md5:1ee39e315243fe71fb90af2796037248', + 'uploader_id': '6255154784001', + 'duration': 2159.41, + 'thumbnail': 
r're:https://house-fastly-signed-us-east-1-prod\.brightcovecdn\.com/image/.*\.jpg', + 'tags': 'count:15', + 'timestamp': 1758213949, + 'upload_date': '20250918', + }, + }, { + 'url': 'https://media.netapp.com/video-detail/45593e5d-cf1c-5996-978c-c9081906e69f/unleash-ai-innovation-with-your-data-with-the-netapp-platform', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_uuid = self._match_id(url) + metadata = self._download_json( + f'https://api.media.netapp.com/client/detail/{video_uuid}', video_uuid) + + brightcove_video_id = traverse_obj(metadata, ( + 'sections', lambda _, v: v['type'] == 'Player', 'video', {str}, any, {require('brightcove video id')})) + + video_item = traverse_obj(metadata, ('sections', lambda _, v: v['type'] == 'VideoDetail', any)) + + return self.url_result( + self._BC_URL.format(brightcove_video_id), BrightcoveNewIE, brightcove_video_id, + url_transparent=True, **self._parse_metadata(video_item)) + + +class NetAppCollectionIE(NetAppBaseIE): + _VALID_URL = r'https?://media\.netapp\.com/collection/(?P<id>[0-9a-f-]+)' + _TESTS = [{ + 'url': 'https://media.netapp.com/collection/9820e190-f2a6-47ac-9c0a-98e5e64234a4', + 'info_dict': { + 'title': 'Featured sessions', + 'id': '9820e190-f2a6-47ac-9c0a-98e5e64234a4', + }, + 'playlist_count': 4, + }] + + def _entries(self, metadata): + for item in traverse_obj(metadata, ('items', lambda _, v: v['brightcoveVideoId'])): + brightcove_video_id = item['brightcoveVideoId'] + yield self.url_result( + self._BC_URL.format(brightcove_video_id), BrightcoveNewIE, brightcove_video_id, + url_transparent=True, **self._parse_metadata(item)) + + def _real_extract(self, url): + collection_uuid = self._match_id(url) + metadata = self._download_json( + f'https://api.media.netapp.com/client/collection/{collection_uuid}', collection_uuid) + + return self.playlist_result(self._entries(metadata), collection_uuid, playlist_title=metadata.get('name')) From 6842620d56e4c4e6affb90c2f8dff8a36dee852c Mon Sep 17 
00:00:00 2001 From: Elioo <79273475+beliote@users.noreply.github.com> Date: Thu, 20 Nov 2025 20:01:07 +0100 Subject: [PATCH 3/3] [ie/Digiteka] Rework extractor (#14903) Closes #12454 Authored by: beliote --- yt_dlp/extractor/digiteka.py | 95 +++++++++++++++--------------------- 1 file changed, 39 insertions(+), 56 deletions(-) diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py index e56ec63e86..1bbec62165 100644 --- a/yt_dlp/extractor/digiteka.py +++ b/yt_dlp/extractor/digiteka.py @@ -1,5 +1,6 @@ from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import int_or_none, url_or_none +from ..utils.traversal import traverse_obj class DigitekaIE(InfoExtractor): @@ -25,74 +26,56 @@ class DigitekaIE(InfoExtractor): )/(?P<id>[\d+a-z]+)''' _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)'] _TESTS = [{ - # news - 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r', - 'md5': '276a0e49de58c7e85d32b057837952a2', + 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/3x5x55k', 'info_dict': { - 'id': 's8uk0r', + 'id': '3x5x55k', 'ext': 'mp4', - 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées', + 'title': 'Il est passionné de DS', 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 74, - 'upload_date': '20150317', - 'timestamp': 1426604939, - 'uploader_id': '3fszv', + 'duration': 89, + 'upload_date': '20251012', + 'timestamp': 1760285363, + 'uploader_id': '3pz33', }, - }, { - # music - 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8', - 'md5': '2ea3513813cf230605c7e2ffe7eca61c', - 'info_dict': { - 'id': 'xvpfp8', - 'ext': 'mp4', - 'title': 'Two - C\'est La Vie (clip)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 233, - 'upload_date': '20150224', - 'timestamp': 1424760500, - 'uploader_id': '3rfzk', - }, - }, { - 'url': 
'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes', - 'only_matching': True, + 'params': {'skip_download': True}, }] + _IFRAME_MD_ID = '01836272' # One static ID working for Ultimedia iframes def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - video_type = mobj.group('embed_type') or mobj.group('site_type') - if video_type == 'music': - video_type = 'musique' + video_id = self._match_id(url) - deliver_info = self._download_json( - f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}', - video_id) - - yt_id = deliver_info.get('yt_id') - if yt_id: - return self.url_result(yt_id, 'Youtube') - - jwconf = deliver_info['jwconf'] + video_info = self._download_json( + f'https://www.ultimedia.com/player/getConf/{self._IFRAME_MD_ID}/1/{video_id}', video_id, + note='Downloading player configuration')['video'] formats = [] - for source in jwconf['playlist'][0]['sources']: - formats.append({ - 'url': source['file'], - 'format_id': source.get('label'), - }) + subtitles = {} - title = deliver_info['title'] - thumbnail = jwconf.get('image') - duration = int_or_none(deliver_info.get('duration')) - timestamp = int_or_none(deliver_info.get('release_time')) - uploader_id = deliver_info.get('owner_id') + if hls_url := traverse_obj(video_info, ('media_sources', 'hls', 'hls_auto', {url_or_none})): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + for format_id, mp4_url in traverse_obj(video_info, ('media_sources', 'mp4', {dict.items}, ...)): + if not mp4_url: + continue + formats.append({ + 'url': mp4_url, + 'format_id': format_id, + 'height': int_or_none(format_id.partition('_')[2]), + 'ext': 'mp4', + }) return { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 
'uploader_id': uploader_id, 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(video_info, { + 'title': ('title', {str}), + 'thumbnail': ('image', {url_or_none}), + 'duration': ('duration', {int_or_none}), + 'timestamp': ('creationDate', {int_or_none}), + 'uploader_id': ('ownerId', {str}), + }), }