- _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
+ _VALID_URL = r'https?://(?:watch\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
_TESTS = [
{
- 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
+ 'url': 'https://watch.dropout.tv/dimension-20-fantasy-high/season:1',
'note': 'Multi-season series with the season in the url',
'playlist_count': 24,
'info_dict': {
@@ -179,7 +179,7 @@ class DropoutSeasonIE(InfoExtractor):
},
},
{
- 'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
+ 'url': 'https://watch.dropout.tv/dimension-20-fantasy-high',
'note': 'Multi-season series with the season not in the url',
'playlist_count': 24,
'info_dict': {
@@ -188,7 +188,7 @@ class DropoutSeasonIE(InfoExtractor):
},
},
{
- 'url': 'https://www.dropout.tv/dimension-20-shriek-week',
+ 'url': 'https://watch.dropout.tv/dimension-20-shriek-week',
'note': 'Single-season series',
'playlist_count': 4,
'info_dict': {
@@ -197,7 +197,7 @@ class DropoutSeasonIE(InfoExtractor):
},
},
{
- 'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3',
+ 'url': 'https://watch.dropout.tv/breaking-news-no-laugh-newsroom/season:3',
'note': 'Multi-season series with season in the url that requires pagination',
'playlist_count': 25,
'info_dict': {
diff --git a/yt_dlp/extractor/dtube.py b/yt_dlp/extractor/dtube.py
index 0d87820c4c..2c47de8448 100644
--- a/yt_dlp/extractor/dtube.py
+++ b/yt_dlp/extractor/dtube.py
@@ -1,5 +1,4 @@
import json
-import socket
from .common import InfoExtractor
from ..utils import (
@@ -56,7 +55,7 @@ class DTubeIE(InfoExtractor):
try:
self.to_screen(f'{video_id}: Checking {format_id} video format URL')
self._downloader._opener.open(video_url, timeout=5).close()
- except socket.timeout:
+ except TimeoutError:
self.to_screen(
f'{video_id}: {format_id} URL is invalid, skipping')
continue
diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py
index a2d1a828b4..3319b12681 100644
--- a/yt_dlp/extractor/fujitv.py
+++ b/yt_dlp/extractor/fujitv.py
@@ -56,7 +56,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts')
for f in fmt:
f.update(dict(zip(('height', 'width'),
- self._BITRATE_MAP.get(f.get('tbr'), ()))))
+ self._BITRATE_MAP.get(f.get('tbr'), ()), strict=False)))
formats.extend(fmt)
subtitles = self._merge_subtitles(subtitles, subs)
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index 0c84f0b241..91c9f60cd8 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -1,21 +1,20 @@
import re
-import urllib.parse
from .common import InfoExtractor
-from .youtube import YoutubeIE
from ..utils import (
- ExtractorError,
- bug_reports_message,
determine_ext,
extract_attributes,
+ filter_dict,
get_element_by_class,
get_element_html_by_id,
int_or_none,
- lowercase_escape,
- parse_qs,
- try_get,
+ mimetype2ext,
+ parse_duration,
+ str_or_none,
update_url_query,
+ url_or_none,
)
+from ..utils.traversal import traverse_obj, value
class GoogleDriveIE(InfoExtractor):
@@ -38,8 +37,8 @@ class GoogleDriveIE(InfoExtractor):
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
- 'duration': 45,
- 'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ 'duration': 45.069,
+ 'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
},
}, {
# has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
@@ -49,8 +48,29 @@ class GoogleDriveIE(InfoExtractor):
'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
'ext': 'mp3',
'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
- 'duration': 184,
- 'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+ 'duration': 184.68,
+ },
+ }, {
+ # Has subtitle track
+ 'url': 'https://drive.google.com/file/d/1RAGWRgzn85TXCaCk4gxnwF6TGUaZatzE/view',
+ 'md5': '05488c528da6ef737ec8c962bfa9724e',
+ 'info_dict': {
+ 'id': '1RAGWRgzn85TXCaCk4gxnwF6TGUaZatzE',
+ 'ext': 'mp4',
+ 'title': 'test.mp4',
+ 'duration': 9.999,
+ 'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
+ },
+ }, {
+ # Has subtitle track with kind 'asr'
+ 'url': 'https://drive.google.com/file/d/1Prvv9-mtDDfN_gkJgtt1OFvIULK8c3Ev/view',
+ 'md5': 'ccae12d07f18b5988900b2c8b92801fc',
+ 'info_dict': {
+ 'id': '1Prvv9-mtDDfN_gkJgtt1OFvIULK8c3Ev',
+ 'ext': 'mp4',
+ 'title': 'LEE NA GYUNG-3410-VOICE_MESSAGE.mp4',
+ 'duration': 8.766,
+ 'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
},
}, {
# video can't be watched anonymously due to view count limit reached,
@@ -71,17 +91,6 @@ class GoogleDriveIE(InfoExtractor):
'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'only_matching': True,
}]
- _FORMATS_EXT = {
- **{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
- '50': 'm4a',
- }
- _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
- _CAPTIONS_ENTRY_TAG = {
- 'subtitles': 'track',
- 'automatic_captions': 'target',
- }
- _caption_formats_ext = []
- _captions_xml = None
@classmethod
def _extract_embed_urls(cls, url, webpage):
@@ -91,129 +100,73 @@ class GoogleDriveIE(InfoExtractor):
if mobj:
yield 'https://drive.google.com/file/d/{}'.format(mobj.group('id'))
- def _download_subtitles_xml(self, video_id, subtitles_id, hl):
- if self._captions_xml:
- return
- self._captions_xml = self._download_xml(
- self._BASE_URL_CAPTIONS, video_id, query={
- 'id': video_id,
- 'vid': subtitles_id,
- 'hl': hl,
+ @staticmethod
+ def _construct_subtitle_url(base_url, video_id, language, fmt, kind):
+ return update_url_query(
+ base_url, filter_dict({
+ 'hl': 'en-US',
'v': video_id,
+ 'type': 'track',
+ 'lang': language,
+ 'fmt': fmt,
+ 'kind': kind,
+ }))
+
+ def _get_subtitles(self, video_id, video_info):
+ subtitles = {}
+ timed_text_base_url = traverse_obj(video_info, ('timedTextDetails', 'timedTextBaseUrl', {url_or_none}))
+ if not timed_text_base_url:
+ return subtitles
+ subtitle_data = self._download_xml(
+ timed_text_base_url, video_id, 'Downloading subtitles XML', fatal=False, query={
+ 'hl': 'en-US',
'type': 'list',
- 'tlangs': '1',
- 'fmts': '1',
- 'vssids': '1',
- }, note='Downloading subtitles XML',
- errnote='Unable to download subtitles XML', fatal=False)
- if self._captions_xml:
- for f in self._captions_xml.findall('format'):
- if f.attrib.get('fmt_code') and not f.attrib.get('default'):
- self._caption_formats_ext.append(f.attrib['fmt_code'])
-
- def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
- origin_lang_code=None, origin_lang_name=None):
- if not subtitles_id or not caption_type:
- return
- captions = {}
- for caption_entry in self._captions_xml.findall(
- self._CAPTIONS_ENTRY_TAG[caption_type]):
- caption_lang_code = caption_entry.attrib.get('lang_code')
- caption_name = caption_entry.attrib.get('name') or origin_lang_name
- if not caption_lang_code or not caption_name:
- self.report_warning(f'Missing necessary caption metadata. '
- f'Need lang_code and name attributes. '
- f'Found: {caption_entry.attrib}')
- continue
- caption_format_data = []
- for caption_format in self._caption_formats_ext:
- query = {
- 'vid': subtitles_id,
- 'v': video_id,
- 'fmt': caption_format,
- 'lang': (caption_lang_code if origin_lang_code is None
- else origin_lang_code),
- 'type': 'track',
- 'name': caption_name,
- 'kind': '',
- }
- if origin_lang_code is not None:
- query.update({'tlang': caption_lang_code})
- caption_format_data.append({
- 'url': update_url_query(self._BASE_URL_CAPTIONS, query),
- 'ext': caption_format,
- })
- captions[caption_lang_code] = caption_format_data
- return captions
-
- def _get_subtitles(self, video_id, subtitles_id, hl):
- if not subtitles_id or not hl:
- return
- self._download_subtitles_xml(video_id, subtitles_id, hl)
- if not self._captions_xml:
- return
- return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
-
- def _get_automatic_captions(self, video_id, subtitles_id, hl):
- if not subtitles_id or not hl:
- return
- self._download_subtitles_xml(video_id, subtitles_id, hl)
- if not self._captions_xml:
- return
- track = next((t for t in self._captions_xml.findall('track') if t.attrib.get('cantran') == 'true'), None)
- if track is None:
- return
- origin_lang_code = track.attrib.get('lang_code')
- origin_lang_name = track.attrib.get('name')
- if not origin_lang_code or not origin_lang_name:
- return
- return self._get_captions_by_type(
- video_id, subtitles_id, 'automatic_captions', origin_lang_code, origin_lang_name)
+ 'tlangs': 1,
+ 'v': video_id,
+ 'vssids': 1,
+ })
+ subtitle_formats = traverse_obj(subtitle_data, (lambda _, v: v.tag == 'format', {lambda x: x.get('fmt_code')}, {str}))
+ for track in traverse_obj(subtitle_data, (lambda _, v: v.tag == 'track' and v.get('lang_code'))):
+ language = track.get('lang_code')
+ subtitles.setdefault(language, []).extend([{
+ 'url': self._construct_subtitle_url(
+ timed_text_base_url, video_id, language, sub_fmt, track.get('kind')),
+ 'name': track.get('lang_original'),
+ 'ext': sub_fmt,
+ } for sub_fmt in subtitle_formats])
+ return subtitles
def _real_extract(self, url):
video_id = self._match_id(url)
- video_info = urllib.parse.parse_qs(self._download_webpage(
- 'https://drive.google.com/get_video_info',
- video_id, 'Downloading video webpage', query={'docid': video_id}))
-
- def get_value(key):
- return try_get(video_info, lambda x: x[key][0])
-
- reason = get_value('reason')
- title = get_value('title')
+ video_info = self._download_json(
+ f'https://content-workspacevideo-pa.googleapis.com/v1/drive/media/{video_id}/playback',
+ video_id, 'Downloading video webpage', query={'key': 'AIzaSyDVQw45DwoYh632gvsP5vPDqEKvb-Ywnb8'},
+ headers={'Referer': 'https://drive.google.com/'})
formats = []
- fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
- fmt_list = (get_value('fmt_list') or '').split(',')
- if fmt_stream_map and fmt_list:
- resolutions = {}
- for fmt in fmt_list:
- mobj = re.search(
- r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
- if mobj:
- resolutions[mobj.group('format_id')] = (
- int(mobj.group('width')), int(mobj.group('height')))
+ for fmt in traverse_obj(video_info, (
+ 'mediaStreamingData', 'formatStreamingData', ('adaptiveTranscodes', 'progressiveTranscodes'),
+ lambda _, v: url_or_none(v['url']))):
+ formats.append({
+ **traverse_obj(fmt, {
+ 'url': 'url',
+ 'format_id': ('itag', {int}, {str_or_none}),
+ }),
+ **traverse_obj(fmt, ('transcodeMetadata', {
+ 'ext': ('mimeType', {mimetype2ext}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'fps': ('videoFps', {int_or_none}),
+ 'filesize': ('contentLength', {int_or_none}),
+ 'vcodec': ((('videoCodecString', {str}), {value('none')}), any),
+ 'acodec': ((('audioCodecString', {str}), {value('none')}), any),
+ })),
+ 'downloader_options': {
+ 'http_chunk_size': 10 << 20,
+ },
+ })
- for fmt_stream in fmt_stream_map:
- fmt_stream_split = fmt_stream.split('|')
- if len(fmt_stream_split) < 2:
- continue
- format_id, format_url = fmt_stream_split[:2]
- ext = self._FORMATS_EXT.get(format_id)
- if not ext:
- self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
- f = {
- 'url': lowercase_escape(format_url),
- 'format_id': format_id,
- 'ext': ext,
- }
- resolution = resolutions.get(format_id)
- if resolution:
- f.update({
- 'width': resolution[0],
- 'height': resolution[1],
- })
- formats.append(f)
+ title = traverse_obj(video_info, ('mediaMetadata', 'title', {str}))
source_url = update_url_query(
'https://drive.usercontent.google.com/download', {
@@ -264,30 +217,20 @@ class GoogleDriveIE(InfoExtractor):
or get_element_by_class('uc-error-caption', confirmation_webpage)
or 'unable to extract confirmation code')
- if not formats and reason:
- if title:
- self.raise_no_formats(reason, expected=True)
- else:
- raise ExtractorError(reason, expected=True)
-
- hl = get_value('hl')
- subtitles_id = None
- ttsurl = get_value('ttsurl')
- if ttsurl:
- # the subtitles ID is the vid param of the ttsurl query
- subtitles_id = parse_qs(ttsurl).get('vid', [None])[-1]
-
- self.cookiejar.clear(domain='.google.com', path='/', name='NID')
-
return {
'id': video_id,
'title': title,
- 'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
- 'duration': int_or_none(get_value('length_seconds')),
+ **traverse_obj(video_info, {
+ 'duration': ('mediaMetadata', 'duration', {parse_duration}),
+ 'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['url']), {
+ 'url': 'url',
+ 'ext': ('mimeType', {mimetype2ext}),
+ 'width': ('width', {int}),
+ 'height': ('height', {int}),
+ }),
+ }),
'formats': formats,
- 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
- 'automatic_captions': self.extract_automatic_captions(
- video_id, subtitles_id, hl),
+ 'subtitles': self.extract_subtitles(video_id, video_info),
}
diff --git a/yt_dlp/extractor/idagio.py b/yt_dlp/extractor/idagio.py
new file mode 100644
index 0000000000..a99c559065
--- /dev/null
+++ b/yt_dlp/extractor/idagio.py
@@ -0,0 +1,262 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, unified_timestamp, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class IdagioTrackIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?app\.idagio\.com(?:/[a-z]{2})?/recordings/\d+\?(?:[^#]+&)?trackId=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://app.idagio.com/recordings/30576934?trackId=30576943',
+ 'md5': '15148bd71804b2450a2508931a116b56',
+ 'info_dict': {
+ 'id': '30576943',
+ 'ext': 'mp3',
+ 'title': 'Theme. Andante',
+ 'duration': 82,
+ 'composers': ['Edward Elgar'],
+ 'artists': ['Vasily Petrenko', 'Royal Liverpool Philharmonic Orchestra'],
+ 'genres': ['Orchestral', 'Other Orchestral Music'],
+ 'track': 'Theme. Andante',
+ 'timestamp': 1554474370,
+ 'upload_date': '20190405',
+ },
+ }, {
+ 'url': 'https://app.idagio.com/recordings/20514467?trackId=20514478&utm_source=pcl',
+ 'md5': '3acef2ea0feadf889123b70e5a1e7fa7',
+ 'info_dict': {
+ 'id': '20514478',
+ 'ext': 'mp3',
+ 'title': 'I. Adagio sostenuto',
+ 'duration': 316,
+ 'composers': ['Ludwig van Beethoven'],
+ 'genres': ['Keyboard', 'Sonata (Keyboard)'],
+ 'track': 'I. Adagio sostenuto',
+ 'timestamp': 1518076337,
+ 'upload_date': '20180208',
+ },
+ }, {
+ 'url': 'https://app.idagio.com/de/recordings/20514467?trackId=20514478&utm_source=pcl',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ track_id = self._match_id(url)
+ track_info = self._download_json(
+ f'https://api.idagio.com/v2.0/metadata/tracks/{track_id}',
+ track_id, fatal=False, expected_status=406)
+ if traverse_obj(track_info, 'error_code') == 'idagio.error.blocked.location':
+ self.raise_geo_restricted()
+
+ content_info = self._download_json(
+ f'https://api.idagio.com/v1.8/content/track/{track_id}', track_id,
+ query={
+ 'quality': '0',
+ 'format': '2',
+ 'client_type': 'web-4',
+ })
+
+ return {
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ 'id': track_id,
+ 'url': traverse_obj(content_info, ('url', {url_or_none})),
+ **traverse_obj(track_info, ('result', {
+ 'title': ('piece', 'title', {str}),
+ 'timestamp': ('recording', 'created_at', {int_or_none(scale=1000)}),
+ 'location': ('recording', 'location', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'track': ('piece', 'title', {str}),
+ 'artists': ('recording', ('conductor', ('ensembles', ...), ('soloists', ...)), 'name', {str}, filter),
+ 'composers': ('piece', 'workpart', 'work', 'composer', 'name', {str}, filter, all, filter),
+ 'genres': ('piece', 'workpart', 'work', ('genre', 'subgenre'), 'title', {str}, filter),
+ })),
+ }
+
+
+class IdagioPlaylistBaseIE(InfoExtractor):
+ """Subclasses must set _API_URL_TMPL and define _parse_playlist_metadata"""
+ _PLAYLIST_ID_KEY = 'id' # vs. 'display_id'
+
+ def _entries(self, playlist_info):
+ for track_data in traverse_obj(playlist_info, ('tracks', lambda _, v: v['id'] and v['recording']['id'])):
+ track_id = track_data['id']
+ recording_id = track_data['recording']['id']
+ yield self.url_result(
+ f'https://app.idagio.com/recordings/{recording_id}?trackId={track_id}',
+ ie=IdagioTrackIE, video_id=track_id)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ playlist_info = self._download_json(
+ self._API_URL_TMPL.format(playlist_id), playlist_id)['result']
+
+ return {
+ '_type': 'playlist',
+ self._PLAYLIST_ID_KEY: playlist_id,
+ 'entries': self._entries(playlist_info),
+ **self._parse_playlist_metadata(playlist_info),
+ }
+
+
+class IdagioRecordingIE(IdagioPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?app\.idagio\.com(?:/[a-z]{2})?/recordings/(?P<id>\d+)(?![^#]*[&?]trackId=\d+)'
+ _TESTS = [{
+ 'url': 'https://app.idagio.com/recordings/30576934',
+ 'info_dict': {
+ 'id': '30576934',
+ 'title': 'Variations on an Original Theme op. 36',
+ 'composers': ['Edward Elgar'],
+ 'artists': ['Vasily Petrenko', 'Royal Liverpool Philharmonic Orchestra'],
+ 'genres': ['Orchestral', 'Other Orchestral Music'],
+ 'timestamp': 1554474370,
+ 'modified_timestamp': 1554474370,
+ 'modified_date': '20190405',
+ 'upload_date': '20190405',
+ },
+ 'playlist_count': 15,
+ }, {
+ 'url': 'https://app.idagio.com/de/recordings/20514467',
+ 'info_dict': {
+ 'id': '20514467',
+ 'title': 'Sonata for Piano No. 14 in C sharp minor op. 27/2',
+ 'composers': ['Ludwig van Beethoven'],
+ 'genres': ['Keyboard', 'Sonata (Keyboard)'],
+ 'timestamp': 1518076337,
+ 'upload_date': '20180208',
+ 'modified_timestamp': 1518076337,
+ 'modified_date': '20180208',
+ },
+ 'playlist_count': 3,
+ }]
+ _API_URL_TMPL = 'https://api.idagio.com/v2.0/metadata/recordings/{}'
+
+ def _parse_playlist_metadata(self, playlist_info):
+ return traverse_obj(playlist_info, {
+ 'title': ('work', 'title', {str}),
+ 'timestamp': ('created_at', {int_or_none(scale=1000)}),
+ 'modified_timestamp': ('created_at', {int_or_none(scale=1000)}),
+ 'location': ('location', {str}),
+ 'artists': (('conductor', ('ensembles', ...), ('soloists', ...)), 'name', {str}),
+ 'composers': ('work', 'composer', 'name', {str}, all),
+ 'genres': ('work', ('genre', 'subgenre'), 'title', {str}),
+ 'tags': ('tags', ..., {str}),
+ })
+
+
+class IdagioAlbumIE(IdagioPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?app\.idagio\.com(?:/[a-z]{2})?/albums/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://app.idagio.com/albums/elgar-enigma-variations-in-the-south-serenade-for-strings',
+ 'info_dict': {
+ 'id': 'a9f139b8-f70d-4b8a-a9a4-5fe8d35eaf9c',
+ 'display_id': 'elgar-enigma-variations-in-the-south-serenade-for-strings',
+ 'title': 'Elgar: Enigma Variations, In the South, Serenade for Strings',
+ 'description': '',
+ 'thumbnail': r're:https://.+/albums/880040420521/main\.jpg',
+ 'artists': ['Vasily Petrenko', 'Royal Liverpool Philharmonic Orchestra', 'Edward Elgar'],
+ 'timestamp': 1553817600,
+ 'upload_date': '20190329',
+ 'modified_timestamp': 1562566559.0,
+ 'modified_date': '20190708',
+ },
+ 'playlist_count': 19,
+ }, {
+ 'url': 'https://app.idagio.com/de/albums/brahms-ein-deutsches-requiem-3B403DF6-62D7-4A42-807B-47173F3E0192',
+ 'info_dict': {
+ 'id': '2862ad4e-4a61-45ad-9ce4-7fcf0c2626fe',
+ 'display_id': 'brahms-ein-deutsches-requiem-3B403DF6-62D7-4A42-807B-47173F3E0192',
+ 'title': 'Brahms: Ein deutsches Requiem',
+ 'description': 'GRAMOPHONE CLASSICAL MUSIC AWARDS 2025 Recording of the Year & Choral',
+ 'thumbnail': r're:https://.+/albums/3149020954522/main\.jpg',
+ 'artists': ['Sabine Devieilhe', 'Stéphane Degout', 'Raphaël Pichon', 'Pygmalion', 'Johannes Brahms'],
+ 'timestamp': 1760054400,
+ 'upload_date': '20251010',
+ 'modified_timestamp': 1760624868,
+ 'modified_date': '20251016',
+ 'tags': ['recommended', 'recent-release'],
+ },
+ 'playlist_count': 7,
+ }]
+ _API_URL_TMPL = 'https://api.idagio.com/v2.0/metadata/albums/{}'
+ _PLAYLIST_ID_KEY = 'display_id'
+
+ def _parse_playlist_metadata(self, playlist_info):
+ return traverse_obj(playlist_info, {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'timestamp': ('publishDate', {unified_timestamp}),
+ 'modified_timestamp': ('lastModified', {unified_timestamp}),
+ 'thumbnail': ('imageUrl', {url_or_none}),
+ 'description': ('description', {str}),
+ 'artists': ('participants', ..., 'name', {str}),
+ 'tags': ('tags', ..., {str}),
+ })
+
+
+class IdagioPlaylistIE(IdagioPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?app\.idagio\.com(?:/[a-z]{2})?/playlists/(?!personal/)(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://app.idagio.com/playlists/beethoven-the-most-beautiful-piano-music',
+ 'info_dict': {
+ 'id': '31652bec-8c5b-460e-a3f0-cf1f69817f53',
+ 'display_id': 'beethoven-the-most-beautiful-piano-music',
+ 'title': 'Beethoven: the most beautiful piano music',
+ 'description': 'md5:d41bb04b8896bb69377f5c2cd9345ad1',
+ 'thumbnail': r're:https://.+/playlists/31652bec-8c5b-460e-a3f0-cf1f69817f53/main\.jpg',
+ 'creators': ['IDAGIO'],
+ },
+ 'playlist_mincount': 16, # one entry is geo-restricted
+ }, {
+ 'url': 'https://app.idagio.com/de/playlists/piano-music-for-an-autumn-day',
+ 'info_dict': {
+ 'id': 'd70e9c7f-7080-4308-ae0f-f890dddeda82',
+ 'display_id': 'piano-music-for-an-autumn-day',
+ 'title': 'Piano Music for an Autumn Day',
+ 'description': 'Get ready to snuggle up and enjoy all the musical colours of this cosy, autumnal playlist.',
+ 'thumbnail': r're:https://.+/playlists/d70e9c7f-7080-4308-ae0f-f890dddeda82/main\.jpg',
+ 'creators': ['IDAGIO'],
+ },
+ 'playlist_count': 35,
+ }]
+ _API_URL_TMPL = 'https://api.idagio.com/v2.0/playlists/{}'
+ _PLAYLIST_ID_KEY = 'display_id'
+
+ def _parse_playlist_metadata(self, playlist_info):
+ return traverse_obj(playlist_info, {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'thumbnail': ('imageUrl', {url_or_none}),
+ 'description': ('description', {str}),
+ 'creators': ('curator', 'name', {str}, all),
+ })
+
+
+class IdagioPersonalPlaylistIE(IdagioPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?app\.idagio\.com(?:/[a-z]{2})?/playlists/personal/(?P<id>[\da-f-]+)'
+ _TESTS = [{
+ 'url': 'https://app.idagio.com/playlists/personal/99dad72e-7b3a-45a4-b216-867c08046ed8',
+ 'info_dict': {
+ 'id': '99dad72e-7b3a-45a4-b216-867c08046ed8',
+ 'title': 'Test',
+ 'creators': ['1a6f16a6-4514-4d0c-b481-3a9877835626'],
+ 'thumbnail': r're:https://.+/artists/86371/main\.jpg',
+ 'timestamp': 1602859138,
+ 'modified_timestamp': 1755616667,
+ 'upload_date': '20201016',
+ 'modified_date': '20250819',
+ },
+ 'playlist_count': 100,
+ }, {
+ 'url': 'https://app.idagio.com/de/playlists/personal/99dad72e-7b3a-45a4-b216-867c08046ed8',
+ 'only_matching': True,
+ }]
+ _API_URL_TMPL = 'https://api.idagio.com/v1.0/personal-playlists/{}'
+
+ def _parse_playlist_metadata(self, playlist_info):
+ return traverse_obj(playlist_info, {
+ 'title': ('title', {str}),
+ 'thumbnail': ('image_url', {url_or_none}),
+ 'creators': ('user_id', {str}, all),
+ 'timestamp': ('created_at', {int_or_none(scale=1000)}),
+ 'modified_timestamp': ('updated_at', {int_or_none(scale=1000)}),
+ })
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index dc11cbf6be..98516b8ce1 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -437,7 +437,7 @@ class KalturaIE(InfoExtractor):
params = urllib.parse.parse_qs(query)
if path:
splitted_path = path.split('/')
- params.update(dict(zip(splitted_path[::2], [[v] for v in splitted_path[1::2]])))
+ params.update(dict(zip(splitted_path[::2], [[v] for v in splitted_path[1::2]]))) # noqa: B905
if 'wid' in params:
partner_id = remove_start(params['wid'][0], '_')
elif 'p' in params:
diff --git a/yt_dlp/extractor/kika.py b/yt_dlp/extractor/kika.py
index e277564524..94798b9ac3 100644
--- a/yt_dlp/extractor/kika.py
+++ b/yt_dlp/extractor/kika.py
@@ -17,57 +17,60 @@ class KikaIE(InfoExtractor):
_GEO_COUNTRIES = ['DE']
_TESTS = [{
- 'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
- 'md5': 'fbfc8da483719ef06f396e5e5b938c69',
+ # Video without season/episode info
+ 'url': 'https://www.kika.de/logo/videos/logo-vom-dienstag-achtundzwanzig-oktober-zweitausendfuenfundzwanzig-100',
+ 'md5': '4a9f6e0f9c6bfcc82394c294f186d6db',
'info_dict': {
- 'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
+ 'id': 'logo-vom-dienstag-achtundzwanzig-oktober-zweitausendfuenfundzwanzig-100',
'ext': 'mp4',
- 'upload_date': '20240831',
- 'timestamp': 1725126600,
- 'season_number': 2024,
- 'modified_date': '20240831',
- 'episode': 'Episode 476',
- 'episode_number': 476,
- 'season': 'Season 2024',
- 'duration': 634,
- 'title': 'logo! vom Samstag, 31. August 2024',
- 'modified_timestamp': 1725129983,
+ 'title': 'logo! vom Dienstag, 28. Oktober 2025',
+ 'description': 'md5:4d28b92cef423bec99740ffaa3e7ec04',
+ 'duration': 651,
+ 'timestamp': 1761678000,
+ 'upload_date': '20251028',
+ 'modified_timestamp': 1761682624,
+ 'modified_date': '20251028',
},
}, {
+ # Video with season/episode info
+ # Also: Video with subtitles
'url': 'https://www.kika.de/kaltstart/videos/video92498',
- 'md5': '710ece827e5055094afeb474beacb7aa',
+ 'md5': 'e58073070acb195906c55c4ad31dceb3',
'info_dict': {
'id': 'video92498',
'ext': 'mp4',
'title': '7. Wo ist Leo?',
'description': 'md5:fb48396a5b75068bcac1df74f1524920',
'duration': 436,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Episode 7',
+ 'episode_number': 7,
'timestamp': 1702926876,
'upload_date': '20231218',
- 'episode_number': 7,
- 'modified_date': '20240319',
'modified_timestamp': 1710880610,
- 'episode': 'Episode 7',
- 'season_number': 1,
- 'season': 'Season 1',
+ 'modified_date': '20240319',
+ 'subtitles': 'count:1',
},
}, {
- 'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
- 'md5': 'ffd1b700d7de0a6616a1d08544c77294',
+ # Video without subtitles
+ 'url': 'https://www.kika.de/die-pfefferkoerner/videos/abgezogen-102',
+ 'md5': '62e97961ce5343c19f0f330a1b6dd736',
'info_dict': {
- 'id': 'video90088',
+ 'id': 'abgezogen-102',
'ext': 'mp4',
- 'upload_date': '20221102',
- 'timestamp': 1667390580,
- 'duration': 197,
- 'modified_timestamp': 1711093771,
- 'episode_number': 8,
- 'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
- 'modified_date': '20240322',
- 'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
- 'season_number': 1,
- 'episode': 'Episode 8',
+ 'title': '1. Abgezogen',
+ 'description': 'md5:42d87963364391f9f8eba8affcb30bd2',
+ 'duration': 1574,
'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'timestamp': 1735382700,
+ 'upload_date': '20241228',
+ 'modified_timestamp': 1757344051,
+ 'modified_date': '20250908',
+ 'subtitles': 'count:0',
},
}]
@@ -78,16 +81,19 @@ class KikaIE(InfoExtractor):
video_assets = self._download_json(doc['assets']['url'], video_id)
subtitles = {}
- if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
- subtitles['de'] = [{
- 'url': ttml_resource,
- 'ext': 'ttml',
- }]
- if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
- subtitles.setdefault('de', []).append({
- 'url': webvtt_resource,
- 'ext': 'vtt',
- })
+ # Subtitle API endpoints may be present in the JSON even if there are no subtitles.
+ # They then return HTTP 200 with invalid data. So we must check explicitly.
+ if doc.get('hasSubtitle'):
+ if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
+ subtitles['de'] = [{
+ 'url': ttml_resource,
+ 'ext': 'ttml',
+ }]
+ if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
+ subtitles.setdefault('de', []).append({
+ 'url': webvtt_resource,
+ 'ext': 'vtt',
+ })
return {
'id': video_id,
diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py
index bfd4619337..f7cf9261a8 100644
--- a/yt_dlp/extractor/lynda.py
+++ b/yt_dlp/extractor/lynda.py
@@ -1,3 +1,4 @@
+import itertools
import re
import urllib.parse
@@ -216,7 +217,7 @@ class LyndaIE(LyndaBaseIE):
def _fix_subtitles(self, subs):
srt = ''
seq_counter = 0
- for seq_current, seq_next in zip(subs, subs[1:]):
+ for seq_current, seq_next in itertools.pairwise(subs):
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
if m_current is None:
continue
diff --git a/yt_dlp/extractor/mojevideo.py b/yt_dlp/extractor/mojevideo.py
index 145e306970..1f95ed8bc0 100644
--- a/yt_dlp/extractor/mojevideo.py
+++ b/yt_dlp/extractor/mojevideo.py
@@ -92,7 +92,7 @@ class MojevideoIE(InfoExtractor):
contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)
formats = []
- for video_hash, (suffix, quality, format_note) in zip(video_hashes, [
+ for video_hash, (suffix, quality, format_note) in zip(video_hashes, [ # noqa: B905
('', 1, 'normálna kvalita'),
('_lq', 0, 'nízka kvalita'),
('_hd', 2, 'HD-720p'),
diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py
index 0ef2fa0c88..c171e58b3f 100644
--- a/yt_dlp/extractor/musescore.py
+++ b/yt_dlp/extractor/musescore.py
@@ -1,3 +1,5 @@
+import hashlib
+
from .common import InfoExtractor
@@ -9,10 +11,10 @@ class MuseScoreIE(InfoExtractor):
'id': '142975',
'ext': 'mp3',
'title': 'WA Mozart Marche Turque (Turkish March fingered)',
- 'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be',
- 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'description': 'md5:0ca4cf6b79d7f5868a1fee74097394ab',
+ 'thumbnail': r're:https?://cdn\.ustatik\.com/musescore/.*\.jpg',
'uploader': 'PapyPiano',
- 'creator': 'Wolfgang Amadeus Mozart',
+ 'creators': ['Wolfgang Amadeus Mozart'],
},
}, {
'url': 'https://musescore.com/user/36164500/scores/6837638',
@@ -20,10 +22,10 @@ class MuseScoreIE(InfoExtractor):
'id': '6837638',
'ext': 'mp3',
'title': 'Sweet Child O\' Mine – Guns N\' Roses sweet child',
- 'description': 'md5:4dca71191c14abc312a0a4192492eace',
- 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'description': 'md5:2cd49bd6b4e48a75a3c469d4775d5079',
+ 'thumbnail': r're:https?://cdn\.ustatik\.com/musescore/.*\.png',
'uploader': 'roxbelviolin',
- 'creator': 'Guns N´Roses Arr. Roxbel Violin',
+ 'creators': ['Guns N´Roses Arr. Roxbel Violin'],
},
}, {
'url': 'https://musescore.com/classicman/fur-elise',
@@ -31,22 +33,28 @@ class MuseScoreIE(InfoExtractor):
'id': '33816',
'ext': 'mp3',
'title': 'Für Elise – Beethoven',
- 'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34',
- 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'description': 'md5:e37b241c0280b33e9ac25651b815d06e',
+ 'thumbnail': r're:https?://cdn\.ustatik\.com/musescore/.*\.jpg',
'uploader': 'ClassicMan',
- 'creator': 'Ludwig van Beethoven (1770–1827)',
+ 'creators': ['Ludwig van Beethoven (1770–1827)'],
},
}, {
'url': 'https://musescore.com/minh_cuteee/scores/6555384',
'only_matching': True,
}]
+ @staticmethod
+ def _generate_auth_token(video_id):
+ return hashlib.md5((video_id + 'mp30gs').encode()).hexdigest()[:4]
+
def _real_extract(self, url):
webpage = self._download_webpage(url, None)
url = self._og_search_url(webpage) or url
video_id = self._match_id(url)
- mp3_url = self._download_json(f'https://musescore.com/api/jmuse?id={video_id}&index=0&type=mp3&v2=1', video_id,
- headers={'authorization': '63794e5461e4cfa046edfbdddfccc1ac16daffd2'})['info']['url']
+ mp3_url = self._download_json(
+ 'https://musescore.com/api/jmuse', video_id,
+ headers={'authorization': self._generate_auth_token(video_id)},
+ query={'id': video_id, 'index': '0', 'type': 'mp3'})['info']['url']
formats = [{
'url': mp3_url,
'ext': 'mp3',
@@ -57,7 +65,7 @@ class MuseScoreIE(InfoExtractor):
'id': video_id,
'formats': formats,
'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
+ 'description': self._html_search_meta('description', webpage, 'description'),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': self._html_search_meta('musescore:author', webpage, 'uploader'),
'creator': self._html_search_meta('musescore:composer', webpage, 'composer'),
diff --git a/yt_dlp/extractor/nascar.py b/yt_dlp/extractor/nascar.py
new file mode 100644
index 0000000000..b14a3b0aa1
--- /dev/null
+++ b/yt_dlp/extractor/nascar.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ parse_iso8601,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NascarClassicsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?classics\.nascar\.com/video/(?P[\w~-]+)'
+ _TESTS = [{
+ 'url': 'https://classics.nascar.com/video/Ka5qGuxzZ~SIvJii7uAC~wszPshklHN',
+ 'md5': '81d712eccffa7169c328281b8cc28f77',
+ 'info_dict': {
+ 'id': 'Ka5qGuxzZ~SIvJii7uAC~wszPshklHN',
+ 'ext': 'mp4',
+ 'title': 'Cook Out 400 2023',
+ 'thumbnail': 'https://va.aws.nascar.com/IMAGES/CUP_2023_22_RICHMOND_THUMB_NCD.jpg',
+ 'timestamp': 1690732800,
+ 'upload_date': '20230730',
+ 'tags': ['2023', 'race #22', 'richmond', 'chris buescher', 'cup'],
+ 'chapters': 'count:18',
+ },
+ }, {
+ 'url': 'https://classics.nascar.com/video/UASvPDOwEha~SIvJii7uAC~wszPshklHN',
+ 'md5': 'a5e8d6ec6005da3857d25ba2df5e7133',
+ 'info_dict': {
+ 'id': 'UASvPDOwEha~SIvJii7uAC~wszPshklHN',
+ 'ext': 'mp4',
+ 'title': 'I Love New York 355 at the Glen 2017',
+ 'thumbnail': 'https://va.aws.nascar.com/IMAGES/CUP_2017_22_WATKINSGLEN_THUMB_NCD.jpg',
+ 'timestamp': 1501995600,
+ 'upload_date': '20170806',
+ 'tags': ['watkins glen', 'race #22', '2017', 'martin truex jr.', 'cup'],
+ 'chapters': 'count:13',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ content_data = self._search_nextjs_data(
+ webpage, video_id)['props']['pageProps']['contentData']
+
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(content_data['input']['src'], video_id, 'mp4'),
+ **traverse_obj(content_data, {
+ 'title': ('input', 'name', {str}),
+ 'description': ('input', 'description', {str}, filter),
+ 'thumbnail': ('input', 'thumbnail', {url_or_none}),
+ 'tags': ('input', 'settings', 'tags', ..., {str}),
+ 'timestamp': ('input', 'start_time', {parse_iso8601}),
+ 'chapters': ('overlay', 'data', 'timelines', 0, 'events', lambda _, v: float(v['timestamp']) is not None, {
+ 'start_time': ('timestamp', {float_or_none}),
+ 'title': ('name', {str}),
+ }),
+ }),
+ }
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index caa9dc0175..41811b8a20 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -63,7 +63,7 @@ class NBCUniversalBaseIE(ThePlatformBaseIE):
# formats='mpeg4' will return either a working m3u8 URL or an m3u8 template for non-DRM HLS
# formats='m3u+none,mpeg4' may return DRM HLS but w/the "folders" needed for non-DRM template
query['formats'] = 'm3u+none,mpeg4'
- m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
+ orig_m3u8_url = m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
if mobj := re.fullmatch(self._M3U8_RE, m3u8_url):
query['formats'] = 'mpeg4'
@@ -76,7 +76,17 @@ class NBCUniversalBaseIE(ThePlatformBaseIE):
if '/mpeg_cenc' in m3u8_url or '/mpeg_cbcs' in m3u8_url:
self.report_drm(video_id)
- return self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+
+ if not formats and m3u8_url != orig_m3u8_url:
+ orig_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
+ orig_m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats = [f for f in orig_fmts if not f.get('has_drm')]
+ if orig_fmts and not formats:
+ self.report_drm(video_id)
+
+ return formats, subtitles
def _extract_nbcu_video(self, url, display_id, old_ie_key=None):
webpage = self._download_webpage(url, display_id)
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index 14fbd6ce82..eef3ed820c 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -503,7 +503,7 @@ class NhkForSchoolBangumiIE(InfoExtractor):
'start_time': s,
'end_time': e,
'title': t,
- } for s, e, t in zip(start_time, end_time, chapter_titles)]
+ } for s, e, t in zip(start_time, end_time, chapter_titles, strict=True)]
return {
'id': video_id,
diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py
index 53b1994156..dde734ff3d 100644
--- a/yt_dlp/extractor/pbs.py
+++ b/yt_dlp/extractor/pbs.py
@@ -181,7 +181,7 @@ class PBSIE(InfoExtractor):
)
IE_NAME = 'pbs'
- IE_DESC = 'Public Broadcasting Service (PBS) and member stations: {}'.format(', '.join(list(zip(*_STATIONS))[1]))
+ IE_DESC = 'Public Broadcasting Service (PBS) and member stations: {}'.format(', '.join(list(zip(*_STATIONS, strict=True))[1]))
_VALID_URL = r'''(?x)https?://
(?:
@@ -193,7 +193,7 @@ class PBSIE(InfoExtractor):
(?:[^/?#]+/){{1,5}}(?P[^/?#]+?)(?:\.html)?/?(?:$|[?#])
)
)
- '''.format('|'.join(next(zip(*_STATIONS))))
+ '''.format('|'.join(next(zip(*_STATIONS, strict=True))))
_GEO_COUNTRIES = ['US']
diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py
index 9d0496bdf0..8777f987e6 100644
--- a/yt_dlp/extractor/polskieradio.py
+++ b/yt_dlp/extractor/polskieradio.py
@@ -405,7 +405,7 @@ class PolskieRadioCategoryIE(InfoExtractor):
tab_content = self._download_json(
'https://www.polskieradio.pl/CMS/TemplateBoxesManagement/TemplateBoxTabContent.aspx/GetTabContent',
category_id, f'Downloading page {page_num}', headers={'content-type': 'application/json'},
- data=json.dumps(dict(zip((
+ data=json.dumps(dict(zip(( # noqa: B905
'boxInstanceId', 'tabId', 'categoryType', 'sectionId', 'categoryId', 'pagerMode',
'subjectIds', 'tagIndexId', 'queryString', 'name', 'openArticlesInParentTemplate',
'idSectionFromUrl', 'maxDocumentAge', 'showCategoryForArticle', 'pageNumber',
diff --git a/yt_dlp/extractor/pr0gramm.py b/yt_dlp/extractor/pr0gramm.py
index d5d6ecdfd8..0ba78a4f42 100644
--- a/yt_dlp/extractor/pr0gramm.py
+++ b/yt_dlp/extractor/pr0gramm.py
@@ -155,7 +155,7 @@ class Pr0grammIE(InfoExtractor):
# Sorted by "confidence", higher confidence = earlier in list
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
if confidences:
- tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
+ tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)] # noqa: B905
formats = traverse_obj(video_info, ('variants', ..., {
'format_id': ('name', {str}),
diff --git a/yt_dlp/extractor/prankcast.py b/yt_dlp/extractor/prankcast.py
index 84e6f7ebcf..68eb382e70 100644
--- a/yt_dlp/extractor/prankcast.py
+++ b/yt_dlp/extractor/prankcast.py
@@ -1,8 +1,8 @@
import json
from .common import InfoExtractor
-from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
-from ..utils.traversal import traverse_obj
+from ..utils import float_or_none, parse_iso8601, str_or_none, try_call, url_or_none
+from ..utils.traversal import traverse_obj, value
class PrankCastIE(InfoExtractor):
@@ -100,9 +100,38 @@ class PrankCastPostIE(InfoExtractor):
'duration': 263.287,
'cast': ['despicabledogs'],
'description': 'https://imgur.com/a/vtxLvKU',
- 'categories': [],
'upload_date': '20240104',
},
+ }, {
+ 'url': 'https://prankcast.com/drtomservo/posts/11988-butteye-s-late-night-stank-episode-1-part-1-',
+ 'info_dict': {
+ 'id': '11988',
+ 'ext': 'mp3',
+ 'title': 'Butteye\'s Late Night Stank Episode 1 (Part 1)',
+ 'display_id': 'butteye-s-late-night-stank-episode-1-part-1-',
+ 'timestamp': 1754238686,
+ 'uploader': 'DrTomServo',
+ 'channel_id': '136',
+ 'duration': 2176.464,
+ 'cast': ['DrTomServo'],
+ 'description': '',
+ 'upload_date': '20250803',
+ },
+ }, {
+ 'url': 'https://prankcast.com/drtomservo/posts/12105-butteye-s-late-night-stank-episode-08-16-2025-part-2',
+ 'info_dict': {
+ 'id': '12105',
+ 'ext': 'mp3',
+ 'title': 'Butteye\'s Late Night Stank Episode 08-16-2025 Part 2',
+ 'display_id': 'butteye-s-late-night-stank-episode-08-16-2025-part-2',
+ 'timestamp': 1755453505,
+ 'uploader': 'DrTomServo',
+ 'channel_id': '136',
+ 'duration': 19018.392,
+ 'cast': ['DrTomServo'],
+ 'description': '',
+ 'upload_date': '20250817',
+ },
}]
def _real_extract(self, url):
@@ -112,26 +141,28 @@ class PrankCastPostIE(InfoExtractor):
post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
content = self._parse_json(post['post_contents_json'], video_id)[0]
- uploader = post.get('user_name')
- guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}
-
return {
'id': video_id,
- 'title': post.get('post_title') or self._og_search_title(webpage),
'display_id': display_id,
- 'url': content.get('url'),
- 'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
- 'uploader': uploader,
- 'channel_id': str_or_none(post.get('user_id')),
- 'duration': float_or_none(content.get('duration')),
- 'cast': list(filter(None, [uploader, *traverse_obj(guests_json, (..., 'name'))])),
- 'description': post.get('post_body'),
- 'categories': list(filter(None, [content.get('category')])),
- 'tags': try_call(lambda: list(filter('', post['post_tags'].split(',')))),
- 'subtitles': {
- 'live_chat': [{
- 'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
- 'ext': 'json',
- }],
- } if post.get('content_id') else None,
+ 'title': self._og_search_title(webpage),
+ **traverse_obj(post, {
+ 'title': ('post_title', {str}),
+ 'description': ('post_body', {str}),
+ 'tags': ('post_tags', {lambda x: x.split(',')}, ..., {str.strip}, filter),
+ 'channel_id': ('user_id', {int}, {str_or_none}),
+ 'uploader': ('user_name', {str}),
+ }),
+ **traverse_obj(content, {
+ 'url': (('secure_url', 'url'), {url_or_none}, any),
+ 'timestamp': ((
+ (('start_date', 'crdate'), {parse_iso8601(delimiter=' ')}),
+ ('created_at', {parse_iso8601}),
+ ), any),
+ 'duration': ('duration', {float_or_none}),
+ 'categories': ('category', {str}, filter, all, filter),
+ 'cast': ((
+ {value(post.get('user_name'))},
+ ('guests_json', {json.loads}, ..., 'name'),
+ ), {str}, filter),
+ }),
}
diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py
index e684ac7b8e..c95feb9d34 100644
--- a/yt_dlp/extractor/slideslive.py
+++ b/yt_dlp/extractor/slideslive.py
@@ -248,35 +248,17 @@ class SlidesLiveIE(InfoExtractor):
'skip_download': 'm3u8',
},
}, {
- # /v3/ slides, .jpg and .png, service_name = youtube
+ # /v3/ slides, .jpg and .png, formerly service_name = youtube, now native
'url': 'https://slideslive.com/embed/38932460/',
'info_dict': {
- 'id': 'RTPdrgkyTiE',
- 'display_id': '38932460',
+ 'id': '38932460',
'ext': 'mp4',
'title': 'Active Learning for Hierarchical Multi-Label Classification',
- 'description': 'Watch full version of this video at https://slideslive.com/38932460.',
- 'channel': 'SlidesLive Videos - A',
- 'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
- 'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
- 'uploader': 'SlidesLive Videos - A',
- 'uploader_id': '@slideslivevideos-a6075',
- 'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
- 'upload_date': '20200903',
- 'timestamp': 1697805922,
- 'duration': 942,
- 'age_limit': 0,
- 'live_status': 'not_live',
- 'playable_in_embed': True,
- 'availability': 'unlisted',
- 'categories': ['People & Blogs'],
- 'tags': [],
- 'channel_follower_count': int,
- 'like_count': int,
- 'view_count': int,
- 'thumbnail': r're:^https?://.*\.(?:jpg|png|webp)',
- 'thumbnails': 'count:21',
+ 'duration': 941,
+ 'thumbnail': r're:https?://.+/.+\.(?:jpg|png)',
'chapters': 'count:20',
+ 'timestamp': 1708338974,
+ 'upload_date': '20240219',
},
'params': {
'skip_download': 'm3u8',
@@ -425,7 +407,7 @@ class SlidesLiveIE(InfoExtractor):
player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
player_data = self._download_webpage(
- f'https://ben.slideslive.com/player/{video_id}', video_id,
+ f'https://slideslive.com/player/{video_id}', video_id,
note='Downloading player info', query={'player_token': player_token})
player_info = self._extract_custom_m3u8_info(player_data)
@@ -525,7 +507,7 @@ class SlidesLiveIE(InfoExtractor):
yield info
service_data = self._download_json(
- f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
+ f'https://slideslive.com/player/{video_id}/slides_video_service_data',
video_id, fatal=False, query={
'player_token': player_token,
'videos': ','.join(video_slides),
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 2cc98c66ce..7833081bfa 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -438,7 +438,7 @@ class SoundcloudIE(SoundcloudBaseIE):
(?P[\w\d-]+)
(?:/(?P(?!(?:albums|sets|recommended))[^?]+?))?
(?:[?].*)?$)
- |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P\d+)
+ |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?:soundcloud%3Atracks%3A)?(?P\d+)
(?:/?\?secret_token=(?P[^&]+))?)
)
'''
@@ -692,6 +692,9 @@ class SoundcloudIE(SoundcloudBaseIE):
# Go+ (account with active subscription needed)
'url': 'https://soundcloud.com/taylorswiftofficial/look-what-you-made-me-do',
'only_matching': True,
+ }, {
+ 'url': 'https://api.soundcloud.com/tracks/soundcloud%3Atracks%3A1083788353',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
index bf5dddde42..bf82f4bfda 100644
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -1,12 +1,20 @@
+import base64
+import datetime as dt
import itertools
+import json
+import re
+import time
from .common import InfoExtractor
-from ..networking import HEADRequest
+from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
+ encode_data_uri,
+ filter_dict,
int_or_none,
- update_url_query,
+ jwt_decode_hs256,
url_or_none,
+ urlencode_postdata,
urljoin,
)
from ..utils.traversal import traverse_obj
@@ -90,7 +98,7 @@ class TenPlayIE(InfoExtractor):
'only_matching': True,
}]
_GEO_BYPASS = False
-
+ _GEO_COUNTRIES = ['AU']
_AUS_AGES = {
'G': 0,
'PG': 15,
@@ -100,31 +108,155 @@ class TenPlayIE(InfoExtractor):
'R': 18,
'X': 18,
}
+ _TOKEN_CACHE_KEY = 'token_data'
+ _SEGMENT_BITRATE_RE = r'(?m)-(?:300|150|75|55)0000-(\d+(?:-[\da-f]+)?)\.ts$'
+
+ _refresh_token = None
+ _access_token = None
+
+ @staticmethod
+ def _filter_ads_from_m3u8(m3u8_doc):
+ out = []
+ for line in m3u8_doc.splitlines():
+ if line.startswith('https://redirector.googlevideo.com/'):
+ out.pop()
+ continue
+ out.append(line)
+
+ return '\n'.join(out)
+
+ @staticmethod
+ def _generate_xnetwork_ten_auth_token():
+ ts = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%d%H%M%S')
+ return base64.b64encode(ts.encode()).decode()
+
+ @staticmethod
+ def _is_jwt_expired(token):
+ return jwt_decode_hs256(token)['exp'] - time.time() < 300
+
+ def _refresh_access_token(self):
+ try:
+ refresh_data = self._download_json(
+ 'https://10.com.au/api/token/refresh', None, 'Refreshing access token',
+ headers={
+ 'Content-Type': 'application/json',
+ }, data=json.dumps({
+ 'accessToken': self._access_token,
+ 'refreshToken': self._refresh_token,
+ }).encode())
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ self._refresh_token = self._access_token = None
+ self.cache.store(self._NETRC_MACHINE, self._TOKEN_CACHE_KEY, [None, None])
+ self.report_warning('Refresh token has been invalidated; retrying with credentials')
+ self._perform_login(*self._get_login_info())
+ return
+ raise
+ self._access_token = refresh_data['accessToken']
+ self._refresh_token = refresh_data['refreshToken']
+ self.cache.store(self._NETRC_MACHINE, self._TOKEN_CACHE_KEY, [self._refresh_token, self._access_token])
+
+ def _perform_login(self, username, password):
+ if not self._refresh_token:
+ self._refresh_token, self._access_token = self.cache.load(
+ self._NETRC_MACHINE, self._TOKEN_CACHE_KEY, default=[None, None])
+ if self._refresh_token and self._access_token:
+ self.write_debug('Using cached refresh token')
+ return
+
+ try:
+ auth_data = self._download_json(
+ 'https://10.com.au/api/user/auth', None, 'Logging in',
+ headers={
+ 'Content-Type': 'application/json',
+ 'X-Network-Ten-Auth': self._generate_xnetwork_ten_auth_token(),
+ 'Referer': 'https://10.com.au/',
+ }, data=json.dumps({
+ 'email': username,
+ 'password': password,
+ }).encode())
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+ raise ExtractorError('Invalid username/password', expected=True)
+ raise
+
+ self._refresh_token = auth_data['jwt']['refreshToken']
+ self._access_token = auth_data['jwt']['accessToken']
+ self.cache.store(self._NETRC_MACHINE, self._TOKEN_CACHE_KEY, [self._refresh_token, self._access_token])
+
+ def _call_playback_api(self, content_id):
+ if self._access_token and self._is_jwt_expired(self._access_token):
+ self._refresh_access_token()
+ for is_retry in (False, True):
+ try:
+ return self._download_json_handle(
+ f'https://10.com.au/api/v1/videos/playback/{content_id}/', content_id,
+ note='Downloading video JSON', query={'platform': 'samsung'},
+ headers=filter_dict({
+ 'TP-AcceptFeature': 'v1/fw;v1/drm',
+ 'Authorization': f'Bearer {self._access_token}' if self._access_token else None,
+ }))
+ except ExtractorError as e:
+ if not is_retry and isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ if self._access_token:
+ self.to_screen('Access token has expired; refreshing')
+ self._refresh_access_token()
+ continue
+ elif not self._get_login_info()[0]:
+ self.raise_login_required('Login required to access this video', method='password')
+ raise
def _real_extract(self, url):
content_id = self._match_id(url)
- data = self._download_json(
- 'https://10.com.au/api/v1/videos/' + content_id, content_id)
+ try:
+ data = self._download_json(f'https://10.com.au/api/v1/videos/{content_id}', content_id)
+ except ExtractorError as e:
+ if (
+ isinstance(e.cause, HTTPError) and e.cause.status == 403
+ and 'Error 54113' in e.cause.response.read().decode()
+ ):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise
- video_data = self._download_json(
- f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}',
- content_id, 'Downloading video JSON')
- # Dash URL 404s, changing the m3u8 format works
- m3u8_url = self._request_webpage(
- HEADRequest(update_url_query(video_data['items'][0]['dashManifestUrl'], {
- 'manifest': 'm3u',
- })),
- content_id, 'Checking stream URL').url
- if '10play-not-in-oz' in m3u8_url:
- self.raise_geo_restricted(countries=['AU'])
- if '10play_unsupported' in m3u8_url:
- raise ExtractorError('Unable to extract stream')
- # Attempt to get a higher quality stream
- formats = self._extract_m3u8_formats(
- m3u8_url.replace(',150,75,55,0000', ',500,300,150,75,55,0000'),
- content_id, 'mp4', fatal=False)
- if not formats:
- formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
+ video_data, urlh = self._call_playback_api(content_id)
+ content_source_id = video_data['dai']['contentSourceId']
+ video_id = video_data['dai']['videoId']
+ auth_token = urlh.get_header('x-dai-auth')
+ if not auth_token:
+ raise ExtractorError('Failed to get DAI auth token')
+
+ dai_data = self._download_json(
+ f'https://pubads.g.doubleclick.net/ondemand/hls/content/{content_source_id}/vid/{video_id}/streams',
+ content_id, note='Downloading DAI JSON',
+ data=urlencode_postdata({'auth-token': auth_token}))
+
+ # Ignore subs to avoid ad break cleanup
+ formats, _ = self._extract_m3u8_formats_and_subtitles(
+ dai_data['stream_manifest'], content_id, 'mp4')
+
+ already_have_1080p = False
+ for fmt in formats:
+ m3u8_doc = self._download_webpage(
+ fmt['url'], content_id, note='Downloading m3u8 information')
+ m3u8_doc = self._filter_ads_from_m3u8(m3u8_doc)
+ fmt['hls_media_playlist_data'] = m3u8_doc
+ if fmt.get('height') == 1080:
+ already_have_1080p = True
+
+ # Attempt format upgrade
+ if not already_have_1080p and m3u8_doc and re.search(self._SEGMENT_BITRATE_RE, m3u8_doc):
+ m3u8_doc = re.sub(self._SEGMENT_BITRATE_RE, r'-5000000-\1.ts', m3u8_doc)
+ m3u8_doc = re.sub(r'-(?:300|150|75|55)0000\.key"', r'-5000000.key"', m3u8_doc)
+ formats.append({
+ 'format_id': 'upgrade-attempt-1080p',
+ 'url': encode_data_uri(m3u8_doc.encode(), 'application/x-mpegurl'),
+ 'hls_media_playlist_data': m3u8_doc,
+ 'width': 1920,
+ 'height': 1080,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ '__needs_testing': True,
+ })
return {
'id': content_id,
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index e165effd4e..b7e058ebe7 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -81,7 +81,7 @@ class TikTokBaseIE(InfoExtractor):
}
self._APP_INFO_POOL = [
{**defaults, **dict(
- (k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
+ (k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/'), strict=False) if v
)} for app_info in self._KNOWN_APP_INFO
]
@@ -220,7 +220,7 @@ class TikTokBaseIE(InfoExtractor):
def _extract_web_data_and_status(self, url, video_id, fatal=True):
video_data, status = {}, -1
- res = self._download_webpage_handle(url, video_id, fatal=fatal, headers={'User-Agent': 'Mozilla/5.0'})
+ res = self._download_webpage_handle(url, video_id, fatal=fatal, impersonate=True)
if res is False:
return video_data, status
@@ -1071,12 +1071,15 @@ class TikTokUserIE(TikTokBaseIE):
webpage = self._download_webpage(
self._UPLOADER_URL_FORMAT % user_name, user_name,
'Downloading user webpage', 'Unable to download user webpage',
- fatal=False, headers={'User-Agent': 'Mozilla/5.0'}) or ''
+ fatal=False, impersonate=True) or ''
detail = traverse_obj(
self._get_universal_data(webpage, user_name), ('webapp.user-detail', {dict})) or {}
- if detail.get('statusCode') == 10222:
+ video_count = traverse_obj(detail, ('userInfo', ('stats', 'statsV2'), 'videoCount', {int}, any))
+ if not video_count and detail.get('statusCode') == 10222:
self.raise_login_required(
'This user\'s account is private. Log into an account that has access')
+ elif video_count == 0:
+ raise ExtractorError('This account does not have any videos posted', expected=True)
sec_uid = traverse_obj(detail, ('userInfo', 'user', 'secUid', {str}))
if sec_uid:
fail_early = not traverse_obj(detail, ('userInfo', 'itemList', ...))
@@ -1520,7 +1523,7 @@ class TikTokLiveIE(TikTokBaseIE):
uploader, room_id = self._match_valid_url(url).group('uploader', 'id')
if not room_id:
webpage = self._download_webpage(
- format_field(uploader, None, self._UPLOADER_URL_FORMAT), uploader)
+ format_field(uploader, None, self._UPLOADER_URL_FORMAT), uploader, impersonate=True)
room_id = traverse_obj(
self._get_universal_data(webpage, uploader),
('webapp.user-detail', 'userInfo', 'user', 'roomId', {str}))
diff --git a/yt_dlp/extractor/tvnoe.py b/yt_dlp/extractor/tvnoe.py
index 24a82623f2..b6d9ac6692 100644
--- a/yt_dlp/extractor/tvnoe.py
+++ b/yt_dlp/extractor/tvnoe.py
@@ -1,46 +1,82 @@
+import re
+
from .common import InfoExtractor
from ..utils import (
clean_html,
- get_element_by_class,
+ extract_attributes,
js_to_json,
+ mimetype2ext,
+ unified_strdate,
+ url_or_none,
+ urljoin,
)
+from ..utils.traversal import find_element, traverse_obj
class TVNoeIE(InfoExtractor):
- _WORKING = False
- _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P[0-9]+)'
- _TEST = {
- 'url': 'http://www.tvnoe.cz/video/10362',
- 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
+ IE_NAME = 'tvnoe'
+ IE_DESC = 'Televize Noe'
+
+ _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/porad/(?P[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.tvnoe.cz/porad/43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
'info_dict': {
- 'id': '10362',
+ 'id': '43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
'ext': 'mp4',
- 'series': 'Noční univerzita',
- 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
- 'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
+ 'title': 'Pomáhat potřebným nejen u nás',
+ 'description': 'md5:78b538ee32f7e881ec23b9c278a0ff3a',
+ 'release_date': '20250531',
+ 'series': 'Outdoor Films s MUDr. Tomášem Kempným',
+ 'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
},
- }
+ }, {
+ 'url': 'https://www.tvnoe.cz/porad/43205-zamysleni-tomase-halika-7-nedele-velikonocni',
+ 'info_dict': {
+ 'id': '43205-zamysleni-tomase-halika-7-nedele-velikonocni',
+ 'ext': 'mp4',
+ 'title': '7. neděle velikonoční',
+ 'description': 'md5:6bb9908efc59abe60e1c8c7c0e9bb6cd',
+ 'release_date': '20250531',
+ 'series': 'Zamyšlení Tomáše Halíka',
+ 'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
+ },
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ player = self._search_json(
+ r'var\s+INIT_PLAYER\s*=', webpage, 'init player',
+ video_id, transform_source=js_to_json)
- iframe_url = self._search_regex(
- r'