From 4cb5e191efeebc3679f89c3c8ac819bcd511bb1f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 16 Nov 2025 16:39:22 -0600 Subject: [PATCH 1/3] [ie/youtube] Detect "super resolution" AI-upscaled formats (#15050) Closes #14923 Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 8f64cf4d8d..57edad3c0f 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -3150,6 +3150,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. ' 'Use formats=duplicate extractor argument instead') + def is_super_resolution(f_url): + return '1' in traverse_obj(f_url, ({parse_qs}, 'xtags', ..., {urllib.parse.parse_qs}, 'sr', ...)) + def solve_sig(s, spec): return ''.join(s[i] for i in spec) @@ -3202,7 +3205,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def get_stream_id(fmt_stream): return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc') - def process_format_stream(fmt_stream, proto, missing_pot): + def process_format_stream(fmt_stream, proto, missing_pot, super_resolution=False): itag = str_or_none(fmt_stream.get('itag')) audio_track = fmt_stream.get('audioTrack') or {} quality = fmt_stream.get('quality') @@ -3253,10 +3256,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): dct = { 'asr': int_or_none(fmt_stream.get('audioSampleRate')), 'filesize': int_or_none(fmt_stream.get('contentLength')), - 'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}', + 'format_id': join_nonempty(itag, ( + 'drc' if fmt_stream.get('isDrc') + else 'sr' if super_resolution + else None)), 'format_note': join_nonempty( join_nonempty(audio_track.get('displayName'), audio_track.get('audioIsDefault') and '(default)', delim=' '), - name, fmt_stream.get('isDrc') and 'DRC', + name, fmt_stream.get('isDrc') and 'DRC', super_resolution and 'AI-upscaled', try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), is_damaged and 'DAMAGED', missing_pot and 'MISSING POT', @@ -3342,7 +3348,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.report_warning(msg, video_id, only_once=True) continue - fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token) + fmt = process_format_stream( + fmt_stream, proto, missing_pot=require_po_token and not po_token, + super_resolution=is_super_resolution(fmt_url)) if not fmt: continue From 5f66ac71f6637f768cd251509b0a932d0ce56427 Mon Sep 17 00:00:00 2001 From: Anton Larionov <11796525+anlar@users.noreply.github.com> Date: Mon, 17 Nov 2025 00:05:44 +0100 Subject: [PATCH 2/3] [ie/mave:channel] Add extractor (#14915) Authored by: anlar --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/mave.py | 155 ++++++++++++++++++++++++-------- 2 files changed, 123 insertions(+), 37 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 848b608717..86769def0c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1094,7 +1094,10 @@ from .markiza import ( from .massengeschmacktv import MassengeschmackTVIE from .masters import MastersIE from .matchtv import MatchTVIE -from .mave import MaveIE +from .mave import ( + MaveChannelIE, + MaveIE, +) from .mbn import MBNIE from .mdr import MDRIE from .medaltv import MedalTVIE diff --git a/yt_dlp/extractor/mave.py b/yt_dlp/extractor/mave.py index 86d8d8b7c4..aa026f85cb 100644 --- a/yt_dlp/extractor/mave.py +++ b/yt_dlp/extractor/mave.py @@ -1,7 +1,9 @@ -import re +import functools +import math from .common import InfoExtractor from ..utils import ( + InAdvancePagedList, clean_html, int_or_none, parse_iso8601, @@ -10,15 +12,64 @@ from ..utils import ( from ..utils.traversal import require, traverse_obj -class MaveIE(InfoExtractor): - _VALID_URL = r'https?://(?P[\w-]+)\.mave\.digital/(?Pep-\d+)' +class MaveBaseIE(InfoExtractor): + _API_BASE_URL = 'https://api.mave.digital/v1/website' + _API_BASE_STORAGE_URL = 'https://store.cloud.mts.ru/mave/' + + def _load_channel_meta(self, channel_id, display_id): + return traverse_obj(self._download_json( + f'{self._API_BASE_URL}/{channel_id}/', display_id, + note='Downloading channel metadata'), 'podcast') + + def _load_episode_meta(self, channel_id, episode_code, display_id): + return self._download_json( + f'{self._API_BASE_URL}/{channel_id}/episodes/{episode_code}', + display_id, note='Downloading episode metadata') + + def _create_entry(self, channel_id, channel_meta, episode_meta): + episode_code = traverse_obj(episode_meta, ('code', {int}, {require('episode code')})) + return { + 'display_id': f'{channel_id}-{episode_code}', + 'extractor_key': MaveIE.ie_key(), + 'extractor': MaveIE.IE_NAME, + 'webpage_url': f'https://{channel_id}.mave.digital/ep-{episode_code}', + 'channel_id': channel_id, + 'channel_url': f'https://{channel_id}.mave.digital/', + 'vcodec': 'none', + **traverse_obj(episode_meta, { + 'id': ('id', {str}), + 'url': ('audio', {urljoin(self._API_BASE_STORAGE_URL)}), + 'title': ('title', {str}), + 'description': ('description', {clean_html}), + 'thumbnail': ('image', {urljoin(self._API_BASE_STORAGE_URL)}), + 'duration': ('duration', {int_or_none}), + 'season_number': ('season', {int_or_none}), + 'episode_number': ('number', {int_or_none}), + 'view_count': ('listenings', {int_or_none}), + 'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any), + 'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any), + 'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}), + 'timestamp': ('publish_date', {parse_iso8601}), + }), + **traverse_obj(channel_meta, { + 'series_id': ('id', {str}), + 'series': ('title', {str}), + 'channel': ('title', {str}), + 'uploader': ('author', {str}), + }), + } + + +class MaveIE(MaveBaseIE): + IE_NAME = 'mave' + _VALID_URL = r'https?://(?P[\w-]+)\.mave\.digital/ep-(?P\d+)' _TESTS = [{ 'url': 'https://ochenlichnoe.mave.digital/ep-25', 'md5': 'aa3e513ef588b4366df1520657cbc10c', 'info_dict': { 'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2', 'ext': 'mp3', - 'display_id': 'ochenlichnoe-ep-25', + 'display_id': 'ochenlichnoe-25', 'title': 'Между мной и миром: психология самооценки', 'description': 'md5:4b7463baaccb6982f326bce5c700382a', 'uploader': 'Самарский университет', @@ -45,7 +96,7 @@ class MaveIE(InfoExtractor): 'info_dict': { 'id': '41898bb5-ff57-4797-9236-37a8e537aa21', 'ext': 'mp3', - 'display_id': 'budem-ep-12', + 'display_id': 'budem-12', 'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана', 'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19', 'uploader': 'Полина Цветкова+Евгения Акопова', @@ -68,40 +119,72 @@ class MaveIE(InfoExtractor): 'upload_date': '20241230', }, }] - _API_BASE_URL = 'https://api.mave.digital/' def _real_extract(self, url): - channel_id, slug = self._match_valid_url(url).group('channel', 'id') - display_id = f'{channel_id}-{slug}' - webpage = self._download_webpage(url, display_id) - data = traverse_obj( - self._search_nuxt_json(webpage, display_id), - ('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')})) + channel_id, episode_code = self._match_valid_url(url).group( + 'channel_id', 'episode_code') + display_id = f'{channel_id}-{episode_code}' + + channel_meta = self._load_channel_meta(channel_id, display_id) + episode_meta = self._load_episode_meta(channel_id, episode_code, display_id) + + return self._create_entry(channel_id, channel_meta, episode_meta) + + +class MaveChannelIE(MaveBaseIE): + IE_NAME = 'mave:channel' + _VALID_URL = r'https?://(?P[\w-]+)\.mave\.digital/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://budem.mave.digital/', + 'info_dict': { + 'id': 'budem', + 'title': 'Все там будем', + 'description': 'md5:f04ae12a42be0f1d765c5e326b41987a', + }, + 'playlist_mincount': 15, + }, { + 'url': 'https://ochenlichnoe.mave.digital/', + 'info_dict': { + 'id': 'ochenlichnoe', + 'title': 'Очень личное', + 'description': 'md5:ee36a6a52546b91b487fe08c552fdbb2', + }, + 'playlist_mincount': 20, + }, { + 'url': 'https://geekcity.mave.digital/', + 'info_dict': { + 'id': 'geekcity', + 'title': 'Мужчины в трико', + 'description': 'md5:4164d425d60a0d97abdce9d1f6f8e049', + }, + 'playlist_mincount': 80, + }] + _PAGE_SIZE = 50 + + def _entries(self, channel_id, channel_meta, page_num): + page_data = self._download_json( + f'{self._API_BASE_URL}/{channel_id}/episodes', channel_id, query={ + 'view': 'all', + 'page': page_num + 1, + 'sort': 'newest', + 'format': 'all', + }, note=f'Downloading page {page_num + 1}') + for ep in traverse_obj(page_data, ('episodes', lambda _, v: v['audio'] and v['id'])): + yield self._create_entry(channel_id, channel_meta, ep) + + def _real_extract(self, url): + channel_id = self._match_id(url) + + channel_meta = self._load_channel_meta(channel_id, channel_id) return { - 'display_id': display_id, - 'channel_id': channel_id, - 'channel_url': f'https://{channel_id}.mave.digital/', - 'vcodec': 'none', - 'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None, - **traverse_obj(data, ('activeEpisodeData', { - 'url': ('audio', {urljoin(self._API_BASE_URL)}), - 'id': ('id', {str}), + '_type': 'playlist', + 'id': channel_id, + **traverse_obj(channel_meta, { 'title': ('title', {str}), - 'description': ('description', {clean_html}), - 'duration': ('duration', {int_or_none}), - 'season_number': ('season', {int_or_none}), - 'episode_number': ('number', {int_or_none}), - 'view_count': ('listenings', {int_or_none}), - 'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any), - 'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any), - 'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}), - 'timestamp': ('publish_date', {parse_iso8601}), - })), - **traverse_obj(data, ('podcast', 'podcast', { - 'series_id': ('id', {str}), - 'series': ('title', {str}), - 'channel': ('title', {str}), - 'uploader': ('author', {str}), - })), + 'description': ('description', {str}), + }), + 'entries': InAdvancePagedList( + functools.partial(self._entries, channel_id, channel_meta), + math.ceil(channel_meta['episodes_count'] / self._PAGE_SIZE), self._PAGE_SIZE), } From 854fded114f3b7b33693c2d3418575d04014aa4b Mon Sep 17 00:00:00 2001 From: Mr Flamel <148793343+mrFlamel@users.noreply.github.com> Date: Mon, 17 Nov 2025 01:17:55 +0200 Subject: [PATCH 3/3] [ie/TheChosen] Add extractors (#14183) Closes #11246 Authored by: mrFlamel --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/frontro.py | 164 ++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 yt_dlp/extractor/frontro.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 86769def0c..fc77804692 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -691,6 +691,10 @@ from .frontendmasters import ( FrontendMastersIE, FrontendMastersLessonIE, ) +from .frontro import ( + TheChosenGroupIE, + TheChosenIE, +) from .fujitv import FujiTVFODPlus7IE from .funk import FunkIE from .funker530 import Funker530IE diff --git a/yt_dlp/extractor/frontro.py b/yt_dlp/extractor/frontro.py new file mode 100644 index 0000000000..e86c4afa51 --- /dev/null +++ b/yt_dlp/extractor/frontro.py @@ -0,0 +1,164 @@ +import json + +from .common import InfoExtractor +from ..utils import int_or_none, parse_iso8601, url_or_none +from ..utils.traversal import traverse_obj + + +class FrontoBaseIE(InfoExtractor): + def _get_auth_headers(self, url): + return traverse_obj(self._get_cookies(url), { + 'authorization': ('frAccessToken', 'value', {lambda token: f'Bearer {token}' if token else None}), + }) + + +class FrontroVideoBaseIE(FrontoBaseIE): + _CHANNEL_ID = None + + def _real_extract(self, url): + video_id = self._match_id(url) + + metadata = self._download_json( + 'https://api.frontrow.cc/query', video_id, data=json.dumps({ + 'operationName': 'Video', + 'variables': {'channelID': self._CHANNEL_ID, 'videoID': video_id}, + 'query': '''query Video($channelID: ID!, $videoID: ID!) { + video(ChannelID: $channelID, VideoID: $videoID) { + ... on Video {title description updatedAt thumbnail createdAt duration likeCount comments views url hasAccess} + } + }''', + }).encode(), headers={ + 'content-type': 'application/json', + **self._get_auth_headers(url), + })['data']['video'] + if not traverse_obj(metadata, 'hasAccess'): + self.raise_login_required() + + formats, subtitles = self._extract_m3u8_formats_and_subtitles(metadata['url'], video_id) + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(metadata, { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + 'timestamp': ('createdAt', {parse_iso8601}), + 'modified_timestamp': ('updatedAt', {parse_iso8601}), + 'duration': ('duration', {int_or_none}), + 'like_count': ('likeCount', {int_or_none}), + 'comment_count': ('comments', {int_or_none}), + 'view_count': ('views', {int_or_none}), + }), + } + + +class FrontroGroupBaseIE(FrontoBaseIE): + _CHANNEL_ID = None + _VIDEO_EXTRACTOR = None + _VIDEO_URL_TMPL = None + + def _real_extract(self, url): + group_id = self._match_id(url) + + metadata = self._download_json( + 'https://api.frontrow.cc/query', group_id, note='Downloading playlist metadata', + data=json.dumps({ + 'operationName': 'PaginatedStaticPageContainer', + 'variables': {'channelID': self._CHANNEL_ID, 'first': 500, 'pageContainerID': group_id}, + 'query': '''query PaginatedStaticPageContainer($channelID: ID!, $pageContainerID: ID!) { + pageContainer(ChannelID: $channelID, PageContainerID: $pageContainerID) { + ... on StaticPageContainer { id title updatedAt createdAt itemRefs {edges {node { + id contentItem { ... on ItemVideo { videoItem: item { + id + }}} + }}} + } + } + }''', + }).encode(), headers={ + 'content-type': 'application/json', + **self._get_auth_headers(url), + })['data']['pageContainer'] + + entries = [] + for video_id in traverse_obj(metadata, ( + 'itemRefs', 'edges', ..., 'node', 'contentItem', 'videoItem', 'id', {str}), + ): + entries.append(self.url_result( + self._VIDEO_URL_TMPL % video_id, self._VIDEO_EXTRACTOR, video_id)) + + return { + '_type': 'playlist', + 'id': group_id, + 'entries': entries, + **traverse_obj(metadata, { + 'title': ('title', {str}), + 'timestamp': ('createdAt', {parse_iso8601}), + 'modified_timestamp': ('updatedAt', {parse_iso8601}), + }), + } + + +class TheChosenIE(FrontroVideoBaseIE): + _CHANNEL_ID = '12884901895' + + _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://watch.thechosen.tv/video/184683594325', + 'md5': '3f878b689588c71b38ec9943c54ff5b0', + 'info_dict': { + 'id': '184683594325', + 'ext': 'mp4', + 'title': 'Season 3 Episode 2: Two by Two', + 'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'duration': 4212, + 'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/', + 'timestamp': 1698954546, + 'upload_date': '20231102', + 'modified_timestamp': int, + 'modified_date': str, + }, + }, { + 'url': 'https://watch.thechosen.tv/video/184683596189', + 'md5': 'd581562f9d29ce82f5b7770415334151', + 'info_dict': { + 'id': '184683596189', + 'ext': 'mp4', + 'title': 'Season 4 Episode 8: Humble', + 'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'duration': 5092, + 'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/', + 'timestamp': 1715019474, + 'upload_date': '20240506', + 'modified_timestamp': int, + 'modified_date': str, + }, + }] + + +class TheChosenGroupIE(FrontroGroupBaseIE): + _CHANNEL_ID = '12884901895' + _VIDEO_EXTRACTOR = TheChosenIE + _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s' + + _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://watch.thechosen.tv/group/309237658592', + 'info_dict': { + 'id': '309237658592', + 'title': 'Season 3', + 'timestamp': 1746203969, + 'upload_date': '20250502', + 'modified_timestamp': int, + 'modified_date': str, + }, + 'playlist_count': 8, + }]