From 7cbe4b5f33e4241558f1f976981c5d2ab345a751 Mon Sep 17 00:00:00 2001 From: Robert Damas Date: Thu, 17 Jul 2025 19:56:47 +0200 Subject: [PATCH 1/5] Don't trust substack's webpage "post.type" A substack "podcast" type can also have a video. This commit looks for "videoUpload" elements regardless of content type. If it has a video URL, add it to formats. Closes #13594 --- yt_dlp/extractor/substack.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index b70d40f2ca..3fa5e3218d 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -107,7 +107,9 @@ class SubstackIE(InfoExtractor): post_type = webpage_info['post']['type'] formats, subtitles = [], {} - if post_type == 'podcast': + if webpage_info['post'].get('videoUpload'): + formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) + if webpage_info['post'].get('podcast_url'): fmt = {'url': webpage_info['post']['podcast_url']} if not determine_ext(fmt['url'], default_ext=None): # The redirected format URL expires but the original URL doesn't, @@ -117,9 +119,7 @@ class SubstackIE(InfoExtractor): 'Resolving podcast file extension', 'Podcast URL is invalid').url) formats.append(fmt) - elif post_type == 'video': - formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) - else: + if not formats: self.raise_no_formats(f'Page type "{post_type}" is not supported') return { From 02fcd241e98fe6448a650128f6a7ae1319df7bc8 Mon Sep 17 00:00:00 2001 From: Robert Damas Date: Mon, 1 Dec 2025 12:43:31 +0100 Subject: [PATCH 2/5] Fix for #15213 --- yt_dlp/extractor/substack.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 84d2ae6422..8814ecebe8 100644 --- a/yt_dlp/extractor/substack.py +++ 
b/yt_dlp/extractor/substack.py @@ -85,6 +85,20 @@ class SubstackIE(InfoExtractor): 'uploader': 'Blocked and Reported', 'uploader_id': '500230', }, + }, { + # Podcast with video where podcast_url is not resolvable + 'url': 'https://mellowkat.substack.com/p/theyre-all-compromised-wake-up', + 'md5': '7627f28352ed05c4cfc799bb1fc5822c', + 'info_dict': { + 'id': '180331920', + 'ext': 'mp4', + 'title': 'They\'re ALL compromised. Wake up.', + 'description': 'Watch now | Left vs. Right is theater. Many more links in this post! Click "view post" to read.', + 'thumbnail': r're:https://substackcdn\.com/image/.+\.png', + 'uploader': 'MellowKat\'s Newsletter', + 'uploader_id': '1075591', + }, + 'expected_warnings': ['Podcast URL is invalid'] }] @classmethod @@ -130,18 +144,22 @@ class SubstackIE(InfoExtractor): post_type = webpage_info['post']['type'] formats, subtitles = [], {} - if webpage_info['post'].get('videoUpload'): - formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) + if webpage_info['post'].get('video_upload_id'): + formats, subtitles = self._extract_video_formats(webpage_info['post']['video_upload_id'], canonical_url) if webpage_info['post'].get('podcast_url'): fmt = {'url': webpage_info['post']['podcast_url']} - if not determine_ext(fmt['url'], default_ext=None): + if not (ext := determine_ext(fmt['url'], default_ext=None)): # The redirected format URL expires but the original URL doesn't, # so we only want to extract the extension from this request - fmt['ext'] = determine_ext(self._request_webpage( - HEADRequest(fmt['url']), display_id, - 'Resolving podcast file extension', - 'Podcast URL is invalid').url) - formats.append(fmt) + fatal = not formats + podcast_url_src = self._request_webpage(HEADRequest(fmt['url']), display_id, + 'Resolving podcast file extension', 'Podcast URL is invalid', fatal=fatal) + if podcast_url_src: + ext = determine_ext(podcast_url_src.url) + if ext: + fmt['ext'] = ext + 
formats.append(fmt) + if not formats: self.raise_no_formats(f'Page type "{post_type}" is not supported') From 091076fa71978068dcb344f985f69d3254e3f400 Mon Sep 17 00:00:00 2001 From: Robert Damas Date: Mon, 1 Dec 2025 12:47:00 +0100 Subject: [PATCH 3/5] Fix COM812 Trailing comma missing --- yt_dlp/extractor/substack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 8814ecebe8..06bbbb18f3 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -98,7 +98,7 @@ class SubstackIE(InfoExtractor): 'uploader': 'MellowKat\'s Newsletter', 'uploader_id': '1075591', }, - 'expected_warnings': ['Podcast URL is invalid'] + 'expected_warnings': ['Podcast URL is invalid'], }] @classmethod From a693aec12070fe6cf4e7b5dd22a8ad5dd9e1f09d Mon Sep 17 00:00:00 2001 From: Robert Damas Date: Mon, 1 Dec 2025 12:50:20 +0100 Subject: [PATCH 4/5] Fix autopep8 run --- yt_dlp/extractor/substack.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 06bbbb18f3..af3ea0b4e2 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -153,7 +153,8 @@ class SubstackIE(InfoExtractor): # so we only want to extract the extension from this request fatal = not formats podcast_url_src = self._request_webpage(HEADRequest(fmt['url']), display_id, - 'Resolving podcast file extension', 'Podcast URL is invalid', fatal=fatal) + 'Resolving podcast file extension', + 'Podcast URL is invalid', fatal=fatal) if podcast_url_src: ext = determine_ext(podcast_url_src.url) if ext: From fe7601987863491353ae2fcd670515d2bb96c785 Mon Sep 17 00:00:00 2001 From: Robert Damas Date: Mon, 1 Dec 2025 16:31:01 +0100 Subject: [PATCH 5/5] Move test from _WEBPAGE_TEST to _TESTS --- yt_dlp/extractor/substack.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git 
a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index af3ea0b4e2..51dcc5b121 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -62,6 +62,20 @@ class SubstackIE(InfoExtractor): 'uploader': 'Persuasion', 'uploader_id': '61579', }, + }, { + # Podcast with video where podcast_url is not resolvable + 'url': 'https://mellowkat.substack.com/p/theyre-all-compromised-wake-up', + 'md5': '7627f28352ed05c4cfc799bb1fc5822c', + 'info_dict': { + 'id': '180331920', + 'ext': 'mp4', + 'title': 'They\'re ALL compromised. Wake up.', + 'description': 'Watch now | Left vs. Right is theater. Many more links in this post! Click "view post" to read.', + 'thumbnail': r're:https://substackcdn\.com/image/.+\.png', + 'uploader': 'MellowKat\'s Newsletter', + 'uploader_id': '1075591', + }, + 'expected_warnings': ['Podcast URL is invalid'], }] _WEBPAGE_TESTS = [{ 'url': 'https://www.mollymovieclub.com/p/interstellar', @@ -85,20 +99,6 @@ class SubstackIE(InfoExtractor): 'uploader': 'Blocked and Reported', 'uploader_id': '500230', }, - }, { - # Podcast with video where podcast_url is not resolvable - 'url': 'https://mellowkat.substack.com/p/theyre-all-compromised-wake-up', - 'md5': '7627f28352ed05c4cfc799bb1fc5822c', - 'info_dict': { - 'id': '180331920', - 'ext': 'mp4', - 'title': 'They\'re ALL compromised. Wake up.', - 'description': 'Watch now | Left vs. Right is theater. Many more links in this post! Click "view post" to read.', - 'thumbnail': r're:https://substackcdn\.com/image/.+\.png', - 'uploader': 'MellowKat\'s Newsletter', - 'uploader_id': '1075591', - }, - 'expected_warnings': ['Podcast URL is invalid'], }] @classmethod