This commit is contained in:
rdamas 2025-12-06 08:51:58 +08:00 committed by GitHub
commit 4163d7ab8c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -62,6 +62,20 @@ class SubstackIE(InfoExtractor):
'uploader': 'Persuasion',
'uploader_id': '61579',
},
}, {
# Podcast with video where podcast_url is not resolvable
'url': 'https://mellowkat.substack.com/p/theyre-all-compromised-wake-up',
'md5': '7627f28352ed05c4cfc799bb1fc5822c',
'info_dict': {
'id': '180331920',
'ext': 'mp4',
'title': 'They\'re ALL compromised. Wake up.',
'description': 'Watch now | Left vs. Right is theater. Many more links in this post! Click "view post" to read.',
'thumbnail': r're:https://substackcdn\.com/image/.+\.png',
'uploader': 'MellowKat\'s Newsletter',
'uploader_id': '1075591',
},
'expected_warnings': ['Podcast URL is invalid'],
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.mollymovieclub.com/p/interstellar',
@ -130,19 +144,24 @@ class SubstackIE(InfoExtractor):
post_type = webpage_info['post']['type']
formats, subtitles = [], {}
if post_type == 'podcast': if webpage_info['post'].get('video_upload_id'):
formats, subtitles = self._extract_video_formats(webpage_info['post']['video_upload_id'], canonical_url)
if webpage_info['post'].get('podcast_url'):
fmt = {'url': webpage_info['post']['podcast_url']} fmt = {'url': webpage_info['post']['podcast_url']}
if not determine_ext(fmt['url'], default_ext=None): if not (ext := determine_ext(fmt['url'], default_ext=None)):
# The redirected format URL expires but the original URL doesn't,
# so we only want to extract the extension from this request
fmt['ext'] = determine_ext(self._request_webpage( fatal = not formats
HEADRequest(fmt['url']), display_id, podcast_url_src = self._request_webpage(HEADRequest(fmt['url']), display_id,
'Resolving podcast file extension', 'Resolving podcast file extension',
'Podcast URL is invalid').url) 'Podcast URL is invalid', fatal=fatal)
if podcast_url_src:
ext = determine_ext(podcast_url_src.url)
if ext:
fmt['ext'] = ext
formats.append(fmt)
elif post_type == 'video':
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) if not formats:
else:
self.raise_no_formats(f'Page type "{post_type}" is not supported') self.raise_no_formats(f'Page type "{post_type}" is not supported')
return {