mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-05 22:34:59 +01:00
[ie/web.archive:youtube] Fix extractor (#15234)
Closes #15233 Authored by: seproDev
This commit is contained in:
parent
f7acf3c1f4
commit
7ec6b9bc40
1 changed files with 39 additions and 11 deletions
|
|
@ -704,6 +704,24 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||
'thumbnail': 'https://web.archive.org/web/20160108040020if_/https://i.ytimg.com/vi/SQCom7wjGDs/maxresdefault.jpg',
|
||||
'upload_date': '20160107',
|
||||
},
|
||||
}, {
|
||||
# dmuxed formats
|
||||
'url': 'https://web.archive.org/web/20240922160632/https://www.youtube.com/watch?v=z7hzvTL3k1k',
|
||||
'info_dict': {
|
||||
'id': 'z7hzvTL3k1k',
|
||||
'ext': 'webm',
|
||||
'title': 'Praise the Lord and Pass the Ammunition (BARRXN REMIX)',
|
||||
'description': 'md5:45dbf2c71c23b0734c8dfb82dd1e94b6',
|
||||
'uploader': 'Barrxn',
|
||||
'uploader_id': 'TheRockstar6086',
|
||||
'uploader_url': 'https://www.youtube.com/user/TheRockstar6086',
|
||||
'channel_id': 'UCjJPGUTtvR9uizmawn2ThqA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCjJPGUTtvR9uizmawn2ThqA',
|
||||
'duration': 125,
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'upload_date': '20201207',
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||
'only_matching': True,
|
||||
|
|
@ -1060,6 +1078,19 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||
capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
|
||||
return orderedSet(filter(None, capture_dates))
|
||||
|
||||
def _parse_fmt(self, fmt, extra_info=None):
|
||||
format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
|
||||
return {
|
||||
'format_id': format_id,
|
||||
**self._FORMATS.get(format_id, {}),
|
||||
**traverse_obj(fmt, {
|
||||
'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
|
||||
'ext': ('ext', {str}),
|
||||
'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
|
||||
}),
|
||||
**(extra_info or {}),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
|
||||
url_date = url_date or url_date_2
|
||||
|
|
@ -1090,17 +1121,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
formats = []
|
||||
for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
|
||||
format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
**self._FORMATS.get(format_id, {}),
|
||||
**traverse_obj(fmt, {
|
||||
'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
|
||||
'ext': ('ext', {str}),
|
||||
'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
|
||||
}),
|
||||
})
|
||||
if video_info.get('dmux'):
|
||||
for vf in traverse_obj(video_info, ('formats', 'video', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append(self._parse_fmt(vf, {'acodec': 'none'}))
|
||||
for af in traverse_obj(video_info, ('formats', 'audio', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append(self._parse_fmt(af, {'vcodec': 'none'}))
|
||||
else:
|
||||
for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append(self._parse_fmt(fmt))
|
||||
info['formats'] = formats
|
||||
|
||||
return info
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue