[youtube] Extract AI summary from video metadata

Authored by: Unverified Contact
This commit is contained in:
Unverified Contact 2025-11-17 00:37:10 +11:00
parent f3c255b63b
commit 9807608fdd
2 changed files with 19 additions and 0 deletions

View file

@ -409,6 +409,7 @@ class InfoExtractor:
players on other sites. Can be True (=always allowed),
False (=never allowed), None (=unknown), or a string
specifying the criteria for embedability; e.g. 'whitelist'
ai_summary: Short AI-generated summary text shown in the video description, if available.
availability: Under what condition the video is available. One of
'private', 'premium_only', 'subscriber_only', 'needs_auth',
'unlisted' or 'public'. Use 'InfoExtractor._availability'

View file

@ -1806,6 +1806,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'playlist_count': 10,
'params': {'skip_download': True},
}, {
'note': 'Video with AI summary',
'url': 'https://www.youtube.com/watch?v=8lF22FnHkUU',
'info_dict': {
'id': '8lF22FnHkUU',
'ext': 'mp4',
'ai_summary': 'md5:d892e2d40070a08530c965dc2c0922f7',
},
'params': {'skip_download': True},
}]
_DEFAULT_PLAYER_JS_VERSION = 'actual'
@ -3748,6 +3757,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_description = traverse_obj(initial_sdcr, (
'items', ..., 'expandableVideoDescriptionBodyRenderer',
'attributedDescriptionBodyText', 'content', {str}, any))
initial_ai_summary = traverse_obj(initial_data, (
'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ...,
'expandableMetadataRenderer', 'header', 'collapsedTitle', 'simpleText', {str}, any),
('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content',
'structuredDescriptionContentRenderer', 'items', ...,
'expandableMetadataRenderer', 'header', 'collapsedTitle', 'simpleText', {str}, any))
# videoDescriptionHeaderRenderer also has publishDate/channel/handle/ucid, but not needed
initial_vdhr = traverse_obj(initial_sdcr, (
'items', ..., 'videoDescriptionHeaderRenderer', {dict}, any)) or {}
@ -4169,6 +4184,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['heatmap'] = self._extract_heatmap(initial_data)
if initial_ai_summary:
info['ai_summary'] = initial_ai_summary
contents = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
expected_type=list, default=[])