Compare commits

...

5 commits

Author SHA1 Message Date
WhatAmISupposedToPutHere
56ea3a00ea
[ie/youtube] Add request_no_ads extractor-arg (#15145)
Default is `true` for unauthenticated users.
Default is `false` if logged-in cookies have been passed to yt-dlp.
Using `true` results in a loss of premium formats.

Closes #15144
Authored by: WhatAmISupposedToPutHere
2025-12-01 01:02:58 +00:00
Zer0 Spectrum
2a777ecbd5
[ie/tubitv:series] Fix extractor (#15018)
Authored by: Zer0spectrum
2025-12-01 00:33:14 +00:00
thomasmllt
023e4db9af
[ie/patreon:campaign] Fix extractor (#15108)
Closes #15094
Authored by: thomasmllt
2025-11-30 23:59:28 +00:00
Zer0 Spectrum
4433b3a217
[ie/fc2:live] Raise appropriate error when stream is offline (#15180)
Closes #15179
Authored by: Zer0spectrum
2025-11-30 23:54:17 +00:00
bashonly
419776ecf5
[ie/youtube] Extract all automatic caption languages (#15156)
Closes #14889, Closes #15150
Authored by: bashonly
2025-11-30 23:35:05 +00:00
5 changed files with 51 additions and 15 deletions

View file

@ -1871,6 +1871,7 @@ The following extractors use this feature:
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default) * `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context) * `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default) * `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)
* `request_no_ads`: Skip preroll ads to eliminate the mandatory wait period before download. Either `true` (the default if unauthenticated) or `false`. The default is `false` when logged-in cookies have been passed to yt-dlp, since `true` will result in a loss of premium formats
#### youtube-ejs #### youtube-ejs
* `jitless`: Run suported Javascript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Do note that `node` and `bun` are still considered unsecure. Either `true` or `false` (default) * `jitless`: Run suported Javascript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Do note that `node` and `bun` are still considered unsecure. Either `true` or `false` (default)

View file

@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..networking import Request from ..networking import Request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
UserNotLive,
js_to_json, js_to_json,
traverse_obj, traverse_obj,
update_url_query, update_url_query,
@ -205,6 +206,9 @@ class FC2LiveIE(InfoExtractor):
'client_app': 'browser_hls', 'client_app': 'browser_hls',
'ipv6': '', 'ipv6': '',
}), headers={'X-Requested-With': 'XMLHttpRequest'}) }), headers={'X-Requested-With': 'XMLHttpRequest'})
# A non-zero 'status' indicates the stream is not live, so check truthiness
if traverse_obj(control_server, ('status', {int})) and 'control_token' not in control_server:
raise UserNotLive(video_id=video_id)
self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw']) self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])
ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']}) ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})

View file

@ -598,7 +598,8 @@ class PatreonCampaignIE(PatreonBaseIE):
'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str})) 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
if not campaign_id: if not campaign_id:
campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), ( campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')})) ((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'),
'id', {str}, any, {require('campaign ID')}))
params = { params = {
'json-api-use-default-includes': 'false', 'json-api-use-default-includes': 'false',

View file

@ -182,13 +182,13 @@ class TubiTvShowIE(InfoExtractor):
webpage = self._download_webpage(show_url, playlist_id) webpage = self._download_webpage(show_url, playlist_id)
data = self._search_json( data = self._search_json(
r'window\.__data\s*=', webpage, 'data', playlist_id, r'window\.__REACT_QUERY_STATE__\s*=', webpage, 'data', playlist_id,
transform_source=js_to_json)['video'] transform_source=js_to_json)['queries'][0]['state']['data']
# v['number'] is already a decimal string, but stringify to protect against API changes # v['number'] is already a decimal string, but stringify to protect against API changes
path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}] path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}]
for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)): for season in traverse_obj(data, ('seasons', *path)):
season_number = int_or_none(season.get('number')) season_number = int_or_none(season.get('number'))
for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])): for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])):
episode_id = episode['id'] episode_id = episode['id']

View file

@ -2628,18 +2628,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_checkok_params(): def _get_checkok_params():
return {'contentCheckOk': True, 'racyCheckOk': True} return {'contentCheckOk': True, 'racyCheckOk': True}
@classmethod def _generate_player_context(self, sts=None):
def _generate_player_context(cls, sts=None):
context = { context = {
'html5Preference': 'HTML5_PREF_WANTS', 'html5Preference': 'HTML5_PREF_WANTS',
} }
if sts is not None: if sts is not None:
context['signatureTimestamp'] = sts context['signatureTimestamp'] = sts
playback_context = {
'contentPlaybackContext': context,
}
# The 'adPlaybackContext'/'request_no_ads' workaround results in a loss of premium formats.
# Only default to 'true' if the user is unauthenticated, since we can't reliably detect all
# types of premium accounts (e.g. YTMusic Premium), and since premium users don't have ads.
default_arg_value = 'false' if self.is_authenticated else 'true'
if self._configuration_arg('request_no_ads', [default_arg_value])[0] != 'false':
playback_context['adPlaybackContext'] = {
'pyv': True,
}
return { return {
'playbackContext': { 'playbackContext': playback_context,
'contentPlaybackContext': context, **self._get_checkok_params(),
},
**cls._get_checkok_params(),
} }
def _get_config_po_token(self, client: str, context: _PoTokenContext): def _get_config_po_token(self, client: str, context: _PoTokenContext):
@ -4029,6 +4040,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
STREAMING_DATA_CLIENT_NAME: client_name, STREAMING_DATA_CLIENT_NAME: client_name,
}) })
def set_audio_lang_from_orig_subs_lang(lang_code):
for f in formats:
if f.get('acodec') != 'none' and not f.get('language'):
f['language'] = lang_code
subtitles = {} subtitles = {}
skipped_subs_clients = set() skipped_subs_clients = set()
@ -4088,7 +4104,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
orig_lang = qs.get('lang', [None])[-1] orig_lang = qs.get('lang', [None])[-1]
lang_name = self._get_text(caption_track, 'name', max_runs=1) lang_name = self._get_text(caption_track, 'name', max_runs=1)
if caption_track.get('kind') != 'asr': is_manual_subs = caption_track.get('kind') != 'asr'
if is_manual_subs:
if not lang_code: if not lang_code:
continue continue
process_language( process_language(
@ -4099,16 +4116,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not trans_code: if not trans_code:
continue continue
orig_trans_code = trans_code orig_trans_code = trans_code
if caption_track.get('kind') != 'asr' and trans_code != 'und': if is_manual_subs and trans_code != 'und':
if not get_translated_subs: if not get_translated_subs:
continue continue
trans_code += f'-{lang_code}' trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, None, ' from %s') trans_name += format_field(lang_name, None, ' from %s')
if lang_code == f'a-{orig_trans_code}': if lang_code == f'a-{orig_trans_code}':
# Set audio language based on original subtitles # Set audio language based on original subtitles
for f in formats: set_audio_lang_from_orig_subs_lang(orig_trans_code)
if f.get('acodec') != 'none' and not f.get('language'):
f['language'] = orig_trans_code
# Add an "-orig" label to the original language so that it can be distinguished. # Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility # The subs are returned without "-orig" as well for compatibility
process_language( process_language(
@ -4119,6 +4134,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
automatic_captions, base_url, trans_code, trans_name, client_name, automatic_captions, base_url, trans_code, trans_name, client_name,
pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params}) pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params})
# Extract automatic captions when the language is not in 'translationLanguages'
# e.g. Cantonese [yue], see https://github.com/yt-dlp/yt-dlp/issues/14889
lang_code = remove_start(lang_code, 'a-')
if is_manual_subs or not lang_code or lang_code in automatic_captions:
continue
lang_name = remove_end(lang_name, ' (auto-generated)')
if caption_track.get('isTranslatable'):
# We can assume this is the original audio language
set_audio_lang_from_orig_subs_lang(lang_code)
process_language(
automatic_captions, base_url, f'{lang_code}-orig',
f'{lang_name} (Original)', client_name, pot_params)
process_language(
automatic_captions, base_url, lang_code, lang_name, client_name, pot_params)
# Avoid duplication if we've already got everything we need # Avoid duplication if we've already got everything we need
need_subs_langs.difference_update(subtitles) need_subs_langs.difference_update(subtitles)
need_caps_langs.difference_update(automatic_captions) need_caps_langs.difference_update(automatic_captions)