diff --git a/README.md b/README.md index c98c69f418..6a97eb0eb2 100644 --- a/README.md +++ b/README.md @@ -1871,6 +1871,7 @@ The following extractors use this feature: * `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default) * `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context) * `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default) +* `request_no_ads`: Skip preroll ads to eliminate the mandatory wait period before download. Either `true` (the default if unauthenticated) or `false`. The default is `false` when logged-in cookies have been passed to yt-dlp, since `true` will result in a loss of premium formats #### youtube-ejs * `jitless`: Run suported Javascript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Do note that `node` and `bun` are still considered unsecure. Either `true` or `false` (default) diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index d343069fec..aa6ff6335d 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..networking import Request from ..utils import ( ExtractorError, + UserNotLive, js_to_json, traverse_obj, update_url_query, @@ -205,6 +206,9 @@ class FC2LiveIE(InfoExtractor): 'client_app': 'browser_hls', 'ipv6': '', }), headers={'X-Requested-With': 'XMLHttpRequest'}) + # A non-zero 'status' indicates the stream is not live, so check truthiness + if traverse_obj(control_server, ('status', {int})) and 'control_token' not in control_server: + raise UserNotLive(video_id=video_id) self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw']) ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']}) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 9038b4a7ff..b511994e8a 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -598,7 +598,8 @@ class PatreonCampaignIE(PatreonBaseIE): 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str})) if not campaign_id: campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), ( - lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')})) + ((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'), + 'id', {str}, any, {require('campaign ID')})) params = { 'json-api-use-default-includes': 'false', diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 694a92fcd4..bb9a293b86 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -182,13 +182,13 @@ class TubiTvShowIE(InfoExtractor): webpage = self._download_webpage(show_url, playlist_id) data = self._search_json( - r'window\.__data\s*=', webpage, 'data', playlist_id, - transform_source=js_to_json)['video'] + r'window\.__REACT_QUERY_STATE__\s*=', webpage, 'data', playlist_id, + transform_source=js_to_json)['queries'][0]['state']['data'] # v['number'] is already a decimal string, but stringify to protect against API changes path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}] - for season in traverse_obj(data, ('byId', 
lambda _, v: v['type'] == 's', 'seasons', *path)): + for season in traverse_obj(data, ('seasons', *path)): season_number = int_or_none(season.get('number')) for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])): episode_id = episode['id'] diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 600e0ccda6..0756ce2c40 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2628,18 +2628,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_checkok_params(): return {'contentCheckOk': True, 'racyCheckOk': True} - @classmethod - def _generate_player_context(cls, sts=None): + def _generate_player_context(self, sts=None): context = { 'html5Preference': 'HTML5_PREF_WANTS', } if sts is not None: context['signatureTimestamp'] = sts + + playback_context = { + 'contentPlaybackContext': context, + } + + # The 'adPlaybackContext'/'request_no_ads' workaround results in a loss of premium formats. + # Only default to 'true' if the user is unauthenticated, since we can't reliably detect all + # types of premium accounts (e.g. YTMusic Premium), and since premium users don't have ads. + default_arg_value = 'false' if self.is_authenticated else 'true' + if self._configuration_arg('request_no_ads', [default_arg_value])[0] != 'false': + playback_context['adPlaybackContext'] = { + 'pyv': True, + } + return { - 'playbackContext': { - 'contentPlaybackContext': context, - }, - **cls._get_checkok_params(), + 'playbackContext': playback_context, + **self._get_checkok_params(), } def _get_config_po_token(self, client: str, context: _PoTokenContext): @@ -4029,6 +4040,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): STREAMING_DATA_CLIENT_NAME: client_name, }) + def set_audio_lang_from_orig_subs_lang(lang_code): + for f in formats: + if f.get('acodec') != 'none' and not f.get('language'): + f['language'] = lang_code + subtitles = {} skipped_subs_clients = set() @@ -4088,7 +4104,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): orig_lang = qs.get('lang', [None])[-1] lang_name = self._get_text(caption_track, 'name', max_runs=1) - if caption_track.get('kind') != 'asr': + is_manual_subs = caption_track.get('kind') != 'asr' + if is_manual_subs: if not lang_code: continue process_language( @@ -4099,16 +4116,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not trans_code: continue orig_trans_code = trans_code - if caption_track.get('kind') != 'asr' and trans_code != 'und': + if is_manual_subs and trans_code != 'und': if not get_translated_subs: continue trans_code += f'-{lang_code}' trans_name += format_field(lang_name, None, ' from %s') if lang_code == f'a-{orig_trans_code}': # Set audio language based on original subtitles - for f in formats: - if f.get('acodec') != 'none' and not f.get('language'): - f['language'] = orig_trans_code + set_audio_lang_from_orig_subs_lang(orig_trans_code) # Add an "-orig" label to the original language so that it can be distinguished. # The subs are returned without "-orig" as well for compatibility process_language( @@ -4119,6 +4134,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): automatic_captions, base_url, trans_code, trans_name, client_name, pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params}) + # Extract automatic captions when the language is not in 'translationLanguages' + # e.g. 
Cantonese [yue], see https://github.com/yt-dlp/yt-dlp/issues/14889 + lang_code = remove_start(lang_code, 'a-') + if is_manual_subs or not lang_code or lang_code in automatic_captions: + continue + lang_name = remove_end(lang_name, ' (auto-generated)') + if caption_track.get('isTranslatable'): + # We can assume this is the original audio language + set_audio_lang_from_orig_subs_lang(lang_code) + process_language( + automatic_captions, base_url, f'{lang_code}-orig', + f'{lang_name} (Original)', client_name, pot_params) + process_language( + automatic_captions, base_url, lang_code, lang_name, client_name, pot_params) + # Avoid duplication if we've already got everything we need need_subs_langs.difference_update(subtitles) need_caps_langs.difference_update(automatic_captions)
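
For reference, a minimal usage sketch (not part of the patch) of the `request_no_ads` extractor argument documented in the README hunk above, passed through the Python API. The argument name and its `true`/`false` values come from that hunk; the video URL is only a placeholder.

```py
import yt_dlp

ydl_opts = {
    'extractor_args': {
        'youtube': {
            # 'true' (default when unauthenticated) skips preroll ads and the wait period;
            # 'false' (default with logged-in cookies) preserves premium formats
            'request_no_ads': ['false'],
        },
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=PLACEHOLDER'])
```

The equivalent on the command line would be `--extractor-args "youtube:request_no_ads=false"`.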