diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b7487f1c2f..8f5df4ce4d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -422,23 +422,23 @@ jobs: runner: windows-2025 python_version: '3.10' platform_tag: win_amd64 - pyi_version: '6.16.0' - pyi_tag: '2025.09.13.221251' - pyi_hash: b6496c7630c3afe66900cfa824e8234a8c2e2c81704bd7facd79586abc76c0e5 + pyi_version: '6.17.0' + pyi_tag: '2025.11.29.054325' + pyi_hash: e28cc13e4ad0cc74330d832202806d0c1976e9165da6047309348ca663c0ed3d - arch: 'x86' runner: windows-2025 python_version: '3.10' platform_tag: win32 - pyi_version: '6.16.0' - pyi_tag: '2025.09.13.221251' - pyi_hash: 2d881843580efdc54f3523507fc6d9c5b6051ee49c743a6d9b7003ac5758c226 + pyi_version: '6.17.0' + pyi_tag: '2025.11.29.054325' + pyi_hash: c00f600c17de3bdd589f043f60ab64fc34fcba6dd902ad973af9c8afc74f80d1 - arch: 'arm64' runner: windows-11-arm python_version: '3.13' # arm64 only has Python >= 3.11 available platform_tag: win_arm64 - pyi_version: '6.16.0' - pyi_tag: '2025.09.13.221251' - pyi_hash: 4250c9085e34a95c898f3ee2f764914fc36ec59f0d97c28e6a75fcf21f7b144f + pyi_version: '6.17.0' + pyi_tag: '2025.11.29.054325' + pyi_hash: a2033b18b4f7bc6108b5fd76a92c6c1de0a12ec4fe98a23396a9f978cb4b7d7b env: CHANNEL: ${{ inputs.channel }} ORIGIN: ${{ needs.process.outputs.origin }} diff --git a/pyproject.toml b/pyproject.toml index d2c5745b95..d06e71d74b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ build = [ "build", "hatchling>=1.27.0", "pip", - "setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149 + "setuptools>=71.0.2", "wheel", ] dev = [ @@ -86,7 +86,7 @@ test = [ "pytest-rerunfailures~=14.0", ] pyinstaller = [ - "pyinstaller>=6.13.0", # Windows temp cleanup fixed in 6.13.0 + "pyinstaller>=6.17.0", # 6.17.0+ needed for compat with setuptools 81+ ] [project.urls] diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index d343069fec..aa6ff6335d 100644 
--- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..networking import Request from ..utils import ( ExtractorError, + UserNotLive, js_to_json, traverse_obj, update_url_query, @@ -205,6 +206,9 @@ class FC2LiveIE(InfoExtractor): 'client_app': 'browser_hls', 'ipv6': '', }), headers={'X-Requested-With': 'XMLHttpRequest'}) + # A non-zero 'status' indicates the stream is not live, so check truthiness + if traverse_obj(control_server, ('status', {int})) and 'control_token' not in control_server: + raise UserNotLive(video_id=video_id) self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw']) ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']}) diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index eef3ed820c..99186ad414 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -23,96 +23,38 @@ from ..utils import ( class NhkBaseIE(InfoExtractor): - _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json' + _API_URL_TEMPLATE = 'https://api.nhkworld.jp/showsapi/v1/{lang}/{content_format}_{page_type}/{m_id}{extra_page}' _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/' def _call_api(self, m_id, lang, is_video, is_episode, is_clip): + content_format = 'video' if is_video else 'audio' + content_type = 'clips' if is_clip else 'episodes' + if not is_episode: + extra_page = f'/{content_format}_{content_type}' + page_type = 'programs' + else: + extra_page = '' + page_type = content_type + return self._download_json( - self._API_URL_TEMPLATE % ( - 'v' if is_video else 'r', - 'clip' if is_clip else 'esd', - 'episode' if is_episode else 'program', - m_id, lang, '/all' if is_video else ''), - m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or [] - - def _get_api_info(self, refresh=True): - if not refresh: - return self.cache.load('nhk', 'api_info') - - 
self.cache.store('nhk', 'api_info', {}) - movie_player_js = self._download_webpage( - 'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None, - note='Downloading stream API information') - api_info = { - 'url': self._search_regex( - r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'), - 'token': self._search_regex( - r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'), - } - self.cache.store('nhk', 'api_info', api_info) - return api_info - - def _extract_stream_info(self, vod_id): - for refresh in (False, True): - api_info = self._get_api_info(refresh) - if not api_info: - continue - - api_url = api_info.pop('url') - meta = traverse_obj( - self._download_json( - api_url, vod_id, 'Downloading stream url info', fatal=False, query={ - **api_info, - 'type': 'json', - 'optional_id': vod_id, - 'active_flg': 1, - }), ('meta', 0)) - stream_url = traverse_obj( - meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False) - - if stream_url: - formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id) - return { - **traverse_obj(meta, { - 'duration': ('duration', {int_or_none}), - 'timestamp': ('publication_date', {unified_timestamp}), - 'release_timestamp': ('insert_date', {unified_timestamp}), - 'modified_timestamp': ('update_date', {unified_timestamp}), - }), - 'formats': formats, - 'subtitles': subtitles, - } - raise ExtractorError('Unable to extract stream url') + self._API_URL_TEMPLATE.format( + lang=lang, content_format=content_format, page_type=page_type, + m_id=m_id, extra_page=extra_page), + join_nonempty(m_id, lang)) def _extract_episode_info(self, url, episode=None): fetch_episode = episode is None lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id') is_video = m_type != 'audio' - if is_video: - episode_id = episode_id[:4] + '-' + episode_id[4:] - if fetch_episode: episode = self._call_api( - 
episode_id, lang, is_video, True, episode_id[:4] == '9999')[0] + episode_id, lang, is_video, is_episode=True, is_clip=episode_id[:4] == '9999') - def get_clean_field(key): - return clean_html(episode.get(key + '_clean') or episode.get(key)) + video_id = join_nonempty('id', 'lang', from_dict=episode) - title = get_clean_field('sub_title') - series = get_clean_field('title') - - thumbnails = [] - for s, w, h in [('', 640, 360), ('_l', 1280, 720)]: - img_path = episode.get('image' + s) - if not img_path: - continue - thumbnails.append({ - 'id': f'{h}p', - 'height': h, - 'width': w, - 'url': 'https://www3.nhk.or.jp' + img_path, - }) + title = episode.get('title') + series = traverse_obj(episode, (('video_program', 'audio_program'), any, 'title')) episode_name = title if series and title: @@ -125,37 +67,52 @@ class NhkBaseIE(InfoExtractor): episode_name = None info = { - 'id': episode_id + '-' + lang, + 'id': video_id, 'title': title, - 'description': get_clean_field('description'), - 'thumbnails': thumbnails, 'series': series, 'episode': episode_name, + **traverse_obj(episode, { + 'description': ('description', {str}), + 'release_timestamp': ('first_broadcasted_at', {unified_timestamp}), + 'categories': ('categories', ..., 'name', {str}), + 'tags': ('tags', ..., 'name', {str}), + 'thumbnails': ('images', lambda _, v: v['url'], { + 'url': ('url', {urljoin(url)}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + 'webpage_url': ('url', {urljoin(url)}), + }), + 'extractor_key': NhkVodIE.ie_key(), + 'extractor': NhkVodIE.IE_NAME, } - if is_video: - vod_id = episode['vod_id'] - info.update({ - **self._extract_stream_info(vod_id), - 'id': vod_id, - }) - + # XXX: We are assuming that 'video' and 'audio' are mutually exclusive + stream_info = traverse_obj(episode, (('video', 'audio'), {dict}, any)) or {} + if not stream_info.get('url'): + self.raise_no_formats('Stream not found; it has most likely expired', expected=True) else: - if 
fetch_episode: + stream_url = stream_info['url'] + if is_video: + formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id) + info.update({ + 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(stream_info, ({ + 'duration': ('duration', {int_or_none}), + 'timestamp': ('published_at', {unified_timestamp}), + })), + }) + else: # From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html - audio_path = remove_end(episode['audio']['audio'], '.m4a') + audio_path = remove_end(stream_url, '.m4a') info['formats'] = self._extract_m3u8_formats( f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8', episode_id, 'm4a', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) for f in info['formats']: f['language'] = lang - else: - info.update({ - '_type': 'url_transparent', - 'ie_key': NhkVodIE.ie_key(), - 'url': url, - }) + return info @@ -168,29 +125,29 @@ class NhkVodIE(NhkBaseIE): # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. 
_TESTS = [{ - 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/', + 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2049165/', 'info_dict': { - 'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302', + 'id': '2049165-en', 'ext': 'mp4', - 'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead', - 'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6', + 'title': 'Japan Railway Journal - Choshi Electric Railway: Fighting to Get Back on Track', + 'description': 'md5:ab57df2fca7f04245148c2e787bb203d', 'thumbnail': r're:https://.+/.+\.jpg', - 'episode': 'The Tohoku Shinkansen: Full Speed Ahead', + 'episode': 'Choshi Electric Railway: Fighting to Get Back on Track', 'series': 'Japan Railway Journal', - 'modified_timestamp': 1707217907, - 'timestamp': 1681428600, - 'release_timestamp': 1693883728, - 'duration': 1679, - 'upload_date': '20230413', - 'modified_date': '20240206', - 'release_date': '20230905', + 'duration': 1680, + 'categories': ['Biz & Tech'], + 'tags': ['Akita', 'Chiba', 'Trains', 'Transcript', 'All (Japan Navigator)'], + 'timestamp': 1759055880, + 'upload_date': '20250928', + 'release_timestamp': 1758810600, + 'release_date': '20250925', }, }, { # video clip 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', 'md5': '153c3016dfd252ba09726588149cf0e7', 'info_dict': { - 'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5', + 'id': '9999011-en', 'ext': 'mp4', 'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU', 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5', @@ -198,24 +155,23 @@ class NhkVodIE(NhkBaseIE): 'series': 'Dining with the Chef', 'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU', 'duration': 148, - 'upload_date': '20190816', - 'release_date': '20230902', - 'release_timestamp': 1693619292, - 'modified_timestamp': 1707217907, - 'modified_date': '20240206', - 'timestamp': 1565997540, + 'categories': ['Food'], + 'tags': ['Washoku'], + 'timestamp': 1548212400, + 'upload_date': 
'20190123', }, }, { # radio - 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/', + 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20240901-1/', 'info_dict': { - 'id': 'livinginjapan-20231001-1-en', + 'id': 'livinginjapan-20240901-1-en', 'ext': 'm4a', - 'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines', + 'title': 'Living in Japan - Weekend Hiking / Self-protection from crime', 'series': 'Living in Japan', - 'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab', + 'description': 'md5:4d0e14ab73bdbfedb60a53b093954ed6', 'thumbnail': r're:https://.+/.+\.jpg', - 'episode': 'Tips for Travelers to Japan / Ramen Vending Machines', + 'episode': 'Weekend Hiking / Self-protection from crime', + 'categories': ['Interactive'], }, }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', @@ -256,96 +212,51 @@ class NhkVodIE(NhkBaseIE): }, 'skip': 'expires 2023-10-15', }, { - # a one-off (single-episode series). title from the api is just '<p></p>' - 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/', + # a one-off (single-episode series). title from the api is just null + 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/', 'info_dict': { - 'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552', + 'id': '3026036-en', 'ext': 'mp4', - 'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla', - 'description': 'md5:5db620c46a0698451cc59add8816b797', - 'thumbnail': r're:https://.+/.+\.jpg', - 'release_date': '20230905', - 'timestamp': 1690103400, - 'duration': 2939, - 'release_timestamp': 1693898699, - 'upload_date': '20230723', - 'modified_timestamp': 1707217907, - 'modified_date': '20240206', - 'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla', - 'series': 'Barakan Discovers', + 'title': 'STATELESS: The Japanese Left Behind in the Philippines', + 'description': 'md5:9a2fd51cdfa9f52baae28569e0053786', + 'duration': 2955, + 'thumbnail': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/images/wide_l_QPtWpt4lzVhm3NzPAMIIF35MCg4CdNwcikPaTS5Q.jpg', + 'categories': ['Documentary', 'Culture & Lifestyle'], + 'tags': ['Transcript', 'Documentary 360', 'The Pursuit of PEACE'], + 'timestamp': 1758931800, + 'upload_date': '20250927', + 'release_timestamp': 1758931800, + 'release_date': '20250927', }, }, { # /ondemand/video/ url with alphabetical character in 5th position of id 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/', 'info_dict': { - 'id': 'nw_c_en_9999-a07', + 'id': '9999a07-en', 'ext': 'mp4', 'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]', 'series': 'Mini-Dramas on SDGs', - 'modified_date': '20240206', 'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]', 'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6', - 'timestamp': 1621962360, - 'duration': 189, - 'release_date': '20230903', - 'modified_timestamp': 1707217907, + 'timestamp': 1621911600, + 'duration': 190, 'upload_date': 
'20210525', 'thumbnail': r're:https://.+/.+\.jpg', - 'release_timestamp': 1693713487, + 'categories': ['Current Affairs', 'Entertainment'], }, }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/', 'info_dict': { - 'id': 'nw_c_en_9999-d17', + 'id': '9999d17-en', 'ext': 'mp4', 'title': 'Flowers of snow blossom - The 72 Pentads of Yamato', 'description': 'Today’s focus: Snow', - 'release_timestamp': 1693792402, - 'release_date': '20230904', - 'upload_date': '20220128', - 'timestamp': 1643370960, 'thumbnail': r're:https://.+/.+\.jpg', 'duration': 136, - 'series': '', - 'modified_date': '20240206', - 'modified_timestamp': 1707217907, - }, - }, { - # new /shows/ url format - 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/', - 'info_dict': { - 'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282', - 'ext': 'mp4', - 'title': 'Japanology Plus - 20th Anniversary Special Part 1', - 'description': 'md5:817d41fc8e54339ad2a916161ea24faf', - 'episode': '20th Anniversary Special Part 1', - 'series': 'Japanology Plus', - 'thumbnail': r're:https://.+/.+\.jpg', - 'duration': 1680, - 'timestamp': 1711020600, - 'upload_date': '20240321', - 'release_timestamp': 1711022683, - 'release_date': '20240321', - 'modified_timestamp': 1711031012, - 'modified_date': '20240321', - }, - }, { - 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/', - 'info_dict': { - 'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944', - 'ext': 'mp4', - 'title': '100 Ideas to Save the World - Working Styles Evolve', - 'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9', - 'episode': 'Working Styles Evolve', - 'series': '100 Ideas to Save the World', - 'thumbnail': r're:https://.+/.+\.jpg', - 'duration': 899, - 'upload_date': '20230325', - 'timestamp': 1679755200, - 'release_date': '20230905', - 'release_timestamp': 1693880540, - 'modified_date': '20240206', - 'modified_timestamp': 1707217907, + 'categories': ['Culture & Lifestyle', 'Science & Nature'], + 'tags': 
['Nara', 'Temples & Shrines', 'Winter', 'Snow'], + 'timestamp': 1643339040, + 'upload_date': '20220128', }, }, { # new /shows/audio/ url format @@ -373,6 +284,7 @@ class NhkVodProgramIE(NhkBaseIE): 'id': 'sumo', 'title': 'GRAND SUMO Highlights', 'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf', + 'series': 'GRAND SUMO Highlights', }, 'playlist_mincount': 1, }, { @@ -381,6 +293,7 @@ class NhkVodProgramIE(NhkBaseIE): 'id': 'japanrailway', 'title': 'Japan Railway Journal', 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f', + 'series': 'Japan Railway Journal', }, 'playlist_mincount': 12, }, { @@ -390,6 +303,7 @@ class NhkVodProgramIE(NhkBaseIE): 'id': 'japanrailway', 'title': 'Japan Railway Journal', 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f', + 'series': 'Japan Railway Journal', }, 'playlist_mincount': 12, }, { @@ -399,17 +313,9 @@ class NhkVodProgramIE(NhkBaseIE): 'id': 'livinginjapan', 'title': 'Living in Japan', 'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54', + 'series': 'Living in Japan', }, - 'playlist_mincount': 12, - }, { - # /tv/ program url - 'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/', - 'info_dict': { - 'id': 'designtalksplus', - 'title': 'DESIGN TALKS plus', - 'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837', - }, - 'playlist_mincount': 20, + 'playlist_mincount': 11, }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/', 'only_matching': True, @@ -430,9 +336,8 @@ class NhkVodProgramIE(NhkBaseIE): program_id, lang, m_type != 'audio', False, episode_type == 'clip') def entries(): - for episode in episodes: - if episode_path := episode.get('url'): - yield self._extract_episode_info(urljoin(url, episode_path), episode) + for episode in traverse_obj(episodes, ('items', lambda _, v: v['url'])): + yield self._extract_episode_info(urljoin(url, episode['url']), episode) html = self._download_webpage(url, program_id) program_title = self._extract_meta_from_class_elements([ @@ -446,7 +351,7 @@ 
class NhkVodProgramIE(NhkBaseIE): 'tAudioProgramMain__info', # /shows/audio/programs/ 'p-program-description'], html) # /tv/ - return self.playlist_result(entries(), program_id, program_title, program_description) + return self.playlist_result(entries(), program_id, program_title, program_description, series=program_title) class NhkForSchoolBangumiIE(InfoExtractor): diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 9038b4a7ff..b511994e8a 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -598,7 +598,8 @@ class PatreonCampaignIE(PatreonBaseIE): 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str})) if not campaign_id: campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), ( - lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')})) + ((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'), + 'id', {str}, any, {require('campaign ID')})) params = { 'json-api-use-default-includes': 'false', diff --git a/yt_dlp/extractor/s4c.py b/yt_dlp/extractor/s4c.py index 6eb8b2b2c6..d35436d104 100644 --- a/yt_dlp/extractor/s4c.py +++ b/yt_dlp/extractor/s4c.py @@ -15,14 +15,15 @@ class S4CIE(InfoExtractor): 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg', }, }, { - 'url': 'https://www.s4c.cymru/clic/programme/856636948', + # Geo restricted to the UK + 'url': 'https://www.s4c.cymru/clic/programme/886303048', 'info_dict': { - 'id': '856636948', + 'id': '886303048', 'ext': 'mp4', - 'title': 'Am Dro', + 'title': 'Pennod 1', + 'description': 'md5:7e3f364b70f61fcdaa8b4cb4a3eb3e7a', 'duration': 2880, - 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', - 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg', + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Stad_2025S4C_P1_210053.jpg', }, }] @@ -51,7 +52,7 @@ class S4CIE(InfoExtractor): 
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ 'mode': 'od', 'application': 'clic', - 'region': 'WW', + 'region': 'UK' if player_config.get('application') == 's4chttpl' else 'WW', 'extra': 'false', 'thirdParty': 'false', 'filename': player_config['filename'], diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 694a92fcd4..bb9a293b86 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -182,13 +182,13 @@ class TubiTvShowIE(InfoExtractor): webpage = self._download_webpage(show_url, playlist_id) data = self._search_json( - r'window\.__data\s*=', webpage, 'data', playlist_id, - transform_source=js_to_json)['video'] + r'window\.__REACT_QUERY_STATE__\s*=', webpage, 'data', playlist_id, + transform_source=js_to_json)['queries'][0]['state']['data'] # v['number'] is already a decimal string, but stringify to protect against API changes path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}] - for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)): + for season in traverse_obj(data, ('seasons', *path)): season_number = int_or_none(season.get('number')) for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])): episode_id = episode['id'] diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 600e0ccda6..a792332046 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -4029,6 +4029,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): STREAMING_DATA_CLIENT_NAME: client_name, }) + def set_audio_lang_from_orig_subs_lang(lang_code): + for f in formats: + if f.get('acodec') != 'none' and not f.get('language'): + f['language'] = lang_code + subtitles = {} skipped_subs_clients = set() @@ -4088,7 +4093,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): orig_lang = qs.get('lang', [None])[-1] lang_name = self._get_text(caption_track, 'name', max_runs=1) - if 
caption_track.get('kind') != 'asr': + is_manual_subs = caption_track.get('kind') != 'asr' + if is_manual_subs: if not lang_code: continue process_language( @@ -4099,16 +4105,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not trans_code: continue orig_trans_code = trans_code - if caption_track.get('kind') != 'asr' and trans_code != 'und': + if is_manual_subs and trans_code != 'und': if not get_translated_subs: continue trans_code += f'-{lang_code}' trans_name += format_field(lang_name, None, ' from %s') if lang_code == f'a-{orig_trans_code}': # Set audio language based on original subtitles - for f in formats: - if f.get('acodec') != 'none' and not f.get('language'): - f['language'] = orig_trans_code + set_audio_lang_from_orig_subs_lang(orig_trans_code) # Add an "-orig" label to the original language so that it can be distinguished. # The subs are returned without "-orig" as well for compatibility process_language( @@ -4119,6 +4123,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): automatic_captions, base_url, trans_code, trans_name, client_name, pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params}) + # Extract automatic captions when the language is not in 'translationLanguages' + # e.g. 
Cantonese [yue], see https://github.com/yt-dlp/yt-dlp/issues/14889 + lang_code = remove_start(lang_code, 'a-') + if is_manual_subs or not lang_code or lang_code in automatic_captions: + continue + lang_name = remove_end(lang_name, ' (auto-generated)') + if caption_track.get('isTranslatable'): + # We can assume this is the original audio language + set_audio_lang_from_orig_subs_lang(lang_code) + process_language( + automatic_captions, base_url, f'{lang_code}-orig', + f'{lang_name} (Original)', client_name, pot_params) + process_language( + automatic_captions, base_url, lang_code, lang_name, client_name, pot_params) + # Avoid duplication if we've already got everything we need need_subs_langs.difference_update(subtitles) need_caps_langs.difference_update(automatic_captions) diff --git a/yt_dlp/utils/_jsruntime.py b/yt_dlp/utils/_jsruntime.py index bd8fd1f880..94db52bf19 100644 --- a/yt_dlp/utils/_jsruntime.py +++ b/yt_dlp/utils/_jsruntime.py @@ -1,21 +1,61 @@ from __future__ import annotations + import abc import dataclasses import functools import os.path +import sys from ._utils import _get_exe_version_output, detect_exe_version, int_or_none -# NOT public API -def runtime_version_tuple(v): +def _runtime_version_tuple(v): # NB: will return (0,) if `v` is an invalid version string return tuple(int_or_none(x, default=0) for x in v.split('.')) +_FALLBACK_PATHEXT = ('.COM', '.EXE', '.BAT', '.CMD') + + +def _find_exe(basename: str) -> str: + if os.name != 'nt': + return basename + + paths: list[str] = [] + + # binary dir + if getattr(sys, 'frozen', False): + paths.append(os.path.dirname(sys.executable)) + # cwd + paths.append(os.getcwd()) + # PATH items + if path := os.environ.get('PATH'): + paths.extend(filter(None, path.split(os.path.pathsep))) + + pathext = os.environ.get('PATHEXT') + if pathext is None: + exts = _FALLBACK_PATHEXT + else: + exts = tuple(ext for ext in pathext.split(os.pathsep) if ext) + + visited = [] + for path in map(os.path.realpath, paths): + 
normed = os.path.normcase(path) + if normed in visited: + continue + visited.append(normed) + + for ext in exts: + binary = os.path.join(path, f'{basename}{ext}') + if os.access(binary, os.F_OK | os.X_OK) and not os.path.isdir(binary): + return binary + + return basename + + def _determine_runtime_path(path, basename): if not path: - return basename + return _find_exe(basename) if os.path.isdir(path): return os.path.join(path, basename) return path @@ -52,7 +92,7 @@ class DenoJsRuntime(JsRuntime): if not out: return None version = detect_exe_version(out, r'^deno (\S+)', 'unknown') - vt = runtime_version_tuple(version) + vt = _runtime_version_tuple(version) return JsRuntimeInfo( name='deno', path=path, version=version, version_tuple=vt, supported=vt >= self.MIN_SUPPORTED_VERSION) @@ -67,7 +107,7 @@ class BunJsRuntime(JsRuntime): if not out: return None version = detect_exe_version(out, r'^(\S+)', 'unknown') - vt = runtime_version_tuple(version) + vt = _runtime_version_tuple(version) return JsRuntimeInfo( name='bun', path=path, version=version, version_tuple=vt, supported=vt >= self.MIN_SUPPORTED_VERSION) @@ -82,7 +122,7 @@ class NodeJsRuntime(JsRuntime): if not out: return None version = detect_exe_version(out, r'^v(\S+)', 'unknown') - vt = runtime_version_tuple(version) + vt = _runtime_version_tuple(version) return JsRuntimeInfo( name='node', path=path, version=version, version_tuple=vt, supported=vt >= self.MIN_SUPPORTED_VERSION) @@ -100,7 +140,7 @@ class QuickJsRuntime(JsRuntime): is_ng = 'QuickJS-ng' in out version = detect_exe_version(out, r'^QuickJS(?:-ng)?\s+version\s+(\S+)', 'unknown') - vt = runtime_version_tuple(version.replace('-', '.')) + vt = _runtime_version_tuple(version.replace('-', '.')) if is_ng: return JsRuntimeInfo( name='quickjs-ng', path=path, version=version, version_tuple=vt, diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index c6ae21f6c7..65cd2373ce 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -876,13 
+876,19 @@ class Popen(subprocess.Popen): kwargs.setdefault('encoding', 'utf-8') kwargs.setdefault('errors', 'replace') - if shell and os.name == 'nt' and kwargs.get('executable') is None: - if not isinstance(args, str): - args = shell_quote(args, shell=True) - shell = False - # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`) - env['='] = '"^\n\n"' - args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"' + if os.name == 'nt' and kwargs.get('executable') is None: + # Must apply shell escaping if we are trying to run a batch file + # These conditions should be very specific to limit impact + if not shell and isinstance(args, list) and args and args[0].lower().endswith(('.bat', '.cmd')): + shell = True + + if shell: + if not isinstance(args, str): + args = shell_quote(args, shell=True) + shell = False + # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`) + env['='] = '"^\n\n"' + args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"' super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)