Merge branch 'master' into instagramfix

This commit is contained in:
Zer0 Spectrum 2025-12-01 17:00:32 +05:30 committed by GitHub
commit d3e57f839d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 215 additions and 239 deletions

View file

@ -422,23 +422,23 @@ jobs:
runner: windows-2025 runner: windows-2025
python_version: '3.10' python_version: '3.10'
platform_tag: win_amd64 platform_tag: win_amd64
pyi_version: '6.16.0' pyi_version: '6.17.0'
pyi_tag: '2025.09.13.221251' pyi_tag: '2025.11.29.054325'
pyi_hash: b6496c7630c3afe66900cfa824e8234a8c2e2c81704bd7facd79586abc76c0e5 pyi_hash: e28cc13e4ad0cc74330d832202806d0c1976e9165da6047309348ca663c0ed3d
- arch: 'x86' - arch: 'x86'
runner: windows-2025 runner: windows-2025
python_version: '3.10' python_version: '3.10'
platform_tag: win32 platform_tag: win32
pyi_version: '6.16.0' pyi_version: '6.17.0'
pyi_tag: '2025.09.13.221251' pyi_tag: '2025.11.29.054325'
pyi_hash: 2d881843580efdc54f3523507fc6d9c5b6051ee49c743a6d9b7003ac5758c226 pyi_hash: c00f600c17de3bdd589f043f60ab64fc34fcba6dd902ad973af9c8afc74f80d1
- arch: 'arm64' - arch: 'arm64'
runner: windows-11-arm runner: windows-11-arm
python_version: '3.13' # arm64 only has Python >= 3.11 available python_version: '3.13' # arm64 only has Python >= 3.11 available
platform_tag: win_arm64 platform_tag: win_arm64
pyi_version: '6.16.0' pyi_version: '6.17.0'
pyi_tag: '2025.09.13.221251' pyi_tag: '2025.11.29.054325'
pyi_hash: 4250c9085e34a95c898f3ee2f764914fc36ec59f0d97c28e6a75fcf21f7b144f pyi_hash: a2033b18b4f7bc6108b5fd76a92c6c1de0a12ec4fe98a23396a9f978cb4b7d7b
env: env:
CHANNEL: ${{ inputs.channel }} CHANNEL: ${{ inputs.channel }}
ORIGIN: ${{ needs.process.outputs.origin }} ORIGIN: ${{ needs.process.outputs.origin }}

View file

@ -69,7 +69,7 @@ build = [
"build", "build",
"hatchling>=1.27.0", "hatchling>=1.27.0",
"pip", "pip",
"setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149 "setuptools>=71.0.2",
"wheel", "wheel",
] ]
dev = [ dev = [
@ -86,7 +86,7 @@ test = [
"pytest-rerunfailures~=14.0", "pytest-rerunfailures~=14.0",
] ]
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.13.0", # Windows temp cleanup fixed in 6.13.0 "pyinstaller>=6.17.0", # 6.17.0+ needed for compat with setuptools 81+
] ]
[project.urls] [project.urls]

View file

@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..networking import Request from ..networking import Request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
UserNotLive,
js_to_json, js_to_json,
traverse_obj, traverse_obj,
update_url_query, update_url_query,
@ -205,6 +206,9 @@ class FC2LiveIE(InfoExtractor):
'client_app': 'browser_hls', 'client_app': 'browser_hls',
'ipv6': '', 'ipv6': '',
}), headers={'X-Requested-With': 'XMLHttpRequest'}) }), headers={'X-Requested-With': 'XMLHttpRequest'})
# A non-zero 'status' indicates the stream is not live, so check truthiness
if traverse_obj(control_server, ('status', {int})) and 'control_token' not in control_server:
raise UserNotLive(video_id=video_id)
self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw']) self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])
ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']}) ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})

View file

@ -23,96 +23,38 @@ from ..utils import (
class NhkBaseIE(InfoExtractor): class NhkBaseIE(InfoExtractor):
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json' _API_URL_TEMPLATE = 'https://api.nhkworld.jp/showsapi/v1/{lang}/{content_format}_{page_type}/{m_id}{extra_page}'
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/' _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'
def _call_api(self, m_id, lang, is_video, is_episode, is_clip): def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
content_format = 'video' if is_video else 'audio'
content_type = 'clips' if is_clip else 'episodes'
if not is_episode:
extra_page = f'/{content_format}_{content_type}'
page_type = 'programs'
else:
extra_page = ''
page_type = content_type
return self._download_json( return self._download_json(
self._API_URL_TEMPLATE % ( self._API_URL_TEMPLATE.format(
'v' if is_video else 'r', lang=lang, content_format=content_format, page_type=page_type,
'clip' if is_clip else 'esd', m_id=m_id, extra_page=extra_page),
'episode' if is_episode else 'program', join_nonempty(m_id, lang))
m_id, lang, '/all' if is_video else ''),
m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
def _get_api_info(self, refresh=True):
if not refresh:
return self.cache.load('nhk', 'api_info')
self.cache.store('nhk', 'api_info', {})
movie_player_js = self._download_webpage(
'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
note='Downloading stream API information')
api_info = {
'url': self._search_regex(
r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
'token': self._search_regex(
r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'),
}
self.cache.store('nhk', 'api_info', api_info)
return api_info
def _extract_stream_info(self, vod_id):
for refresh in (False, True):
api_info = self._get_api_info(refresh)
if not api_info:
continue
api_url = api_info.pop('url')
meta = traverse_obj(
self._download_json(
api_url, vod_id, 'Downloading stream url info', fatal=False, query={
**api_info,
'type': 'json',
'optional_id': vod_id,
'active_flg': 1,
}), ('meta', 0))
stream_url = traverse_obj(
meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
if stream_url:
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
return {
**traverse_obj(meta, {
'duration': ('duration', {int_or_none}),
'timestamp': ('publication_date', {unified_timestamp}),
'release_timestamp': ('insert_date', {unified_timestamp}),
'modified_timestamp': ('update_date', {unified_timestamp}),
}),
'formats': formats,
'subtitles': subtitles,
}
raise ExtractorError('Unable to extract stream url')
def _extract_episode_info(self, url, episode=None): def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id') lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
is_video = m_type != 'audio' is_video = m_type != 'audio'
if is_video:
episode_id = episode_id[:4] + '-' + episode_id[4:]
if fetch_episode: if fetch_episode:
episode = self._call_api( episode = self._call_api(
episode_id, lang, is_video, True, episode_id[:4] == '9999')[0] episode_id, lang, is_video, is_episode=True, is_clip=episode_id[:4] == '9999')
def get_clean_field(key): video_id = join_nonempty('id', 'lang', from_dict=episode)
return clean_html(episode.get(key + '_clean') or episode.get(key))
title = get_clean_field('sub_title') title = episode.get('title')
series = get_clean_field('title') series = traverse_obj(episode, (('video_program', 'audio_program'), any, 'title'))
thumbnails = []
for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
img_path = episode.get('image' + s)
if not img_path:
continue
thumbnails.append({
'id': f'{h}p',
'height': h,
'width': w,
'url': 'https://www3.nhk.or.jp' + img_path,
})
episode_name = title episode_name = title
if series and title: if series and title:
@ -125,37 +67,52 @@ class NhkBaseIE(InfoExtractor):
episode_name = None episode_name = None
info = { info = {
'id': episode_id + '-' + lang, 'id': video_id,
'title': title, 'title': title,
'description': get_clean_field('description'),
'thumbnails': thumbnails,
'series': series, 'series': series,
'episode': episode_name, 'episode': episode_name,
**traverse_obj(episode, {
'description': ('description', {str}),
'release_timestamp': ('first_broadcasted_at', {unified_timestamp}),
'categories': ('categories', ..., 'name', {str}),
'tags': ('tags', ..., 'name', {str}),
'thumbnails': ('images', lambda _, v: v['url'], {
'url': ('url', {urljoin(url)}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
'webpage_url': ('url', {urljoin(url)}),
}),
'extractor_key': NhkVodIE.ie_key(),
'extractor': NhkVodIE.IE_NAME,
} }
if is_video: # XXX: We are assuming that 'video' and 'audio' are mutually exclusive
vod_id = episode['vod_id'] stream_info = traverse_obj(episode, (('video', 'audio'), {dict}, any)) or {}
info.update({ if not stream_info.get('url'):
**self._extract_stream_info(vod_id), self.raise_no_formats('Stream not found; it has most likely expired', expected=True)
'id': vod_id,
})
else: else:
if fetch_episode: stream_url = stream_info['url']
if is_video:
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
info.update({
'formats': formats,
'subtitles': subtitles,
**traverse_obj(stream_info, ({
'duration': ('duration', {int_or_none}),
'timestamp': ('published_at', {unified_timestamp}),
})),
})
else:
# From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html # From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
audio_path = remove_end(episode['audio']['audio'], '.m4a') audio_path = remove_end(stream_url, '.m4a')
info['formats'] = self._extract_m3u8_formats( info['formats'] = self._extract_m3u8_formats(
f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8', f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8',
episode_id, 'm4a', entry_protocol='m3u8_native', episode_id, 'm4a', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
for f in info['formats']: for f in info['formats']:
f['language'] = lang f['language'] = lang
else:
info.update({
'_type': 'url_transparent',
'ie_key': NhkVodIE.ie_key(),
'url': url,
})
return info return info
@ -168,29 +125,29 @@ class NhkVodIE(NhkBaseIE):
# Content available only for a limited period of time. Visit # Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{ _TESTS = [{
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2049165/',
'info_dict': { 'info_dict': {
'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302', 'id': '2049165-en',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead', 'title': 'Japan Railway Journal - Choshi Electric Railway: Fighting to Get Back on Track',
'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6', 'description': 'md5:ab57df2fca7f04245148c2e787bb203d',
'thumbnail': r're:https://.+/.+\.jpg', 'thumbnail': r're:https://.+/.+\.jpg',
'episode': 'The Tohoku Shinkansen: Full Speed Ahead', 'episode': 'Choshi Electric Railway: Fighting to Get Back on Track',
'series': 'Japan Railway Journal', 'series': 'Japan Railway Journal',
'modified_timestamp': 1707217907, 'duration': 1680,
'timestamp': 1681428600, 'categories': ['Biz & Tech'],
'release_timestamp': 1693883728, 'tags': ['Akita', 'Chiba', 'Trains', 'Transcript', 'All (Japan Navigator)'],
'duration': 1679, 'timestamp': 1759055880,
'upload_date': '20230413', 'upload_date': '20250928',
'modified_date': '20240206', 'release_timestamp': 1758810600,
'release_date': '20230905', 'release_date': '20250925',
}, },
}, { }, {
# video clip # video clip
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
'md5': '153c3016dfd252ba09726588149cf0e7', 'md5': '153c3016dfd252ba09726588149cf0e7',
'info_dict': { 'info_dict': {
'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5', 'id': '9999011-en',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU', 'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5', 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
@ -198,24 +155,23 @@ class NhkVodIE(NhkBaseIE):
'series': 'Dining with the Chef', 'series': 'Dining with the Chef',
'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU', 'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
'duration': 148, 'duration': 148,
'upload_date': '20190816', 'categories': ['Food'],
'release_date': '20230902', 'tags': ['Washoku'],
'release_timestamp': 1693619292, 'timestamp': 1548212400,
'modified_timestamp': 1707217907, 'upload_date': '20190123',
'modified_date': '20240206',
'timestamp': 1565997540,
}, },
}, { }, {
# radio # radio
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20240901-1/',
'info_dict': { 'info_dict': {
'id': 'livinginjapan-20231001-1-en', 'id': 'livinginjapan-20240901-1-en',
'ext': 'm4a', 'ext': 'm4a',
'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines', 'title': 'Living in Japan - Weekend Hiking / Self-protection from crime',
'series': 'Living in Japan', 'series': 'Living in Japan',
'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab', 'description': 'md5:4d0e14ab73bdbfedb60a53b093954ed6',
'thumbnail': r're:https://.+/.+\.jpg', 'thumbnail': r're:https://.+/.+\.jpg',
'episode': 'Tips for Travelers to Japan / Ramen Vending Machines', 'episode': 'Weekend Hiking / Self-protection from crime',
'categories': ['Interactive'],
}, },
}, { }, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
@ -256,96 +212,51 @@ class NhkVodIE(NhkBaseIE):
}, },
'skip': 'expires 2023-10-15', 'skip': 'expires 2023-10-15',
}, { }, {
# a one-off (single-episode series). title from the api is just '<p></p>' # a one-off (single-episode series). title from the api is just null
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/',
'info_dict': { 'info_dict': {
'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552', 'id': '3026036-en',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla', 'title': 'STATELESS: The Japanese Left Behind in the Philippines',
'description': 'md5:5db620c46a0698451cc59add8816b797', 'description': 'md5:9a2fd51cdfa9f52baae28569e0053786',
'thumbnail': r're:https://.+/.+\.jpg', 'duration': 2955,
'release_date': '20230905', 'thumbnail': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/images/wide_l_QPtWpt4lzVhm3NzPAMIIF35MCg4CdNwcikPaTS5Q.jpg',
'timestamp': 1690103400, 'categories': ['Documentary', 'Culture & Lifestyle'],
'duration': 2939, 'tags': ['Transcript', 'Documentary 360', 'The Pursuit of PEACE'],
'release_timestamp': 1693898699, 'timestamp': 1758931800,
'upload_date': '20230723', 'upload_date': '20250927',
'modified_timestamp': 1707217907, 'release_timestamp': 1758931800,
'modified_date': '20240206', 'release_date': '20250927',
'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
'series': 'Barakan Discovers',
}, },
}, { }, {
# /ondemand/video/ url with alphabetical character in 5th position of id # /ondemand/video/ url with alphabetical character in 5th position of id
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
'info_dict': { 'info_dict': {
'id': 'nw_c_en_9999-a07', 'id': '9999a07-en',
'ext': 'mp4', 'ext': 'mp4',
'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]', 'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
'series': 'Mini-Dramas on SDGs', 'series': 'Mini-Dramas on SDGs',
'modified_date': '20240206',
'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]', 'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6', 'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
'timestamp': 1621962360, 'timestamp': 1621911600,
'duration': 189, 'duration': 190,
'release_date': '20230903',
'modified_timestamp': 1707217907,
'upload_date': '20210525', 'upload_date': '20210525',
'thumbnail': r're:https://.+/.+\.jpg', 'thumbnail': r're:https://.+/.+\.jpg',
'release_timestamp': 1693713487, 'categories': ['Current Affairs', 'Entertainment'],
}, },
}, { }, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
'info_dict': { 'info_dict': {
'id': 'nw_c_en_9999-d17', 'id': '9999d17-en',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Flowers of snow blossom - The 72 Pentads of Yamato', 'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
'description': 'Todays focus: Snow', 'description': 'Todays focus: Snow',
'release_timestamp': 1693792402,
'release_date': '20230904',
'upload_date': '20220128',
'timestamp': 1643370960,
'thumbnail': r're:https://.+/.+\.jpg', 'thumbnail': r're:https://.+/.+\.jpg',
'duration': 136, 'duration': 136,
'series': '', 'categories': ['Culture & Lifestyle', 'Science & Nature'],
'modified_date': '20240206', 'tags': ['Nara', 'Temples & Shrines', 'Winter', 'Snow'],
'modified_timestamp': 1707217907, 'timestamp': 1643339040,
}, 'upload_date': '20220128',
}, {
# new /shows/ url format
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
'info_dict': {
'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
'ext': 'mp4',
'title': 'Japanology Plus - 20th Anniversary Special Part 1',
'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
'episode': '20th Anniversary Special Part 1',
'series': 'Japanology Plus',
'thumbnail': r're:https://.+/.+\.jpg',
'duration': 1680,
'timestamp': 1711020600,
'upload_date': '20240321',
'release_timestamp': 1711022683,
'release_date': '20240321',
'modified_timestamp': 1711031012,
'modified_date': '20240321',
},
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
'info_dict': {
'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
'ext': 'mp4',
'title': '100 Ideas to Save the World - Working Styles Evolve',
'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
'episode': 'Working Styles Evolve',
'series': '100 Ideas to Save the World',
'thumbnail': r're:https://.+/.+\.jpg',
'duration': 899,
'upload_date': '20230325',
'timestamp': 1679755200,
'release_date': '20230905',
'release_timestamp': 1693880540,
'modified_date': '20240206',
'modified_timestamp': 1707217907,
}, },
}, { }, {
# new /shows/audio/ url format # new /shows/audio/ url format
@ -373,6 +284,7 @@ class NhkVodProgramIE(NhkBaseIE):
'id': 'sumo', 'id': 'sumo',
'title': 'GRAND SUMO Highlights', 'title': 'GRAND SUMO Highlights',
'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf', 'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
'series': 'GRAND SUMO Highlights',
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
}, { }, {
@ -381,6 +293,7 @@ class NhkVodProgramIE(NhkBaseIE):
'id': 'japanrailway', 'id': 'japanrailway',
'title': 'Japan Railway Journal', 'title': 'Japan Railway Journal',
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f', 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
'series': 'Japan Railway Journal',
}, },
'playlist_mincount': 12, 'playlist_mincount': 12,
}, { }, {
@ -390,6 +303,7 @@ class NhkVodProgramIE(NhkBaseIE):
'id': 'japanrailway', 'id': 'japanrailway',
'title': 'Japan Railway Journal', 'title': 'Japan Railway Journal',
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f', 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
'series': 'Japan Railway Journal',
}, },
'playlist_mincount': 12, 'playlist_mincount': 12,
}, { }, {
@ -399,17 +313,9 @@ class NhkVodProgramIE(NhkBaseIE):
'id': 'livinginjapan', 'id': 'livinginjapan',
'title': 'Living in Japan', 'title': 'Living in Japan',
'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54', 'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
'series': 'Living in Japan',
}, },
'playlist_mincount': 12, 'playlist_mincount': 11,
}, {
# /tv/ program url
'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
'info_dict': {
'id': 'designtalksplus',
'title': 'DESIGN TALKS plus',
'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
},
'playlist_mincount': 20,
}, { }, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
'only_matching': True, 'only_matching': True,
@ -430,9 +336,8 @@ class NhkVodProgramIE(NhkBaseIE):
program_id, lang, m_type != 'audio', False, episode_type == 'clip') program_id, lang, m_type != 'audio', False, episode_type == 'clip')
def entries(): def entries():
for episode in episodes: for episode in traverse_obj(episodes, ('items', lambda _, v: v['url'])):
if episode_path := episode.get('url'): yield self._extract_episode_info(urljoin(url, episode['url']), episode)
yield self._extract_episode_info(urljoin(url, episode_path), episode)
html = self._download_webpage(url, program_id) html = self._download_webpage(url, program_id)
program_title = self._extract_meta_from_class_elements([ program_title = self._extract_meta_from_class_elements([
@ -446,7 +351,7 @@ class NhkVodProgramIE(NhkBaseIE):
'tAudioProgramMain__info', # /shows/audio/programs/ 'tAudioProgramMain__info', # /shows/audio/programs/
'p-program-description'], html) # /tv/ 'p-program-description'], html) # /tv/
return self.playlist_result(entries(), program_id, program_title, program_description) return self.playlist_result(entries(), program_id, program_title, program_description, series=program_title)
class NhkForSchoolBangumiIE(InfoExtractor): class NhkForSchoolBangumiIE(InfoExtractor):

View file

@ -598,7 +598,8 @@ class PatreonCampaignIE(PatreonBaseIE):
'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str})) 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
if not campaign_id: if not campaign_id:
campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), ( campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')})) ((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'),
'id', {str}, any, {require('campaign ID')}))
params = { params = {
'json-api-use-default-includes': 'false', 'json-api-use-default-includes': 'false',

View file

@ -15,14 +15,15 @@ class S4CIE(InfoExtractor):
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg', 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
}, },
}, { }, {
'url': 'https://www.s4c.cymru/clic/programme/856636948', # Geo restricted to the UK
'url': 'https://www.s4c.cymru/clic/programme/886303048',
'info_dict': { 'info_dict': {
'id': '856636948', 'id': '886303048',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Am Dro', 'title': 'Pennod 1',
'description': 'md5:7e3f364b70f61fcdaa8b4cb4a3eb3e7a',
'duration': 2880, 'duration': 2880,
'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Stad_2025S4C_P1_210053.jpg',
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
}, },
}] }]
@ -51,7 +52,7 @@ class S4CIE(InfoExtractor):
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
'mode': 'od', 'mode': 'od',
'application': 'clic', 'application': 'clic',
'region': 'WW', 'region': 'UK' if player_config.get('application') == 's4chttpl' else 'WW',
'extra': 'false', 'extra': 'false',
'thirdParty': 'false', 'thirdParty': 'false',
'filename': player_config['filename'], 'filename': player_config['filename'],

View file

@ -182,13 +182,13 @@ class TubiTvShowIE(InfoExtractor):
webpage = self._download_webpage(show_url, playlist_id) webpage = self._download_webpage(show_url, playlist_id)
data = self._search_json( data = self._search_json(
r'window\.__data\s*=', webpage, 'data', playlist_id, r'window\.__REACT_QUERY_STATE__\s*=', webpage, 'data', playlist_id,
transform_source=js_to_json)['video'] transform_source=js_to_json)['queries'][0]['state']['data']
# v['number'] is already a decimal string, but stringify to protect against API changes # v['number'] is already a decimal string, but stringify to protect against API changes
path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}] path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}]
for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)): for season in traverse_obj(data, ('seasons', *path)):
season_number = int_or_none(season.get('number')) season_number = int_or_none(season.get('number'))
for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])): for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])):
episode_id = episode['id'] episode_id = episode['id']

View file

@ -4029,6 +4029,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
STREAMING_DATA_CLIENT_NAME: client_name, STREAMING_DATA_CLIENT_NAME: client_name,
}) })
def set_audio_lang_from_orig_subs_lang(lang_code):
for f in formats:
if f.get('acodec') != 'none' and not f.get('language'):
f['language'] = lang_code
subtitles = {} subtitles = {}
skipped_subs_clients = set() skipped_subs_clients = set()
@ -4088,7 +4093,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
orig_lang = qs.get('lang', [None])[-1] orig_lang = qs.get('lang', [None])[-1]
lang_name = self._get_text(caption_track, 'name', max_runs=1) lang_name = self._get_text(caption_track, 'name', max_runs=1)
if caption_track.get('kind') != 'asr': is_manual_subs = caption_track.get('kind') != 'asr'
if is_manual_subs:
if not lang_code: if not lang_code:
continue continue
process_language( process_language(
@ -4099,16 +4105,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not trans_code: if not trans_code:
continue continue
orig_trans_code = trans_code orig_trans_code = trans_code
if caption_track.get('kind') != 'asr' and trans_code != 'und': if is_manual_subs and trans_code != 'und':
if not get_translated_subs: if not get_translated_subs:
continue continue
trans_code += f'-{lang_code}' trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, None, ' from %s') trans_name += format_field(lang_name, None, ' from %s')
if lang_code == f'a-{orig_trans_code}': if lang_code == f'a-{orig_trans_code}':
# Set audio language based on original subtitles # Set audio language based on original subtitles
for f in formats: set_audio_lang_from_orig_subs_lang(orig_trans_code)
if f.get('acodec') != 'none' and not f.get('language'):
f['language'] = orig_trans_code
# Add an "-orig" label to the original language so that it can be distinguished. # Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility # The subs are returned without "-orig" as well for compatibility
process_language( process_language(
@ -4119,6 +4123,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
automatic_captions, base_url, trans_code, trans_name, client_name, automatic_captions, base_url, trans_code, trans_name, client_name,
pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params}) pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params})
# Extract automatic captions when the language is not in 'translationLanguages'
# e.g. Cantonese [yue], see https://github.com/yt-dlp/yt-dlp/issues/14889
lang_code = remove_start(lang_code, 'a-')
if is_manual_subs or not lang_code or lang_code in automatic_captions:
continue
lang_name = remove_end(lang_name, ' (auto-generated)')
if caption_track.get('isTranslatable'):
# We can assume this is the original audio language
set_audio_lang_from_orig_subs_lang(lang_code)
process_language(
automatic_captions, base_url, f'{lang_code}-orig',
f'{lang_name} (Original)', client_name, pot_params)
process_language(
automatic_captions, base_url, lang_code, lang_name, client_name, pot_params)
# Avoid duplication if we've already got everything we need # Avoid duplication if we've already got everything we need
need_subs_langs.difference_update(subtitles) need_subs_langs.difference_update(subtitles)
need_caps_langs.difference_update(automatic_captions) need_caps_langs.difference_update(automatic_captions)

View file

@ -1,21 +1,61 @@
from __future__ import annotations from __future__ import annotations
import abc import abc
import dataclasses import dataclasses
import functools import functools
import os.path import os.path
import sys
from ._utils import _get_exe_version_output, detect_exe_version, int_or_none from ._utils import _get_exe_version_output, detect_exe_version, int_or_none
# NOT public API def _runtime_version_tuple(v):
def runtime_version_tuple(v):
# NB: will return (0,) if `v` is an invalid version string # NB: will return (0,) if `v` is an invalid version string
return tuple(int_or_none(x, default=0) for x in v.split('.')) return tuple(int_or_none(x, default=0) for x in v.split('.'))
_FALLBACK_PATHEXT = ('.COM', '.EXE', '.BAT', '.CMD')
def _find_exe(basename: str) -> str:
if os.name != 'nt':
return basename
paths: list[str] = []
# binary dir
if getattr(sys, 'frozen', False):
paths.append(os.path.dirname(sys.executable))
# cwd
paths.append(os.getcwd())
# PATH items
if path := os.environ.get('PATH'):
paths.extend(filter(None, path.split(os.path.pathsep)))
pathext = os.environ.get('PATHEXT')
if pathext is None:
exts = _FALLBACK_PATHEXT
else:
exts = tuple(ext for ext in pathext.split(os.pathsep) if ext)
visited = []
for path in map(os.path.realpath, paths):
normed = os.path.normcase(path)
if normed in visited:
continue
visited.append(normed)
for ext in exts:
binary = os.path.join(path, f'{basename}{ext}')
if os.access(binary, os.F_OK | os.X_OK) and not os.path.isdir(binary):
return binary
return basename
def _determine_runtime_path(path, basename): def _determine_runtime_path(path, basename):
if not path: if not path:
return basename return _find_exe(basename)
if os.path.isdir(path): if os.path.isdir(path):
return os.path.join(path, basename) return os.path.join(path, basename)
return path return path
@ -52,7 +92,7 @@ class DenoJsRuntime(JsRuntime):
if not out: if not out:
return None return None
version = detect_exe_version(out, r'^deno (\S+)', 'unknown') version = detect_exe_version(out, r'^deno (\S+)', 'unknown')
vt = runtime_version_tuple(version) vt = _runtime_version_tuple(version)
return JsRuntimeInfo( return JsRuntimeInfo(
name='deno', path=path, version=version, version_tuple=vt, name='deno', path=path, version=version, version_tuple=vt,
supported=vt >= self.MIN_SUPPORTED_VERSION) supported=vt >= self.MIN_SUPPORTED_VERSION)
@ -67,7 +107,7 @@ class BunJsRuntime(JsRuntime):
if not out: if not out:
return None return None
version = detect_exe_version(out, r'^(\S+)', 'unknown') version = detect_exe_version(out, r'^(\S+)', 'unknown')
vt = runtime_version_tuple(version) vt = _runtime_version_tuple(version)
return JsRuntimeInfo( return JsRuntimeInfo(
name='bun', path=path, version=version, version_tuple=vt, name='bun', path=path, version=version, version_tuple=vt,
supported=vt >= self.MIN_SUPPORTED_VERSION) supported=vt >= self.MIN_SUPPORTED_VERSION)
@ -82,7 +122,7 @@ class NodeJsRuntime(JsRuntime):
if not out: if not out:
return None return None
version = detect_exe_version(out, r'^v(\S+)', 'unknown') version = detect_exe_version(out, r'^v(\S+)', 'unknown')
vt = runtime_version_tuple(version) vt = _runtime_version_tuple(version)
return JsRuntimeInfo( return JsRuntimeInfo(
name='node', path=path, version=version, version_tuple=vt, name='node', path=path, version=version, version_tuple=vt,
supported=vt >= self.MIN_SUPPORTED_VERSION) supported=vt >= self.MIN_SUPPORTED_VERSION)
@ -100,7 +140,7 @@ class QuickJsRuntime(JsRuntime):
is_ng = 'QuickJS-ng' in out is_ng = 'QuickJS-ng' in out
version = detect_exe_version(out, r'^QuickJS(?:-ng)?\s+version\s+(\S+)', 'unknown') version = detect_exe_version(out, r'^QuickJS(?:-ng)?\s+version\s+(\S+)', 'unknown')
vt = runtime_version_tuple(version.replace('-', '.')) vt = _runtime_version_tuple(version.replace('-', '.'))
if is_ng: if is_ng:
return JsRuntimeInfo( return JsRuntimeInfo(
name='quickjs-ng', path=path, version=version, version_tuple=vt, name='quickjs-ng', path=path, version=version, version_tuple=vt,

View file

@ -876,13 +876,19 @@ class Popen(subprocess.Popen):
kwargs.setdefault('encoding', 'utf-8') kwargs.setdefault('encoding', 'utf-8')
kwargs.setdefault('errors', 'replace') kwargs.setdefault('errors', 'replace')
if shell and os.name == 'nt' and kwargs.get('executable') is None: if os.name == 'nt' and kwargs.get('executable') is None:
if not isinstance(args, str): # Must apply shell escaping if we are trying to run a batch file
args = shell_quote(args, shell=True) # These conditions should be very specific to limit impact
shell = False if not shell and isinstance(args, list) and args and args[0].lower().endswith(('.bat', '.cmd')):
# Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`) shell = True
env['='] = '"^\n\n"'
args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"' if shell:
if not isinstance(args, str):
args = shell_quote(args, shell=True)
shell = False
# Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
env['='] = '"^\n\n"'
args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'
super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo) super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)