diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 29c18723c2..69f8eb0c8d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -214,7 +214,7 @@ jobs:
- name: Build Unix platform-independent binary
run: |
- make all tar
+ make all-extra tar
- name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION
@@ -341,14 +341,14 @@ jobs:
brew uninstall --ignore-dependencies python3
python3 -m venv ~/yt-dlp-build-venv
source ~/yt-dlp-build-venv/bin/activate
- python3 devscripts/install_deps.py -o --include build
- python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
+ python3 devscripts/install_deps.py --only-optional-groups --include-group build
+ python3 devscripts/install_deps.py --print --include-group pyinstaller > requirements.txt
# We need to ignore wheels otherwise we break universal2 builds
python3 -m pip install -U --no-binary :all: -r requirements.txt
# We need to fuse our own universal2 wheels for curl_cffi
python3 -m pip install -U 'delocate==0.11.0'
mkdir curl_cffi_whls curl_cffi_universal2
- python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
+ python3 devscripts/install_deps.py --print --only-optional-groups --include-group curl-cffi > requirements.txt
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
python3 -m pip download \
--only-binary=:all: \
@@ -482,11 +482,11 @@ jobs:
mkdir /pyi-wheels
python -m pip download -d /pyi-wheels --no-deps --require-hashes "pyinstaller@${Env:PYI_URL}#sha256=${Env:PYI_HASH}"
python -m pip install --force-reinstall -U "/pyi-wheels/${Env:PYI_WHEEL}"
- python devscripts/install_deps.py -o --include build
+ python devscripts/install_deps.py --only-optional-groups --include-group build
if ("${Env:ARCH}" -eq "x86") {
python devscripts/install_deps.py
} else {
- python devscripts/install_deps.py --include curl-cffi
+ python devscripts/install_deps.py --include-group curl-cffi
}
- name: Prepare
diff --git a/.github/workflows/challenge-tests.yml b/.github/workflows/challenge-tests.yml
new file mode 100644
index 0000000000..89895eb07b
--- /dev/null
+++ b/.github/workflows/challenge-tests.yml
@@ -0,0 +1,77 @@
+name: Challenge Tests
+on:
+ push:
+ paths:
+ - .github/workflows/challenge-tests.yml
+ - test/test_jsc/*.py
+ - yt_dlp/extractor/youtube/jsc/**.js
+ - yt_dlp/extractor/youtube/jsc/**.py
+ - yt_dlp/extractor/youtube/pot/**.py
+ - yt_dlp/utils/_jsruntime.py
+ pull_request:
+ paths:
+ - .github/workflows/challenge-tests.yml
+ - test/test_jsc/*.py
+ - yt_dlp/extractor/youtube/jsc/**.js
+ - yt_dlp/extractor/youtube/jsc/**.py
+ - yt_dlp/extractor/youtube/pot/**.py
+ - yt_dlp/utils/_jsruntime.py
+permissions:
+ contents: read
+
+concurrency:
+ group: challenge-tests-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+ tests:
+ name: Challenge Tests
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, windows-latest]
+ python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', pypy-3.11]
+ env:
+ QJS_VERSION: '2025-04-26' # Earliest version with rope strings
+ steps:
+ - uses: actions/checkout@v5
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v6
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install Deno
+ uses: denoland/setup-deno@v2
+ with:
+ deno-version: '2.0.0' # minimum supported version
+ - name: Install Bun
+ uses: oven-sh/setup-bun@v2
+ with:
+ # minimum supported version is 1.0.31 but earliest available Windows version is 1.1.0
+ bun-version: ${{ (matrix.os == 'windows-latest' && '1.1.0') || '1.0.31' }}
+ - name: Install Node
+ uses: actions/setup-node@v6
+ with:
+ node-version: '20.0' # minimum supported version
+ - name: Install QuickJS (Linux)
+ if: matrix.os == 'ubuntu-latest'
+ run: |
+ wget "https://bellard.org/quickjs/binary_releases/quickjs-linux-x86_64-${QJS_VERSION}.zip" -O quickjs.zip
+ unzip quickjs.zip qjs
+ sudo install qjs /usr/local/bin/qjs
+ - name: Install QuickJS (Windows)
+ if: matrix.os == 'windows-latest'
+ shell: pwsh
+ run: |
+ Invoke-WebRequest "https://bellard.org/quickjs/binary_releases/quickjs-win-x86_64-${Env:QJS_VERSION}.zip" -OutFile quickjs.zip
+ unzip quickjs.zip
+ - name: Install test requirements
+ run: |
+ python ./devscripts/install_deps.py --print --only-optional-groups --include-group test > requirements.txt
+ python ./devscripts/install_deps.py --print -c certifi -c requests -c urllib3 -c yt-dlp-ejs >> requirements.txt
+ python -m pip install -U -r requirements.txt
+ - name: Run tests
+ timeout-minutes: 15
+ run: |
+ python -m yt_dlp -v --js-runtimes node --js-runtimes bun --js-runtimes quickjs || true
+ python ./devscripts/run_tests.py test/test_jsc -k download
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index d196f59d83..ae3dc95e1b 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -7,6 +7,7 @@ on:
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/**.py'
+ - yt_dlp/extractor/youtube/**.py
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
@@ -17,6 +18,7 @@ on:
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/**.py'
+ - yt_dlp/extractor/youtube/**.py
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
@@ -59,7 +61,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Install test requirements
- run: python3 ./devscripts/install_deps.py --include test --include curl-cffi
+ run: python ./devscripts/install_deps.py --include-group test --include-group curl-cffi
- name: Run tests
timeout-minutes: 15
continue-on-error: False
diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml
index 8163bd1a23..d075270d7b 100644
--- a/.github/workflows/download.yml
+++ b/.github/workflows/download.yml
@@ -15,10 +15,10 @@ jobs:
with:
python-version: '3.10'
- name: Install test requirements
- run: python3 ./devscripts/install_deps.py --include dev
+ run: python ./devscripts/install_deps.py --include-group dev
- name: Run tests
continue-on-error: true
- run: python3 ./devscripts/run_tests.py download
+ run: python ./devscripts/run_tests.py download
full:
name: Full Download Tests
@@ -42,7 +42,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Install test requirements
- run: python3 ./devscripts/install_deps.py --include dev
+ run: python ./devscripts/install_deps.py --include-group dev
- name: Run tests
continue-on-error: true
- run: python3 ./devscripts/run_tests.py download
+ run: python ./devscripts/run_tests.py download
diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml
index c26628b421..a6e84b1d80 100644
--- a/.github/workflows/quick-test.yml
+++ b/.github/workflows/quick-test.yml
@@ -15,7 +15,7 @@ jobs:
with:
python-version: '3.10'
- name: Install test requirements
- run: python3 ./devscripts/install_deps.py -o --include test
+ run: python ./devscripts/install_deps.py --only-optional-groups --include-group test
- name: Run tests
timeout-minutes: 15
run: |
@@ -31,9 +31,9 @@ jobs:
with:
python-version: '3.10'
- name: Install dev dependencies
- run: python3 ./devscripts/install_deps.py -o --include static-analysis
+ run: python ./devscripts/install_deps.py --only-optional-groups --include-group static-analysis
- name: Make lazy extractors
- run: python3 ./devscripts/make_lazy_extractors.py
+ run: python ./devscripts/make_lazy_extractors.py
- name: Run ruff
run: ruff check --output-format github .
- name: Run autopep8
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b60a0650a5..afe1d384b4 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -180,7 +180,7 @@ jobs:
- name: Install Requirements
run: |
sudo apt -y install pandoc man
- python devscripts/install_deps.py -o --include build
+ python devscripts/install_deps.py --only-optional-groups --include-group build
- name: Prepare
env:
@@ -269,9 +269,10 @@ jobs:
"[]" \
"(https://github.com/${MASTER_REPO}/releases/latest \"Master builds\")" >> ./RELEASE_NOTES
fi
- printf '\n\n%s\n\n%s%s\n\n---\n' \
+ printf '\n\n%s\n\n%s%s%s\n\n---\n' \
"#### A description of the various files is in the [README](https://github.com/${REPOSITORY}#release-files)" \
- "The PyInstaller-bundled executables are subject to the licenses described in " \
+ "The zipimport Unix executable contains code licensed under ISC and MIT. " \
+ "The PyInstaller-bundled executables are subject to these and other licenses, all of which are compiled in " \
"[THIRD_PARTY_LICENSES.txt](https://github.com/${BASE_REPO}/blob/${HEAD_SHA}/THIRD_PARTY_LICENSES.txt)" >> ./RELEASE_NOTES
python ./devscripts/make_changelog.py -vv --collapsible >> ./RELEASE_NOTES
printf '%s\n\n' '**This is a pre-release build**' >> ./PRERELEASE_NOTES
diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml
deleted file mode 100644
index 77f5e6a4c8..0000000000
--- a/.github/workflows/signature-tests.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-name: Signature Tests
-on:
- push:
- paths:
- - .github/workflows/signature-tests.yml
- - test/test_youtube_signature.py
- - yt_dlp/jsinterp.py
- pull_request:
- paths:
- - .github/workflows/signature-tests.yml
- - test/test_youtube_signature.py
- - yt_dlp/jsinterp.py
-permissions:
- contents: read
-
-concurrency:
- group: signature-tests-${{ github.event.pull_request.number || github.ref }}
- cancel-in-progress: ${{ github.event_name == 'pull_request' }}
-
-jobs:
- tests:
- name: Signature Tests
- runs-on: ${{ matrix.os }}
- strategy:
- fail-fast: false
- matrix:
- os: [ubuntu-latest, windows-latest]
- python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', pypy-3.11]
- steps:
- - uses: actions/checkout@v5
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v6
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install test requirements
- run: python3 ./devscripts/install_deps.py --only-optional --include test
- - name: Run tests
- timeout-minutes: 15
- run: |
- python3 -m yt_dlp -v || true # Print debug head
- python3 ./devscripts/run_tests.py test/test_youtube_signature.py
diff --git a/.github/workflows/test-workflows.yml b/.github/workflows/test-workflows.yml
index 6c993e6b34..37bf044d69 100644
--- a/.github/workflows/test-workflows.yml
+++ b/.github/workflows/test-workflows.yml
@@ -34,7 +34,7 @@ jobs:
env:
ACTIONLINT_TARBALL: ${{ format('actionlint_{0}_linux_amd64.tar.gz', env.ACTIONLINT_VERSION) }}
run: |
- python -m devscripts.install_deps -o --include test
+ python -m devscripts.install_deps --only-optional-groups --include-group test
sudo apt -y install shellcheck
python -m pip install -U pyflakes
curl -LO "${ACTIONLINT_REPO}/releases/download/v${ACTIONLINT_VERSION}/${ACTIONLINT_TARBALL}"
diff --git a/.gitignore b/.gitignore
index 40bb34d2aa..af6da639db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -107,6 +107,7 @@ README.txt
test/testdata/sigs/player-*.js
test/testdata/thumbnails/empty.webp
test/testdata/thumbnails/foo\ %d\ bar/foo_%d.*
+.ejs-*
# Binary
/youtube-dl
@@ -129,3 +130,6 @@ yt-dlp.zip
# Plugins
ytdlp_plugins/
yt-dlp-plugins
+
+# Packages
+yt_dlp_ejs/
diff --git a/Makefile b/Makefile
index 290955d209..89aef9033b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
all: lazy-extractors yt-dlp doc pypi-files
+all-extra: lazy-extractors yt-dlp-extra doc pypi-files
clean: clean-test clean-dist
clean-all: clean clean-cache
completions: completion-bash completion-fish completion-zsh
@@ -15,7 +16,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
.PHONY: all clean clean-all clean-test clean-dist clean-cache \
completions completion-bash completion-fish completion-zsh \
doc issuetemplates supportedsites ot offlinetest codetest test \
- tar pypi-files lazy-extractors install uninstall
+ tar pypi-files lazy-extractors install uninstall \
+ all-extra yt-dlp-extra current-ejs-version
+
+.IGNORE: current-ejs-version
+.SILENT: current-ejs-version
clean-test:
rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
@@ -25,7 +30,8 @@ clean-test:
test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."*
clean-dist:
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
- yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
+ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS \
+ yt-dlp.zip .ejs-* yt_dlp_ejs/
clean-cache:
find . \( \
-type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
@@ -81,28 +87,49 @@ test:
offlinetest: codetest
$(PYTHON) -m pytest -Werror -m "not download"
-CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
-CODE_FOLDERS != $(CODE_FOLDERS_CMD)
-CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
-CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
-CODE_FILES != $(CODE_FILES_CMD)
-CODE_FILES ?= $(shell $(CODE_FILES_CMD))
-yt-dlp: $(CODE_FILES)
+PY_CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's|/__init__\.py||' | grep -v '/__' | sort
+PY_CODE_FOLDERS != $(PY_CODE_FOLDERS_CMD)
+PY_CODE_FOLDERS ?= $(shell $(PY_CODE_FOLDERS_CMD))
+
+PY_CODE_FILES_CMD = for f in $(PY_CODE_FOLDERS) ; do echo "$$f" | sed 's|$$|/*.py|' ; done
+PY_CODE_FILES != $(PY_CODE_FILES_CMD)
+PY_CODE_FILES ?= $(shell $(PY_CODE_FILES_CMD))
+
+JS_CODE_FOLDERS_CMD = find yt_dlp -type f -name '*.js' | sed 's|/[^/]\{1,\}\.js$$||' | uniq
+JS_CODE_FOLDERS != $(JS_CODE_FOLDERS_CMD)
+JS_CODE_FOLDERS ?= $(shell $(JS_CODE_FOLDERS_CMD))
+
+JS_CODE_FILES_CMD = for f in $(JS_CODE_FOLDERS) ; do echo "$$f" | sed 's|$$|/*.js|' ; done
+JS_CODE_FILES != $(JS_CODE_FILES_CMD)
+JS_CODE_FILES ?= $(shell $(JS_CODE_FILES_CMD))
+
+yt-dlp.zip: $(PY_CODE_FILES) $(JS_CODE_FILES)
mkdir -p zip
- for d in $(CODE_FOLDERS) ; do \
+ for d in $(PY_CODE_FOLDERS) ; do \
mkdir -p zip/$$d ;\
cp -pPR $$d/*.py zip/$$d/ ;\
done
- (cd zip && touch -t 200001010101 $(CODE_FILES))
- mv zip/yt_dlp/__main__.py zip/
- (cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
+ for d in $(JS_CODE_FOLDERS) ; do \
+ mkdir -p zip/$$d ;\
+ cp -pPR $$d/*.js zip/$$d/ ;\
+ done
+ (cd zip && touch -t 200001010101 $(PY_CODE_FILES) $(JS_CODE_FILES))
+ rm -f zip/yt_dlp/__main__.py
+ (cd zip && zip -q ../yt-dlp.zip $(PY_CODE_FILES) $(JS_CODE_FILES))
rm -rf zip
+
+yt-dlp: yt-dlp.zip
+ mkdir -p zip
+ cp -pP yt_dlp/__main__.py zip/
+ touch -t 200001010101 zip/__main__.py
+ (cd zip && zip -q ../yt-dlp.zip __main__.py)
echo '#!$(PYTHON)' > yt-dlp
cat yt-dlp.zip >> yt-dlp
rm yt-dlp.zip
chmod a+x yt-dlp
+ rm -rf zip
-README.md: $(CODE_FILES) devscripts/make_readme.py
+README.md: $(PY_CODE_FILES) devscripts/make_readme.py
COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py
CONTRIBUTING.md: README.md devscripts/make_contributing.py
@@ -127,15 +154,15 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py
pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
rm -f yt-dlp.1.temp.md
-completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
+completions/bash/yt-dlp: $(PY_CODE_FILES) devscripts/bash-completion.in
mkdir -p completions/bash
$(PYTHON) devscripts/bash-completion.py
-completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
+completions/zsh/_yt-dlp: $(PY_CODE_FILES) devscripts/zsh-completion.in
mkdir -p completions/zsh
$(PYTHON) devscripts/zsh-completion.py
-completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
+completions/fish/yt-dlp.fish: $(PY_CODE_FILES) devscripts/fish-completion.in
mkdir -p completions/fish
$(PYTHON) devscripts/fish-completion.py
@@ -172,3 +199,45 @@ CONTRIBUTORS: Changelog.md
echo 'Updating $@ from git commit history' ; \
$(PYTHON) devscripts/make_changelog.py -v -c > /dev/null ; \
fi
+
+# The following EJS_-prefixed variables are auto-generated by devscripts/update_ejs.py
+# DO NOT EDIT!
+EJS_VERSION = 0.3.1
+EJS_WHEEL_NAME = yt_dlp_ejs-0.3.1-py3-none-any.whl
+EJS_WHEEL_HASH = sha256:a6e3548874db7c774388931752bb46c7f4642c044b2a189e56968f3d5ecab622
+EJS_PY_FOLDERS = yt_dlp_ejs yt_dlp_ejs/yt yt_dlp_ejs/yt/solver
+EJS_PY_FILES = yt_dlp_ejs/__init__.py yt_dlp_ejs/_version.py yt_dlp_ejs/yt/__init__.py yt_dlp_ejs/yt/solver/__init__.py
+EJS_JS_FOLDERS = yt_dlp_ejs/yt/solver
+EJS_JS_FILES = yt_dlp_ejs/yt/solver/core.min.js yt_dlp_ejs/yt/solver/lib.min.js
+
+yt-dlp-extra: current-ejs-version .ejs-$(EJS_VERSION) $(EJS_PY_FILES) $(EJS_JS_FILES) yt-dlp.zip
+ mkdir -p zip
+ for d in $(EJS_PY_FOLDERS) ; do \
+ mkdir -p zip/$$d ;\
+ cp -pPR $$d/*.py zip/$$d/ ;\
+ done
+ for d in $(EJS_JS_FOLDERS) ; do \
+ mkdir -p zip/$$d ;\
+ cp -pPR $$d/*.js zip/$$d/ ;\
+ done
+ (cd zip && touch -t 200001010101 $(EJS_PY_FILES) $(EJS_JS_FILES))
+ (cd zip && zip -q ../yt-dlp.zip $(EJS_PY_FILES) $(EJS_JS_FILES))
+ cp -pP yt_dlp/__main__.py zip/
+ touch -t 200001010101 zip/__main__.py
+ (cd zip && zip -q ../yt-dlp.zip __main__.py)
+ echo '#!$(PYTHON)' > yt-dlp
+ cat yt-dlp.zip >> yt-dlp
+ rm yt-dlp.zip
+ chmod a+x yt-dlp
+ rm -rf zip
+
+.ejs-$(EJS_VERSION):
+ @echo Downloading yt-dlp-ejs
+ @echo "yt-dlp-ejs==$(EJS_VERSION) --hash $(EJS_WHEEL_HASH)" > .ejs-requirements.txt
+ $(PYTHON) -m pip download -d ./build --no-deps --require-hashes -r .ejs-requirements.txt
+ unzip -o build/$(EJS_WHEEL_NAME) "yt_dlp_ejs/*"
+ @touch .ejs-$(EJS_VERSION)
+
+current-ejs-version:
+ rm -rf .ejs-*
+ touch .ejs-$$($(PYTHON) -c 'import sys; sys.path = [""]; from yt_dlp_ejs import version; print(version)' 2>/dev/null)
diff --git a/README.md b/README.md
index 7b3cd0970d..0274b6fe9f 100644
--- a/README.md
+++ b/README.md
@@ -145,9 +145,11 @@ While yt-dlp is licensed under the [Unlicense](LICENSE), many of the release fil
Most notably, the PyInstaller-bundled executables include GPLv3+ licensed code, and as such the combined work is licensed under [GPLv3+](https://www.gnu.org/licenses/gpl-3.0.html).
-See [THIRD_PARTY_LICENSES.txt](THIRD_PARTY_LICENSES.txt) for details.
+The zipimport Unix executable (`yt-dlp`) contains [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) licensed code from [`meriyah`](https://github.com/meriyah/meriyah) and [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) licensed code from [`astring`](https://github.com/davidbonnet/astring).
-The zipimport binary (`yt-dlp`), the source tarball (`yt-dlp.tar.gz`), and the PyPI source distribution & wheel only contain code licensed under the [Unlicense](LICENSE).
+See [THIRD_PARTY_LICENSES.txt](THIRD_PARTY_LICENSES.txt) for more details.
+
+The git repository, the source tarball (`yt-dlp.tar.gz`), the PyPI source distribution and the PyPI built distribution (wheel) only contain code licensed under the [Unlicense](LICENSE).
@@ -201,7 +203,7 @@ Python versions 3.10+ (CPython) and 3.11+ (PyPy) are supported. Other versions a
On Windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually.
-->
-While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended
+While all the other dependencies are optional, `ffmpeg`, `ffprobe`, `yt-dlp-ejs` and a JavaScript runtime are highly recommended
### Strongly recommended
@@ -211,6 +213,10 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
**Important**: What you need is ffmpeg *binary*, **NOT** [the Python package of the same name](https://pypi.org/project/ffmpeg)
+* [**yt-dlp-ejs**](https://github.com/yt-dlp/ejs) - Required for deciphering YouTube n/sig values. Licensed under [Unlicense](https://github.com/yt-dlp/ejs/blob/main/LICENSE), bundles [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) and [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) components.
+
+ A JavaScript runtime like [**deno**](https://deno.land) (recommended), [**node.js**](https://nodejs.org), [**bun**](https://bun.sh), or [**QuickJS**](https://bellard.org/quickjs/) is also required to run yt-dlp-ejs. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/EJS).
+
### Networking
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE)
@@ -235,7 +241,7 @@ The following provide support for impersonating browser requests. This may be re
### Misc
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
-* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
+* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in some extractors where JavaScript needs to be run. No longer used for YouTube. To be deprecated in the near future. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* Any external downloader that you want to use with `--downloader`
@@ -362,6 +368,26 @@ Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords
--no-plugin-dirs Clear plugin directories to search,
including defaults and those provided by
previous --plugin-dirs
+ --js-runtimes RUNTIME[:PATH] Additional JavaScript runtime to enable,
+ with an optional path to the runtime
+ location. This option can be used multiple
+ times to enable multiple runtimes. Supported
+ runtimes: deno, node, bun, quickjs. By
+ default, only "deno" runtime is enabled.
+ --no-js-runtimes Clear JavaScript runtimes to enable,
+ including defaults and those provided by
+ previous --js-runtimes
+ --remote-components COMPONENT Remote components to allow yt-dlp to fetch
+ when required. You can use this option
+ multiple times to allow multiple components.
+ Supported values: ejs:npm (external
+ JavaScript components from npm), ejs:github
+ (external JavaScript components from yt-dlp-
+ ejs GitHub). By default, no remote
+ components are allowed.
+ --no-remote-components Disallow fetching of all remote components,
+ including any previously allowed by
+ --remote-components or defaults.
--flat-playlist Do not extract a playlist's URL result
entries; some entry metadata may be missing
and downloading may be bypassed
@@ -1814,7 +1840,7 @@ The following extractors use this feature:
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `android_sdkless,tv,web_safari,web` is used. `android_sdkless` is omitted if cookies are passed. If premium cookies are passed, `tv,web_creator,web_safari,web` is used instead. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
+* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply`, `tv_downgraded`, and `tv_embedded`. By default, `tv,android_sdkless,web` is used. If no JavaScript runtime is available, then `android_sdkless,web_safari,web` is used. If logged-in cookies are passed to yt-dlp, then `tv_downgraded,web_safari,web` is used for free accounts and `tv_downgraded,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
@@ -1833,6 +1859,10 @@ The following extractors use this feature:
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
* `playback_wait`: Duration (in seconds) to wait inbetween the extraction and download stages in order to ensure the formats are available. The default is `6` seconds
+* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)
+
+#### youtube-ejs
+* `jitless`: Run supported JavaScript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Do note that `node` and `bun` are still considered insecure. Either `true` or `false` (default)
#### youtubepot-webpo
* `bind_to_visitor_id`: Whether to use the Visitor ID instead of Visitor Data for caching WebPO tokens. Either `true` (default) or `false`
diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt
index 1040046541..f7977064a0 100644
--- a/THIRD_PARTY_LICENSES.txt
+++ b/THIRD_PARTY_LICENSES.txt
@@ -4431,3 +4431,43 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+
+--------------------------------------------------------------------------------
+Meriyah | ISC
+URL: https://github.com/meriyah/meriyah
+--------------------------------------------------------------------------------
+ISC License
+
+Copyright (c) 2019 and later, KFlash and others.
+
+Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+
+
+--------------------------------------------------------------------------------
+Astring | MIT
+URL: https://github.com/davidbonnet/astring/
+--------------------------------------------------------------------------------
+Copyright (c) 2015, David Bonnet
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/bundle/docker/linux/build.sh b/bundle/docker/linux/build.sh
index 71adaad058..b30d40980e 100755
--- a/bundle/docker/linux/build.sh
+++ b/bundle/docker/linux/build.sh
@@ -15,12 +15,12 @@ function venvpy {
}
INCLUDES=(
- --include pyinstaller
- --include secretstorage
+ --include-group pyinstaller
+ --include-group secretstorage
)
if [[ -z "${EXCLUDE_CURL_CFFI:-}" ]]; then
- INCLUDES+=(--include curl-cffi)
+ INCLUDES+=(--include-group curl-cffi)
fi
runpy -m venv /yt-dlp-build-venv
@@ -28,7 +28,7 @@ runpy -m venv /yt-dlp-build-venv
source /yt-dlp-build-venv/bin/activate
# Inside the venv we use venvpy instead of runpy
venvpy -m ensurepip --upgrade --default-pip
-venvpy -m devscripts.install_deps -o --include build
+venvpy -m devscripts.install_deps --only-optional-groups --include-group build
venvpy -m devscripts.install_deps "${INCLUDES[@]}"
venvpy -m devscripts.make_lazy_extractors
venvpy devscripts/update-version.py -c "${CHANNEL}" -r "${ORIGIN}" "${VERSION}"
diff --git a/devscripts/generate_third_party_licenses.py b/devscripts/generate_third_party_licenses.py
index db615d2e35..322d56f633 100644
--- a/devscripts/generate_third_party_licenses.py
+++ b/devscripts/generate_third_party_licenses.py
@@ -271,6 +271,19 @@ DEPENDENCIES: list[Dependency] = [
license_url='https://raw.githubusercontent.com/python-websockets/websockets/refs/heads/main/LICENSE',
project_url='https://websockets.readthedocs.io/',
),
+ # Dependencies of yt-dlp-ejs
+ Dependency(
+ name='Meriyah',
+ license='ISC',
+ license_url='https://raw.githubusercontent.com/meriyah/meriyah/refs/heads/main/LICENSE.md',
+ project_url='https://github.com/meriyah/meriyah',
+ ),
+ Dependency(
+ name='Astring',
+ license='MIT',
+ license_url='https://raw.githubusercontent.com/davidbonnet/astring/refs/heads/main/LICENSE',
+ project_url='https://github.com/davidbonnet/astring/',
+ ),
]
diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py
index d292505458..07c646a4c0 100755
--- a/devscripts/install_deps.py
+++ b/devscripts/install_deps.py
@@ -22,14 +22,19 @@ def parse_args():
'input', nargs='?', metavar='TOMLFILE', default=Path(__file__).parent.parent / 'pyproject.toml',
help='input file (default: %(default)s)')
parser.add_argument(
- '-e', '--exclude', metavar='DEPENDENCY', action='append',
- help='exclude a dependency')
+ '-e', '--exclude-dependency', metavar='DEPENDENCY', action='append',
+ help='exclude a dependency (can be used multiple times)')
parser.add_argument(
- '-i', '--include', metavar='GROUP', action='append',
- help='include an optional dependency group')
+ '-i', '--include-group', metavar='GROUP', action='append',
+ help='include an optional dependency group (can be used multiple times)')
parser.add_argument(
- '-o', '--only-optional', action='store_true',
- help='only install optional dependencies')
+ '-c', '--cherry-pick', metavar='DEPENDENCY', action='append',
+ help=(
+ 'only include a specific dependency from the resulting dependency list '
+ '(can be used multiple times)'))
+ parser.add_argument(
+ '-o', '--only-optional-groups', action='store_true',
+ help='omit default dependencies unless the "default" group is specified with --include-group')
parser.add_argument(
'-p', '--print', action='store_true',
help='only print requirements to stdout')
@@ -39,30 +44,41 @@ def parse_args():
return parser.parse_args()
+def uniq(arg) -> dict[str, None]:
+ return dict.fromkeys(map(str.lower, arg or ()))
+
+
def main():
args = parse_args()
project_table = parse_toml(read_file(args.input))['project']
recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<group_name>[\w-]+)\]')
optional_groups = project_table['optional-dependencies']
- excludes = args.exclude or []
+
+ excludes = uniq(args.exclude_dependency)
+ only_includes = uniq(args.cherry_pick)
+ include_groups = uniq(args.include_group)
def yield_deps(group):
for dep in group:
if mobj := recursive_pattern.fullmatch(dep):
- yield from optional_groups.get(mobj.group('group_name'), [])
+ yield from optional_groups.get(mobj.group('group_name'), ())
else:
yield dep
- targets = []
- if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
- targets.extend(project_table['dependencies'])
- if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
- targets.extend(yield_deps(optional_groups['default']))
+ targets = {}
+ if not args.only_optional_groups:
+ # legacy: 'dependencies' is empty now
+ targets.update(dict.fromkeys(project_table['dependencies']))
+ targets.update(dict.fromkeys(yield_deps(optional_groups['default'])))
- for include in filter(None, map(optional_groups.get, args.include or [])):
- targets.extend(yield_deps(include))
+ for include in filter(None, map(optional_groups.get, include_groups)):
+ targets.update(dict.fromkeys(yield_deps(include)))
- targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes]
+ def target_filter(target):
+ name = re.match(r'[\w-]+', target).group(0).lower()
+ return name not in excludes and (not only_includes or name in only_includes)
+
+ targets = list(filter(target_filter, targets))
if args.print:
for target in targets:
diff --git a/devscripts/update_ejs.py b/devscripts/update_ejs.py
new file mode 100644
index 0000000000..cffb1aa2b4
--- /dev/null
+++ b/devscripts/update_ejs.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import contextlib
+import io
+import json
+import hashlib
+import pathlib
+import urllib.request
+import zipfile
+
+
+TEMPLATE = '''\
+# This file is generated by devscripts/update_ejs.py. DO NOT MODIFY!
+
+VERSION = {version!r}
+HASHES = {{
+{hash_mapping}
+}}
+'''
+PREFIX = ' "yt-dlp-ejs=='
+BASE_PATH = pathlib.Path(__file__).parent.parent
+PYPROJECT_PATH = BASE_PATH / 'pyproject.toml'
+PACKAGE_PATH = BASE_PATH / 'yt_dlp/extractor/youtube/jsc/_builtin/vendor'
+RELEASE_URL = 'https://api.github.com/repos/yt-dlp/ejs/releases/latest'
+ASSETS = {
+ 'yt.solver.lib.js': False,
+ 'yt.solver.lib.min.js': False,
+ 'yt.solver.deno.lib.js': True,
+ 'yt.solver.bun.lib.js': True,
+ 'yt.solver.core.min.js': False,
+ 'yt.solver.core.js': True,
+}
+MAKEFILE_PATH = BASE_PATH / 'Makefile'
+
+
+def request(url: str):
+ return contextlib.closing(urllib.request.urlopen(url))
+
+
+def makefile_variables(
+ version: str | None = None,
+ name: str | None = None,
+ digest: str | None = None,
+ data: bytes | None = None,
+ keys_only: bool = False,
+) -> dict[str, str | None]:
+ assert keys_only or all(arg is not None for arg in (version, name, digest, data))
+
+ return {
+ 'EJS_VERSION': None if keys_only else version,
+ 'EJS_WHEEL_NAME': None if keys_only else name,
+ 'EJS_WHEEL_HASH': None if keys_only else digest,
+ 'EJS_PY_FOLDERS': None if keys_only else list_wheel_contents(data, 'py', files=False),
+ 'EJS_PY_FILES': None if keys_only else list_wheel_contents(data, 'py', folders=False),
+ 'EJS_JS_FOLDERS': None if keys_only else list_wheel_contents(data, 'js', files=False),
+ 'EJS_JS_FILES': None if keys_only else list_wheel_contents(data, 'js', folders=False),
+ }
+
+
+def list_wheel_contents(
+ wheel_data: bytes,
+ suffix: str | None = None,
+ folders: bool = True,
+ files: bool = True,
+) -> str:
+ assert folders or files, 'at least one of "folders" or "files" must be True'
+
+ path_gen = (zinfo.filename for zinfo in zipfile.ZipFile(io.BytesIO(wheel_data)).infolist())
+ filtered = filter(lambda path: path.startswith('yt_dlp_ejs/'), path_gen)
+ if suffix:
+ filtered = filter(lambda path: path.endswith(f'.{suffix}'), filtered)
+
+ files_list = list(filtered)
+ if not folders:
+ return ' '.join(files_list)
+
+ folders_list = list(dict.fromkeys(path.rpartition('/')[0] for path in files_list))
+ if not files:
+ return ' '.join(folders_list)
+
+ return ' '.join(folders_list + files_list)
+
+
+def main():
+ current_version = None
+ with PYPROJECT_PATH.open() as file:
+ for line in file:
+ if not line.startswith(PREFIX):
+ continue
+ current_version, _, _ = line.removeprefix(PREFIX).partition('"')
+
+ if not current_version:
+ print('yt-dlp-ejs dependency line could not be found')
+ return
+
+ makefile_info = makefile_variables(keys_only=True)
+ prefixes = tuple(f'{key} = ' for key in makefile_info)
+ with MAKEFILE_PATH.open() as file:
+ for line in file:
+ if not line.startswith(prefixes):
+ continue
+ key, _, val = line.partition(' = ')
+ makefile_info[key] = val.rstrip()
+
+ with request(RELEASE_URL) as resp:
+ info = json.load(resp)
+
+ version = info['tag_name']
+ if version == current_version:
+ print(f'yt-dlp-ejs is up to date! ({version})')
+ return
+
+ print(f'Updating yt-dlp-ejs from {current_version} to {version}')
+ hashes = []
+ wheel_info = {}
+ for asset in info['assets']:
+ name = asset['name']
+ is_wheel = name.startswith('yt_dlp_ejs-') and name.endswith('.whl')
+ if not is_wheel and name not in ASSETS:
+ continue
+ with request(asset['browser_download_url']) as resp:
+ data = resp.read()
+
+ # verify digest from github
+ digest = asset['digest']
+ algo, _, expected = digest.partition(':')
+ hexdigest = hashlib.new(algo, data).hexdigest()
+ assert hexdigest == expected, f'downloaded attest mismatch ({hexdigest!r} != {expected!r})'
+
+ if is_wheel:
+ wheel_info = makefile_variables(version, name, digest, data)
+ continue
+
+ # calculate sha3-512 digest
+ asset_hash = hashlib.sha3_512(data).hexdigest()
+ hashes.append(f' {name!r}: {asset_hash!r},')
+
+ if ASSETS[name]:
+ (PACKAGE_PATH / name).write_bytes(data)
+
+ hash_mapping = '\n'.join(hashes)
+ for asset_name in ASSETS:
+ assert asset_name in hash_mapping, f'{asset_name} not found in release'
+
+ assert all(wheel_info.get(key) for key in makefile_info), 'wheel info not found in release'
+
+ (PACKAGE_PATH / '_info.py').write_text(TEMPLATE.format(
+ version=version,
+ hash_mapping=hash_mapping,
+ ))
+
+ content = PYPROJECT_PATH.read_text()
+ updated = content.replace(PREFIX + current_version, PREFIX + version)
+ PYPROJECT_PATH.write_text(updated)
+
+ makefile = MAKEFILE_PATH.read_text()
+ for key in wheel_info:
+ makefile = makefile.replace(f'{key} = {makefile_info[key]}', f'{key} = {wheel_info[key]}')
+ MAKEFILE_PATH.write_text(makefile)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/pyproject.toml b/pyproject.toml
index 1d6e573791..d2c5745b95 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,7 @@ default = [
"requests>=2.32.2,<3",
"urllib3>=2.0.2,<3",
"websockets>=13.0",
+ "yt-dlp-ejs==0.3.1",
]
curl-cffi = [
"curl-cffi>=0.5.10,!=0.6.*,!=0.7.*,!=0.8.*,!=0.9.*,<0.14; implementation_name=='cpython'",
@@ -122,7 +123,12 @@ artifacts = [
[tool.hatch.build.targets.wheel]
packages = ["yt_dlp"]
-artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
+artifacts = [
+ "/yt_dlp/extractor/lazy_extractors.py",
+]
+exclude = [
+ "/yt_dlp/**/*.md",
+]
[tool.hatch.build.targets.wheel.shared-data]
"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp"
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 72dfc20288..2705accb76 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -13,12 +13,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import contextlib
import copy
-import itertools
import json
from test.helper import FakeYDL, assertRegexpMatches, try_rm
from yt_dlp import YoutubeDL
-from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor
from yt_dlp.utils import (
@@ -337,99 +335,6 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'format': '[format_id!*=-]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
- def test_youtube_format_selection(self):
- # FIXME: Rewrite in accordance with the new format sorting options
- return
-
- order = [
- '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
- # Apple HTTP Live Streaming
- '96', '95', '94', '93', '92', '132', '151',
- # 3D
- '85', '84', '102', '83', '101', '82', '100',
- # Dash video
- '137', '248', '136', '247', '135', '246',
- '245', '244', '134', '243', '133', '242', '160',
- # Dash audio
- '141', '172', '140', '171', '139',
- ]
-
- def format_info(f_id):
- info = YoutubeIE._formats[f_id].copy()
-
- # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
- # and 'vcodec', while in tests such information is incomplete since
- # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
- # test_YoutubeDL.test_youtube_format_selection is broken without
- # this fix
- if 'acodec' in info and 'vcodec' not in info:
- info['vcodec'] = 'none'
- elif 'vcodec' in info and 'acodec' not in info:
- info['acodec'] = 'none'
-
- info['format_id'] = f_id
- info['url'] = 'url:' + f_id
- return info
- formats_order = [format_info(f_id) for f_id in order]
-
- info_dict = _make_result(list(formats_order), extractor='youtube')
- ydl = YDL({'format': 'bestvideo+bestaudio'})
- ydl.sort_formats(info_dict)
- ydl.process_ie_result(info_dict)
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], '248+172')
- self.assertEqual(downloaded['ext'], 'mp4')
-
- info_dict = _make_result(list(formats_order), extractor='youtube')
- ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
- ydl.sort_formats(info_dict)
- ydl.process_ie_result(info_dict)
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], '38')
-
- info_dict = _make_result(list(formats_order), extractor='youtube')
- ydl = YDL({'format': 'bestvideo/best,bestaudio'})
- ydl.sort_formats(info_dict)
- ydl.process_ie_result(info_dict)
- downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
- self.assertEqual(downloaded_ids, ['137', '141'])
-
- info_dict = _make_result(list(formats_order), extractor='youtube')
- ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
- ydl.sort_formats(info_dict)
- ydl.process_ie_result(info_dict)
- downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
- self.assertEqual(downloaded_ids, ['137+141', '248+141'])
-
- info_dict = _make_result(list(formats_order), extractor='youtube')
- ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
- ydl.sort_formats(info_dict)
- ydl.process_ie_result(info_dict)
- downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
- self.assertEqual(downloaded_ids, ['136+141', '247+141'])
-
- info_dict = _make_result(list(formats_order), extractor='youtube')
- ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
- ydl.sort_formats(info_dict)
- ydl.process_ie_result(info_dict)
- downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
- self.assertEqual(downloaded_ids, ['248+141'])
-
- for f1, f2 in itertools.pairwise(formats_order):
- info_dict = _make_result([f1, f2], extractor='youtube')
- ydl = YDL({'format': 'best/bestvideo'})
- ydl.sort_formats(info_dict)
- ydl.process_ie_result(info_dict)
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], f1['format_id'])
-
- info_dict = _make_result([f2, f1], extractor='youtube')
- ydl = YDL({'format': 'best/bestvideo'})
- ydl.sort_formats(info_dict)
- ydl.process_ie_result(info_dict)
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], f1['format_id'])
-
def test_audio_only_extractor_format_selection(self):
# For extractors with incomplete formats (all formats are audio-only or
# video-only) best and worst should fallback to corresponding best/worst
diff --git a/test/test_jsc/conftest.py b/test/test_jsc/conftest.py
new file mode 100644
index 0000000000..28d6734122
--- /dev/null
+++ b/test/test_jsc/conftest.py
@@ -0,0 +1,60 @@
+import re
+import pathlib
+
+import pytest
+
+import yt_dlp.globals
+from yt_dlp import YoutubeDL
+from yt_dlp.extractor.common import InfoExtractor
+
+
+_TESTDATA_PATH = pathlib.Path(__file__).parent.parent / 'testdata/sigs'
+_player_re = re.compile(r'^.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$')
+_player_id_trans = str.maketrans(dict.fromkeys('/.-', '_'))
+
+
+@pytest.fixture
+def ie() -> InfoExtractor:
+ runtime_names = yt_dlp.globals.supported_js_runtimes.value
+ ydl = YoutubeDL({'js_runtimes': {key: {} for key in runtime_names}})
+ ie = ydl.get_info_extractor('Youtube')
+
+ def _load_player(video_id, player_url, fatal=True):
+ match = _player_re.match(player_url)
+ test_id = match.group('id').translate(_player_id_trans)
+ cached_file = _TESTDATA_PATH / f'player-{test_id}.js'
+
+ if cached_file.exists():
+ return cached_file.read_text()
+
+ if code := ie._download_webpage(player_url, video_id, fatal=fatal):
+ _TESTDATA_PATH.mkdir(exist_ok=True, parents=True)
+ cached_file.write_text(code)
+ return code
+
+ return None
+
+ ie._load_player = _load_player
+ return ie
+
+
+class MockLogger:
+ def trace(self, message: str):
+ print(f'trace: {message}')
+
+ def debug(self, message: str, *, once=False):
+ print(f'debug: {message}')
+
+ def info(self, message: str):
+ print(f'info: {message}')
+
+ def warning(self, message: str, *, once=False):
+ print(f'warning: {message}')
+
+ def error(self, message: str):
+ print(f'error: {message}')
+
+
+@pytest.fixture
+def logger():
+ return MockLogger()
diff --git a/test/test_jsc/test_ejs_integration.py b/test/test_jsc/test_ejs_integration.py
new file mode 100644
index 0000000000..7984810794
--- /dev/null
+++ b/test/test_jsc/test_ejs_integration.py
@@ -0,0 +1,128 @@
+from __future__ import annotations
+
+import dataclasses
+import enum
+import importlib.util
+import json
+
+import pytest
+
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeRequest,
+ JsChallengeType,
+ JsChallengeProviderResponse,
+ JsChallengeResponse,
+ NChallengeInput,
+ NChallengeOutput,
+ SigChallengeInput,
+ SigChallengeOutput,
+)
+from yt_dlp.extractor.youtube.jsc._builtin.bun import BunJCP
+from yt_dlp.extractor.youtube.jsc._builtin.deno import DenoJCP
+from yt_dlp.extractor.youtube.jsc._builtin.node import NodeJCP
+from yt_dlp.extractor.youtube.jsc._builtin.quickjs import QuickJSJCP
+
+
+_has_ejs = bool(importlib.util.find_spec('yt_dlp_ejs'))
+pytestmark = pytest.mark.skipif(not _has_ejs, reason='yt-dlp-ejs not available')
+
+
+class Variant(enum.Enum):
+ main = 'player_ias.vflset/en_US/base.js'
+ tcc = 'player_ias_tcc.vflset/en_US/base.js'
+ tce = 'player_ias_tce.vflset/en_US/base.js'
+ es5 = 'player_es5.vflset/en_US/base.js'
+ es6 = 'player_es6.vflset/en_US/base.js'
+ tv = 'tv-player-ias.vflset/tv-player-ias.js'
+ tv_es6 = 'tv-player-es6.vflset/tv-player-es6.js'
+ phone = 'player-plasma-ias-phone-en_US.vflset/base.js'
+ tablet = 'player-plasma-ias-tablet-en_US.vflset/base.js'
+
+
+@dataclasses.dataclass
+class Challenge:
+ player: str
+ variant: Variant
+ type: JsChallengeType
+ values: dict[str, str] = dataclasses.field(default_factory=dict)
+
+ def url(self, /):
+ return f'https://www.youtube.com/s/player/{self.player}/{self.variant.value}'
+
+
+CHALLENGES: list[Challenge] = [
+ Challenge('3d3ba064', Variant.tce, JsChallengeType.N, {
+ 'ZdZIqFPQK-Ty8wId': 'qmtUsIz04xxiNW',
+ '4GMrWHyKI5cEvhDO': 'N9gmEX7YhKTSmw',
+ }),
+ Challenge('3d3ba064', Variant.tce, JsChallengeType.SIG, {
+ 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt':
+ 'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3gqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kNyBf6HPuAuCduh-a7O',
+ }),
+ Challenge('5ec65609', Variant.tce, JsChallengeType.N, {
+ '0eRGgQWJGfT5rFHFj': '4SvMpDQH-vBJCw',
+ }),
+ Challenge('5ec65609', Variant.tce, JsChallengeType.SIG, {
+ 'AAJAJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grH0rTMICA1mmDc0HoXgW3CAiAQQ4=CspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=I':
+ 'AJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grI0rTMICA1mmDc0HoXgW3CAiAQQ4HCspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ==',
+ }),
+ Challenge('6742b2b9', Variant.tce, JsChallengeType.N, {
+ '_HPB-7GFg1VTkn9u': 'qUAsPryAO_ByYg',
+ 'K1t_fcB6phzuq2SF': 'Y7PcOt3VE62mog',
+ }),
+ Challenge('6742b2b9', Variant.tce, JsChallengeType.SIG, {
+ 'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJAA':
+ 'AJfQdSswRAIgMVVvrovTbw6UNh99kPa4D_XQjGT4qYu7S6SHM8EjoCACIEQnz-nKN5RgG6iUTnNJC58csYPSrnS_SzricuUMJZGM',
+ }),
+ Challenge('2b83d2e0', Variant.main, JsChallengeType.N, {
+ '0eRGgQWJGfT5rFHFj': 'euHbygrCMLksxd',
+ }),
+ Challenge('2b83d2e0', Variant.main, JsChallengeType.SIG, {
+ 'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJA':
+ '-MGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKnMznQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJ',
+ }),
+ Challenge('638ec5c6', Variant.main, JsChallengeType.N, {
+ 'ZdZIqFPQK-Ty8wId': '1qov8-KM-yH',
+ }),
+ Challenge('638ec5c6', Variant.main, JsChallengeType.SIG, {
+ 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt':
+ 'MhudCuAuP-6fByOk1_GNXN7gNHHShjyXS2VOgsEItAJz0tipeav0OmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
+ }),
+]
+
+requests: list[JsChallengeRequest] = []
+responses: list[JsChallengeProviderResponse] = []
+for test in CHALLENGES:
+ input_type, output_type = {
+ JsChallengeType.N: (NChallengeInput, NChallengeOutput),
+ JsChallengeType.SIG: (SigChallengeInput, SigChallengeOutput),
+ }[test.type]
+
+ request = JsChallengeRequest(test.type, input_type(test.url(), list(test.values.keys())), test.player)
+ requests.append(request)
+ responses.append(JsChallengeProviderResponse(request, JsChallengeResponse(test.type, output_type(test.values))))
+
+
+@pytest.fixture(params=[BunJCP, DenoJCP, NodeJCP, QuickJSJCP])
+def jcp(request, ie, logger):
+ obj = request.param(ie, logger, None)
+ if not obj.is_available():
+ pytest.skip(f'{obj.PROVIDER_NAME} is not available')
+ obj.is_dev = True
+ return obj
+
+
+@pytest.mark.download
+def test_bulk_requests(jcp):
+ assert list(jcp.bulk_solve(requests)) == responses
+
+
+@pytest.mark.download
+def test_using_cached_player(jcp):
+ first_player_requests = requests[:3]
+ player = jcp._get_player(first_player_requests[0].video_id, first_player_requests[0].input.player_url)
+ initial = json.loads(jcp._run_js_runtime(jcp._construct_stdin(player, False, first_player_requests)))
+ preprocessed = initial.pop('preprocessed_player')
+ result = json.loads(jcp._run_js_runtime(jcp._construct_stdin(preprocessed, True, first_player_requests)))
+
+ assert initial == result
diff --git a/test/test_jsc/test_provider.py b/test/test_jsc/test_provider.py
new file mode 100644
index 0000000000..3342f77546
--- /dev/null
+++ b/test/test_jsc/test_provider.py
@@ -0,0 +1,194 @@
+
+import pytest
+
+from yt_dlp.extractor.youtube.jsc.provider import (
+ JsChallengeProvider,
+ JsChallengeRequest,
+ JsChallengeProviderResponse,
+ JsChallengeProviderRejectedRequest,
+ JsChallengeType,
+ JsChallengeResponse,
+ NChallengeOutput,
+ NChallengeInput,
+ JsChallengeProviderError,
+ register_provider,
+ register_preference,
+)
+from yt_dlp.extractor.youtube.pot._provider import IEContentProvider
+from yt_dlp.utils import ExtractorError
+from yt_dlp.extractor.youtube.jsc._registry import _jsc_preferences, _jsc_providers
+
+
+class ExampleJCP(JsChallengeProvider):
+ PROVIDER_NAME = 'example-provider'
+ PROVIDER_VERSION = '0.0.1'
+ BUG_REPORT_LOCATION = 'https://example.com/issues'
+
+ _SUPPORTED_TYPES = [JsChallengeType.N]
+
+ def is_available(self) -> bool:
+ return True
+
+ def _real_bulk_solve(self, requests):
+ for request in requests:
+ results = dict.fromkeys(request.input.challenges, 'example-solution')
+ response = JsChallengeResponse(
+ type=request.type,
+ output=NChallengeOutput(results=results))
+ yield JsChallengeProviderResponse(request=request, response=response)
+
+
+PLAYER_URL = 'https://example.com/player.js'
+
+
+class TestJsChallengeProvider:
+ # note: some test covered in TestPoTokenProvider which shares the same base class
+ def test_base_type(self):
+ assert issubclass(JsChallengeProvider, IEContentProvider)
+
+ def test_create_provider_missing_bulk_solve_method(self, ie, logger):
+ class MissingMethodsJCP(JsChallengeProvider):
+ def is_available(self) -> bool:
+ return True
+
+ with pytest.raises(TypeError, match='bulk_solve'):
+ MissingMethodsJCP(ie=ie, logger=logger, settings={})
+
+ def test_create_provider_missing_available_method(self, ie, logger):
+ class MissingMethodsJCP(JsChallengeProvider):
+ def _real_bulk_solve(self, requests):
+ raise JsChallengeProviderRejectedRequest('Not implemented')
+
+ with pytest.raises(TypeError, match='is_available'):
+ MissingMethodsJCP(ie=ie, logger=logger, settings={})
+
+ def test_barebones_provider(self, ie, logger):
+ class BarebonesProviderJCP(JsChallengeProvider):
+ def is_available(self) -> bool:
+ return True
+
+ def _real_bulk_solve(self, requests):
+ raise JsChallengeProviderRejectedRequest('Not implemented')
+
+ provider = BarebonesProviderJCP(ie=ie, logger=logger, settings={})
+ assert provider.PROVIDER_NAME == 'BarebonesProvider'
+ assert provider.PROVIDER_KEY == 'BarebonesProvider'
+ assert provider.PROVIDER_VERSION == '0.0.0'
+ assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'
+
+ def test_example_provider_success(self, ie, logger):
+ provider = ExampleJCP(ie=ie, logger=logger, settings={})
+
+ request = JsChallengeRequest(
+ type=JsChallengeType.N,
+ input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
+
+ request_two = JsChallengeRequest(
+ type=JsChallengeType.N,
+ input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge-2']))
+
+ responses = list(provider.bulk_solve([request, request_two]))
+ assert len(responses) == 2
+ assert all(isinstance(r, JsChallengeProviderResponse) for r in responses)
+ assert responses == [
+ JsChallengeProviderResponse(
+ request=request,
+ response=JsChallengeResponse(
+ type=JsChallengeType.N,
+ output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
+ ),
+ ),
+ JsChallengeProviderResponse(
+ request=request_two,
+ response=JsChallengeResponse(
+ type=JsChallengeType.N,
+ output=NChallengeOutput(results={'example-challenge-2': 'example-solution'}),
+ ),
+ ),
+ ]
+
+ def test_provider_unsupported_challenge_type(self, ie, logger):
+ provider = ExampleJCP(ie=ie, logger=logger, settings={})
+ request_supported = JsChallengeRequest(
+ type=JsChallengeType.N,
+ input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
+ request_unsupported = JsChallengeRequest(
+ type=JsChallengeType.SIG,
+ input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
+ responses = list(provider.bulk_solve([request_supported, request_unsupported, request_supported]))
+ assert len(responses) == 3
+ # Requests are validated first before continuing to _real_bulk_solve
+ assert isinstance(responses[0], JsChallengeProviderResponse)
+ assert isinstance(responses[0].error, JsChallengeProviderRejectedRequest)
+ assert responses[0].request is request_unsupported
+ assert str(responses[0].error) == 'JS Challenge type "JsChallengeType.SIG" is not supported by example-provider'
+
+ assert responses[1:] == [
+ JsChallengeProviderResponse(
+ request=request_supported,
+ response=JsChallengeResponse(
+ type=JsChallengeType.N,
+ output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
+ ),
+ ),
+ JsChallengeProviderResponse(
+ request=request_supported,
+ response=JsChallengeResponse(
+ type=JsChallengeType.N,
+ output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
+ ),
+ ),
+ ]
+
+ def test_provider_get_player(self, ie, logger):
+ ie._load_player = lambda video_id, player_url, fatal: (video_id, player_url, fatal)
+ provider = ExampleJCP(ie=ie, logger=logger, settings={})
+ assert provider._get_player('video123', PLAYER_URL) == ('video123', PLAYER_URL, True)
+
+ def test_provider_get_player_error(self, ie, logger):
+ def raise_error(video_id, player_url, fatal):
+ raise ExtractorError('Failed to load player')
+
+ ie._load_player = raise_error
+ provider = ExampleJCP(ie=ie, logger=logger, settings={})
+ with pytest.raises(JsChallengeProviderError, match='Failed to load player for JS challenge'):
+ provider._get_player('video123', PLAYER_URL)
+
+ def test_require_class_end_with_suffix(self, ie, logger):
+ class InvalidSuffix(JsChallengeProvider):
+ PROVIDER_NAME = 'invalid-suffix'
+
+ def _real_bulk_solve(self, requests):
+ raise JsChallengeProviderRejectedRequest('Not implemented')
+
+ def is_available(self) -> bool:
+ return True
+
+ provider = InvalidSuffix(ie=ie, logger=logger, settings={})
+
+ with pytest.raises(AssertionError):
+ provider.PROVIDER_KEY # noqa: B018
+
+
+def test_register_provider(ie):
+
+ @register_provider
+ class UnavailableProviderJCP(JsChallengeProvider):
+ def is_available(self) -> bool:
+ return False
+
+ def _real_bulk_solve(self, requests):
+ raise JsChallengeProviderRejectedRequest('Not implemented')
+
+ assert _jsc_providers.value.get('UnavailableProvider') == UnavailableProviderJCP
+ _jsc_providers.value.pop('UnavailableProvider')
+
+
+def test_register_preference(ie):
+ before = len(_jsc_preferences.value)
+
+ @register_preference(ExampleJCP)
+ def unavailable_preference(*args, **kwargs):
+ return 1
+
+ assert len(_jsc_preferences.value) == before + 1
diff --git a/test/test_networking.py b/test/test_networking.py
index afdd0c7aa7..e972f597b5 100644
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -3,6 +3,7 @@
# Allow direct execution
import os
import sys
+from unittest.mock import MagicMock
import pytest
@@ -614,8 +615,11 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
def test_gzip_trailing_garbage(self, handler):
with handler() as rh:
- data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
+ res = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage'))
+ data = res.read().decode()
assert data == ''
+ # Should auto-close and mark the response adaptor as closed
+ assert res.closed
@pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
@@ -627,6 +631,8 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
headers={'ytdl-encoding': 'br'}))
assert res.headers.get('Content-Encoding') == 'br'
assert res.read() == b''
+ # Should auto-close and mark the response adaptor as closed
+ assert res.closed
def test_deflate(self, handler):
with handler() as rh:
@@ -636,6 +642,8 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
headers={'ytdl-encoding': 'deflate'}))
assert res.headers.get('Content-Encoding') == 'deflate'
assert res.read() == b''
+ # Should auto-close and mark the response adaptor as closed
+ assert res.closed
def test_gzip(self, handler):
with handler() as rh:
@@ -645,6 +653,8 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
headers={'ytdl-encoding': 'gzip'}))
assert res.headers.get('Content-Encoding') == 'gzip'
assert res.read() == b''
+ # Should auto-close and mark the response adaptor as closed
+ assert res.closed
def test_multiple_encodings(self, handler):
with handler() as rh:
@@ -655,6 +665,8 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
headers={'ytdl-encoding': pair}))
assert res.headers.get('Content-Encoding') == pair
assert res.read() == b''
+ # Should auto-close and mark the response adaptor as closed
+ assert res.closed
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
def test_unsupported_encoding(self, handler):
@@ -665,6 +677,8 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'}))
assert res.headers.get('Content-Encoding') == 'unsupported'
assert res.read() == b'raw'
+ # Should auto-close and mark the response adaptor as closed
+ assert res.closed
def test_read(self, handler):
with handler() as rh:
@@ -672,9 +686,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
assert res.readable()
assert res.read(1) == b'H'
+ # Ensure we don't close the adaptor yet
+ assert not res.closed
assert res.read(3) == b'ost'
assert res.read().decode().endswith('\n\n')
assert res.read() == b''
+ # Should auto-close and mark the response adaptor as closed
+ assert res.closed
def test_request_disable_proxy(self, handler):
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
@@ -875,11 +893,31 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
with handler(enable_file_urls=True) as rh:
res = validate_and_send(rh, req)
- assert res.read() == b'foobar'
- res.close()
+ assert res.read(1) == b'f'
+ assert not res.fp.closed
+ assert res.read() == b'oobar'
+ # Should automatically close the underlying file object
+ assert res.fp.closed
os.unlink(tf.name)
+ def test_data_uri_auto_close(self, handler):
+ with handler() as rh:
+ res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
+ assert res.read() == b'hello world'
+ # Should automatically close the underlying file object
+ assert res.fp.closed
+ assert res.closed
+
+ def test_http_response_auto_close(self, handler):
+ with handler() as rh:
+ res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
+ assert res.read() == b''
+ # Should automatically close the underlying file object in the HTTP Response
+ assert isinstance(res.fp, http.client.HTTPResponse)
+ assert res.fp.fp is None
+ assert res.closed
+
def test_http_error_returns_content(self, handler):
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
def get_response():
@@ -1012,6 +1050,14 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
rh.close()
assert called
+ def test_http_response_auto_close(self, handler):
+ with handler() as rh:
+ res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
+ assert res.read() == b''
+ # Should automatically close the underlying file object in the HTTP Response
+ assert res.fp.closed
+ assert res.closed
+
@pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
@@ -1177,6 +1223,14 @@ class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
assert res4.closed
assert res4._buffer == b''
+ def test_http_response_auto_close(self, handler):
+ with handler() as rh:
+ res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
+ assert res.read() == b''
+ # Should automatically close the underlying file object in the HTTP Response
+ assert res.fp.closed
+ assert res.closed
+
def run_validation(handler, error, req, **handler_kwargs):
with handler(**handler_kwargs) as rh:
@@ -2032,6 +2086,30 @@ class TestResponse:
assert res.info() is res.headers
assert res.getheader('test') == res.get_header('test')
+ def test_auto_close(self):
+ # Should mark the response as closed if the underlying file is closed
+ class AutoCloseBytesIO(io.BytesIO):
+ def read(self, size=-1, /):
+ data = super().read(size)
+ self.close()
+ return data
+
+ fp = AutoCloseBytesIO(b'test')
+ res = Response(fp, url='test://', headers={}, status=200)
+ assert not res.closed
+ res.read()
+ assert res.closed
+
+ def test_close(self):
+ # Should not call close() on the underlying file when already closed
+ fp = MagicMock()
+ fp.closed = False
+ res = Response(fp, url='test://', headers={}, status=200)
+ res.close()
+ fp.closed = True
+ res.close()
+ assert fp.close.call_count == 1
+
class TestImpersonateTarget:
@pytest.mark.parametrize('target_str,expected', [
diff --git a/test/test_pot/test_pot_framework.py b/test/test_pot/test_pot_framework.py
index d2de1dd290..fae6c80027 100644
--- a/test/test_pot/test_pot_framework.py
+++ b/test/test_pot/test_pot_framework.py
@@ -1,6 +1,6 @@
import pytest
-from yt_dlp.extractor.youtube.pot._provider import IEContentProvider
+from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, configuration_arg
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.utils.networking import HTTPHeaderDict
from yt_dlp.extractor.youtube.pot.provider import (
@@ -627,3 +627,13 @@ def test_logger_log_level(logger):
assert logger.LogLevel('debuG') == logger.LogLevel.DEBUG
assert logger.LogLevel(10) == logger.LogLevel.DEBUG
assert logger.LogLevel('UNKNOWN') == logger.LogLevel.INFO
+
+
+def test_configuration_arg():
+ config = {'abc': ['123D'], 'xyz': ['456a', '789B']}
+
+ assert configuration_arg(config, 'abc') == ['123d']
+ assert configuration_arg(config, 'abc', default=['default']) == ['123d']
+ assert configuration_arg(config, 'ABC', default=['default']) == ['default']
+ assert configuration_arg(config, 'abc', casesense=True) == ['123D']
+ assert configuration_arg(config, 'xyz', casesense=False) == ['456a', '789b']
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
deleted file mode 100644
index 2e9c974db2..0000000000
--- a/test/test_youtube_signature.py
+++ /dev/null
@@ -1,504 +0,0 @@
-#!/usr/bin/env python3
-
-# Allow direct execution
-import os
-import sys
-import unittest
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-
-import contextlib
-import re
-import string
-import urllib.request
-
-from test.helper import FakeYDL, is_download_test
-from yt_dlp.extractor import YoutubeIE
-from yt_dlp.jsinterp import JSInterpreter
-
-_SIG_TESTS = [
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
- 86,
- '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
- ),
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
- 85,
- '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
- ),
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
- 90,
- ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
- ),
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
- 84,
- 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
- ),
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
- '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
- 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
- ),
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
- 84,
- '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
- ),
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
- 83,
- '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
- ),
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
- '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
- '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
- ),
- (
- 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
- '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
- '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
- ),
- (
- 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
- ),
- (
- 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- 'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- ),
- (
- 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- '0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
- ),
- (
- 'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- 'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
- ),
- (
- 'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- ),
- (
- 'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- ),
- (
- 'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- 'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
- ),
- (
- 'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- 'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
- ),
- (
- 'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
- ),
- (
- 'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
- ),
- (
- 'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
- ),
- (
- 'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
- ),
- (
- 'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- 'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
- ),
- (
- 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
- '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
- 'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
- ),
- (
- 'https://www.youtube.com/s/player/e12fbea4/player_ias.vflset/en_US/base.js',
- 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
- 'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a',
- ),
- (
- 'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js',
- 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
- 'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-',
- ),
- (
- 'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js',
- 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
- 'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-',
- ),
- (
- 'https://www.youtube.com/s/player/5ec65609/player_ias_tcc.vflset/en_US/base.js',
- 'AAJAJfQdSswRAIgNSN0GDUcHnCIXkKcF61yLBgDHiX1sUhOJdY4_GxunRYCIDeYNYP_16mQTPm5f1OVq3oV1ijUNYPjP4iUSMAjO9bZ',
- 'AJfQdSswRAIgNSN0GDUcHnCIXkKcF61ZLBgDHiX1sUhOJdY4_GxunRYCIDyYNYP_16mQTPm5f1OVq3oV1ijUNYPjP4iUSMAjO9be',
- ),
-]
-
-_NSIG_TESTS = [
- (
- 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js',
- 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg',
- ),
- (
- 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
- 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
- ),
- (
- 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
- 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
- ),
- (
- 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
- 'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
- ),
- (
- 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
- 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
- ),
- (
- 'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
- 'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
- ),
- (
- 'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
- 'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
- ),
- (
- 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
- 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
- ),
- (
- 'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
- 'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
- ),
- (
- 'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
- 'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
- ),
- (
- 'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
- 'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
- ),
- (
- 'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
- 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
- ),
- (
- 'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
- '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
- ),
- (
- 'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
- '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
- ),
- (
- 'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
- 'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
- ),
- (
- 'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
- 'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
- ),
- (
- 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
- 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
- ),
- (
- 'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
- 'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
- ),
- (
- 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
- 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
- ),
- (
- 'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
- 'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
- ),
- (
- 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
- 'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
- ),
- (
- 'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
- '1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
- ),
- (
- 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
- '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
- ),
- (
- 'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
- '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
- ),
- (
- 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
- 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
- ),
- (
- 'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
- 'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
- ),
- (
- 'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
- '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
- ),
- (
- 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
- 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
- ),
- (
- 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
- 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
- ),
- (
- 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
- 'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
- ),
- (
- 'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js',
- 'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg',
- ),
- (
- 'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
- 'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
- ),
- (
- 'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
- 'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
- ),
- (
- 'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
- 'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
- ),
- (
- 'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
- 'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
- ),
- (
- 'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
- '-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
- ),
- (
- 'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
- 'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
- ),
- (
- 'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
- 'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
- ),
- (
- 'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
- 'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
- ),
- (
- 'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
- 'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
- ),
- (
- 'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
- 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
- ),
- (
- 'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
- 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
- ),
- (
- 'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
- 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
- ),
- (
- 'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
- 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
- ),
- (
- 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
- 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
- ),
- (
- 'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js',
- 'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h',
- ),
- (
- 'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js',
- 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
- ),
- (
- 'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
- 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
- ),
- (
- 'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
- ),
- (
- 'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
- ),
- (
- 'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
- ),
- (
- 'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
- ),
- (
- 'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
- ),
- (
- 'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
- ),
- (
- 'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
- ),
- (
- 'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
- ),
- (
- 'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
- ),
- (
- 'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
- 'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
- ),
- (
- 'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js',
- 'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA',
- ),
- (
- 'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js',
- '0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg',
- ),
- (
- 'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js',
- '0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg',
- ),
- (
- 'https://www.youtube.com/s/player/5ec65609/player_ias_tcc.vflset/en_US/base.js',
- '6l5CTNx4AzIqH4MXM', 'NupToduxHBew1g',
- ),
-]
-
-
-@is_download_test
-class TestPlayerInfo(unittest.TestCase):
- def test_youtube_extract_player_info(self):
- PLAYER_URLS = (
- ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
- ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
- ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
- ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
- ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
- ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
- ('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'),
- ('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'),
- # obsolete
- ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
- ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
- ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
- ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
- ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
- ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
- ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
- )
- for player_url, expected_player_id in PLAYER_URLS:
- player_id = YoutubeIE._extract_player_info(player_url)
- self.assertEqual(player_id, expected_player_id)
-
-
-@is_download_test
-class TestSignature(unittest.TestCase):
- def setUp(self):
- TEST_DIR = os.path.dirname(os.path.abspath(__file__))
- self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
- if not os.path.exists(self.TESTDATA_DIR):
- os.mkdir(self.TESTDATA_DIR)
-
- def tearDown(self):
- with contextlib.suppress(OSError):
- for f in os.listdir(self.TESTDATA_DIR):
- os.remove(f)
-
-
-def t_factory(name, sig_func, url_pattern):
- def make_tfunc(url, sig_input, expected_sig):
- m = url_pattern.match(url)
- assert m, f'{url!r} should follow URL format'
- test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
-
- def test_func(self):
- basename = f'player-{test_id}.js'
- fn = os.path.join(self.TESTDATA_DIR, basename)
-
- if not os.path.exists(fn):
- urllib.request.urlretrieve(url, fn)
- with open(fn, encoding='utf-8') as testf:
- jscode = testf.read()
- self.assertEqual(sig_func(jscode, sig_input, url), expected_sig)
-
- test_func.__name__ = f'test_{name}_js_{test_id}'
- setattr(TestSignature, test_func.__name__, test_func)
- return make_tfunc
-
-
-def signature(jscode, sig_input, player_url):
- func = YoutubeIE(FakeYDL())._parse_sig_js(jscode, player_url)
- src_sig = (
- str(string.printable[:sig_input])
- if isinstance(sig_input, int) else sig_input)
- return func(src_sig)
-
-
-def n_sig(jscode, sig_input, player_url):
- ie = YoutubeIE(FakeYDL())
- funcname = ie._extract_n_function_name(jscode, player_url=player_url)
- jsi = JSInterpreter(jscode)
- func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode, player_url))
- return func([sig_input])
-
-
-make_sig_test = t_factory(
- 'signature', signature,
- re.compile(r'''(?x)
- .+(?:
- /player/(?P[a-zA-Z0-9_/.-]+)|
- /html5player-(?:en_US-)?(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?
- )\.js$'''))
-for test_spec in _SIG_TESTS:
- make_sig_test(*test_spec)
-
-make_nsig_test = t_factory(
- 'nsig', n_sig, re.compile(r'.+/player/(?P[a-zA-Z0-9_/.-]+)\.js$'))
-for test_spec in _NSIG_TESTS:
- make_nsig_test(*test_spec)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index ef42ba68e3..539b10fe29 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -42,6 +42,8 @@ from .globals import (
plugin_pps,
all_plugins_loaded,
plugin_dirs,
+ supported_js_runtimes,
+ supported_remote_components,
)
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
@@ -533,6 +535,18 @@ class YoutubeDL:
See "EXTRACTOR ARGUMENTS" for details.
Argument values must always be a list of string(s).
E.g. {'youtube': {'skip': ['dash', 'hls']}}
+ js_runtimes: A dictionary of JavaScript runtime keys (in lower case) to enable
+ and a dictionary of additional configuration for the runtime.
+ Currently supported runtimes are 'deno', 'node', 'bun', and 'quickjs'.
+ If None, the default runtime of "deno" will be enabled.
+ The runtime configuration dictionary can have the following keys:
+ - path: Path to the executable (optional)
+ E.g. {'deno': {'path': '/path/to/deno'}
+ remote_components: A list of remote components that are allowed to be fetched when required.
+ Supported components:
+ - ejs:npm (external JavaScript components from npm)
+ - ejs:github (external JavaScript components from yt-dlp-ejs GitHub)
+ By default, no remote components are allowed to be fetched.
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
The following options are deprecated and may be removed in the future:
@@ -717,6 +731,13 @@ class YoutubeDL:
else:
raise
+ # Note: this must be after plugins are loaded
+ self.params['js_runtimes'] = self.params.get('js_runtimes', {'deno': {}})
+ self._clean_js_runtimes(self.params['js_runtimes'])
+
+ self.params['remote_components'] = set(self.params.get('remote_components', ()))
+ self._clean_remote_components(self.params['remote_components'])
+
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
@@ -829,6 +850,36 @@ class YoutubeDL:
self.archive = preload_download_archive(self.params.get('download_archive'))
+ def _clean_js_runtimes(self, runtimes):
+ if not (
+ isinstance(runtimes, dict)
+ and all(isinstance(k, str) and (v is None or isinstance(v, dict)) for k, v in runtimes.items())
+ ):
+ raise ValueError('Invalid js_runtimes format, expected a dict of {runtime: {config}}')
+
+ if unsupported_runtimes := runtimes.keys() - supported_js_runtimes.value.keys():
+ self.report_warning(
+ f'Ignoring unsupported JavaScript runtime(s): {", ".join(unsupported_runtimes)}.'
+ f' Supported runtimes: {", ".join(supported_js_runtimes.value.keys())}.')
+ for rt in unsupported_runtimes:
+ runtimes.pop(rt)
+
+ def _clean_remote_components(self, remote_components: set):
+ if unsupported_remote_components := set(remote_components) - set(supported_remote_components.value):
+ self.report_warning(
+ f'Ignoring unsupported remote component(s): {", ".join(unsupported_remote_components)}.'
+ f' Supported remote components: {", ".join(supported_remote_components.value)}.')
+ for rt in unsupported_remote_components:
+ remote_components.remove(rt)
+
+ @functools.cached_property
+ def _js_runtimes(self):
+ runtimes = {}
+ for name, config in self.params.get('js_runtimes', {}).items():
+ runtime_cls = supported_js_runtimes.value.get(name)
+ runtimes[name] = runtime_cls(path=config.get('path')) if runtime_cls else None
+ return runtimes
+
def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
idxs = [
@@ -4064,6 +4115,18 @@ class YoutubeDL:
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
})) or 'none'))
+ if not self.params.get('js_runtimes'):
+ write_debug('JS runtimes: none (disabled)')
+ else:
+ write_debug('JS runtimes: %s' % (', '.join(sorted(
+ f'{name} (unknown)' if runtime is None
+ else join_nonempty(
+ runtime.info.name,
+ runtime.info.version + (' (unsupported)' if runtime.info.supported is False else ''),
+ )
+ for name, runtime in self._js_runtimes.items() if runtime is None or runtime.info is not None
+ )) or 'none'))
+
write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 8aee126030..2f6ba47832 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -61,8 +61,15 @@ from .utils import (
shell_quote,
variadic,
write_string,
+
)
from .utils._utils import _UnsafeExtensionError
+from .utils._jsruntime import (
+ BunJsRuntime as _BunJsRuntime,
+ DenoJsRuntime as _DenoJsRuntime,
+ NodeJsRuntime as _NodeJsRuntime,
+ QuickJsRuntime as _QuickJsRuntime,
+)
from .YoutubeDL import YoutubeDL
@@ -773,6 +780,10 @@ def parse_options(argv=None):
else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
else None)
+ js_runtimes = {
+ runtime.lower(): {'path': path} for runtime, path in (
+ [*arg.split(':', 1), None][:2] for arg in opts.js_runtimes)}
+
return ParsedOptions(parser, opts, urls, {
'usenetrc': opts.usenetrc,
'netrc_location': opts.netrc_location,
@@ -940,6 +951,8 @@ def parse_options(argv=None):
'geo_bypass_country': opts.geo_bypass_country,
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
'useid': opts.useid or None,
+ 'js_runtimes': js_runtimes,
+ 'remote_components': opts.remote_components,
'warn_when_outdated': opts.update_self is None,
'_warnings': warnings,
'_deprecation_warnings': deprecation_warnings,
@@ -1081,6 +1094,16 @@ def main(argv=None):
from .extractor import gen_extractors, list_extractors
+# Register JS runtimes and remote components
+from .globals import supported_js_runtimes, supported_remote_components
+supported_js_runtimes.value['deno'] = _DenoJsRuntime
+supported_js_runtimes.value['node'] = _NodeJsRuntime
+supported_js_runtimes.value['bun'] = _BunJsRuntime
+supported_js_runtimes.value['quickjs'] = _QuickJsRuntime
+
+supported_remote_components.value.append('ejs:github')
+supported_remote_components.value.append('ejs:npm')
+
__all__ = [
'YoutubeDL',
'gen_extractors',
diff --git a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py
index 8e7f42f596..0c4bf7d63b 100644
--- a/yt_dlp/__pyinstaller/hook-yt_dlp.py
+++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py
@@ -34,3 +34,4 @@ print(f'Adding imports: {hiddenimports}')
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
datas = collect_data_files('curl_cffi', includes=['cacert.pem'])
+datas += collect_data_files('yt_dlp_ejs', includes=['**/*.js'])
diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py
index 0d58da2bd5..cf2bcfb37e 100644
--- a/yt_dlp/dependencies/__init__.py
+++ b/yt_dlp/dependencies/__init__.py
@@ -81,6 +81,12 @@ except ImportError:
from . import Cryptodome
+try:
+ import yt_dlp_ejs
+except ImportError:
+ yt_dlp_ejs = None
+
+
all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
available_dependencies = {k: v for k, v in all_dependencies.items() if v}
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 3b8fd27bc7..82bc5106a5 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -488,20 +488,6 @@ class FFmpegFD(ExternalFD):
if not self.params.get('verbose'):
args += ['-hide_banner']
- args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
-
- # These exists only for compatibility. Extractors should use
- # info_dict['downloader_options']['ffmpeg_args'] instead
- args += info_dict.get('_ffmpeg_args') or []
- seekable = info_dict.get('_seekable')
- if seekable is not None:
- # setting -seekable prevents ffmpeg from guessing if the server
- # supports seeking(by adding the header `Range: bytes=0-`), which
- # can cause problems in some cases
- # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
- # http://trac.ffmpeg.org/ticket/6125#comment:10
- args += ['-seekable', '1' if seekable else '0']
-
env = None
proxy = self.params.get('proxy')
if proxy:
@@ -521,39 +507,10 @@ class FFmpegFD(ExternalFD):
env['HTTP_PROXY'] = proxy
env['http_proxy'] = proxy
- protocol = info_dict.get('protocol')
-
- if protocol == 'rtmp':
- player_url = info_dict.get('player_url')
- page_url = info_dict.get('page_url')
- app = info_dict.get('app')
- play_path = info_dict.get('play_path')
- tc_url = info_dict.get('tc_url')
- flash_version = info_dict.get('flash_version')
- live = info_dict.get('rtmp_live', False)
- conn = info_dict.get('rtmp_conn')
- if player_url is not None:
- args += ['-rtmp_swfverify', player_url]
- if page_url is not None:
- args += ['-rtmp_pageurl', page_url]
- if app is not None:
- args += ['-rtmp_app', app]
- if play_path is not None:
- args += ['-rtmp_playpath', play_path]
- if tc_url is not None:
- args += ['-rtmp_tcurl', tc_url]
- if flash_version is not None:
- args += ['-rtmp_flashver', flash_version]
- if live:
- args += ['-rtmp_live', 'live']
- if isinstance(conn, list):
- for entry in conn:
- args += ['-rtmp_conn', entry]
- elif isinstance(conn, str):
- args += ['-rtmp_conn', conn]
-
start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
+ fallback_input_args = traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
+
selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats):
is_http = re.match(r'https?://', fmt['url'])
@@ -572,6 +529,44 @@ class FFmpegFD(ExternalFD):
if end_time:
args += ['-t', str(end_time - start_time)]
+ protocol = fmt.get('protocol')
+
+ if protocol == 'rtmp':
+ player_url = fmt.get('player_url')
+ page_url = fmt.get('page_url')
+ app = fmt.get('app')
+ play_path = fmt.get('play_path')
+ tc_url = fmt.get('tc_url')
+ flash_version = fmt.get('flash_version')
+ live = fmt.get('rtmp_live', False)
+ conn = fmt.get('rtmp_conn')
+ if player_url is not None:
+ args += ['-rtmp_swfverify', player_url]
+ if page_url is not None:
+ args += ['-rtmp_pageurl', page_url]
+ if app is not None:
+ args += ['-rtmp_app', app]
+ if play_path is not None:
+ args += ['-rtmp_playpath', play_path]
+ if tc_url is not None:
+ args += ['-rtmp_tcurl', tc_url]
+ if flash_version is not None:
+ args += ['-rtmp_flashver', flash_version]
+ if live:
+ args += ['-rtmp_live', 'live']
+ if isinstance(conn, list):
+ for entry in conn:
+ args += ['-rtmp_conn', entry]
+ elif isinstance(conn, str):
+ args += ['-rtmp_conn', conn]
+
+ elif protocol == 'http_dash_segments' and info_dict.get('is_live'):
+ # ffmpeg may try to read past the latest available segments for
+ # live DASH streams unless we pass `-re`. In modern ffmpeg, this
+ # is an alias of `-readrate 1`, but `-readrate` was not added
+ # until ffmpeg 5.0, so we must stick to using `-re`
+ args += ['-re']
+
url = fmt['url']
if self.params.get('enable_file_urls') and url.startswith('file:'):
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
@@ -586,6 +581,7 @@ class FFmpegFD(ExternalFD):
# https://trac.ffmpeg.org/ticket/2702
url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
+ args += traverse_obj(fmt, ('downloader_options', 'ffmpeg_args', ...)) or fallback_input_args
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 072169d48d..8c247908de 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -143,6 +143,8 @@ from .archiveorg import (
from .arcpublishing import ArcPublishingIE
from .ard import (
ARDIE,
+ ARDAudiothekIE,
+ ARDAudiothekPlaylistIE,
ARDBetaMediathekIE,
ARDMediathekCollectionIE,
)
@@ -1216,6 +1218,7 @@ from .n1 import (
N1InfoAssetIE,
N1InfoIIE,
)
+from .nascar import NascarClassicsIE
from .nate import (
NateIE,
NateProgramIE,
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index 9a4e0b8c80..3746c58fb7 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -5,12 +5,9 @@ import re
import urllib.parse
from .common import InfoExtractor
-from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
-from ..networking import HEADRequest
-from ..networking.exceptions import HTTPError
+from .youtube import YoutubeBaseInfoExtractor
from ..utils import (
KNOWN_EXTENSIONS,
- ExtractorError,
bug_reports_message,
clean_html,
dict_get,
@@ -21,18 +18,14 @@ from ..utils import (
join_nonempty,
js_to_json,
merge_dicts,
- mimetype2ext,
orderedSet,
parse_duration,
parse_qs,
str_or_none,
- str_to_int,
traverse_obj,
- try_get,
unified_strdate,
unified_timestamp,
url_or_none,
- urlhandle_detect_ext,
)
@@ -471,7 +464,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
'info_dict': {
'id': 'lTx3G6h2xyA',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Madeon - Pop Culture (live mashup)',
'upload_date': '20110711',
'uploader': 'Madeon',
@@ -578,7 +571,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
'info_dict': {
'id': 'Q_yjX80U7Yc',
- 'ext': 'flv',
+ 'ext': 'webm',
'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest',
'uploader_id': 'claybutlermusic',
'description': 'md5:4595264559e3d0a0ceb3f011f6334543',
@@ -680,6 +673,37 @@ class YoutubeWebArchiveIE(InfoExtractor):
'upload_date': '20120407',
'uploader_id': 'thecomputernerd01',
},
+ }, {
+ # Contains split audio/video formats
+ 'url': 'ytarchive:o_T_S_TU12M',
+ 'info_dict': {
+ 'id': 'o_T_S_TU12M',
+ 'ext': 'mp4',
+ 'title': 'Prairie Pulse 1218; Lin Enger, Paul Olson',
+ 'description': 'md5:36e7a34cdc8508e35a920ec042e799c7',
+ 'uploader': 'Prairie Public',
+ 'channel_id': 'UC4BOzQel6tvJm7OEDd3vZlw',
+ 'channel_url': 'https://www.youtube.com/channel/UC4BOzQel6tvJm7OEDd3vZlw',
+ 'duration': 1606,
+ 'upload_date': '20150213',
+ },
+ }, {
+ # Video unavailable through wayback-fakeurl
+ 'url': 'ytarchive:SQCom7wjGDs',
+ 'info_dict': {
+ 'id': 'SQCom7wjGDs',
+ 'ext': 'mp4',
+ 'title': 'Jamin Warren from PBS Game/Show decides that Portal is a feminist Game [Top Hats and No Brain]',
+ 'description': 'md5:c0cb876dd075483ead9afcc86798efb0',
+ 'uploader': 'Top Hats and Champagne',
+ 'uploader_id': 'sparrowtm',
+ 'uploader_url': 'https://www.youtube.com/user/sparrowtm',
+ 'channel_id': 'UCW3T5nG4iEkI7HjG-Du3HQA',
+ 'channel_url': 'https://www.youtube.com/channel/UCW3T5nG4iEkI7HjG-Du3HQA',
+ 'duration': 1500,
+ 'thumbnail': 'https://web.archive.org/web/20160108040020if_/https://i.ytimg.com/vi/SQCom7wjGDs/maxresdefault.jpg',
+ 'upload_date': '20160107',
+ },
}, {
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
'only_matching': True,
@@ -724,6 +748,113 @@ class YoutubeWebArchiveIE(InfoExtractor):
_OLDEST_CAPTURE_DATE = 20050214000000
_NEWEST_CAPTURE_DATE = 20500101000000
+ _FORMATS = {
+ '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'vcodec': 'h263'},
+ '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'vcodec': 'h263'},
+ '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'vcodec': 'h264'},
+ '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'vcodec': 'h264'},
+ '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'vcodec': 'h264'},
+ '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
+ # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
+ '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'vcodec': 'h264'},
+ '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'vcodec': 'h264'},
+ '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'vcodec': 'vp8'},
+ '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'vcodec': 'vp8'},
+ '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'vcodec': 'vp8'},
+ '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'vcodec': 'vp8'},
+ '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
+ '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
+
+
+ # 3D videos
+ '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
+ '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
+ '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
+ '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
+ '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
+ '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
+ '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
+
+ # Apple HTTP Live Streaming
+ '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
+ '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
+ '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
+ '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
+ '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
+ '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
+ '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
+
+ # DASH mp4 video
+ '133': {'ext': 'mp4', 'height': 240, 'vcodec': 'h264', 'acodec': 'none'},
+ '134': {'ext': 'mp4', 'height': 360, 'vcodec': 'h264', 'acodec': 'none'},
+ '135': {'ext': 'mp4', 'height': 480, 'vcodec': 'h264', 'acodec': 'none'},
+ '136': {'ext': 'mp4', 'height': 720, 'vcodec': 'h264', 'acodec': 'none'},
+ '137': {'ext': 'mp4', 'height': 1080, 'vcodec': 'h264', 'acodec': 'none'},
+ '138': {'ext': 'mp4', 'vcodec': 'h264', 'acodec': 'none'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
+ '160': {'ext': 'mp4', 'height': 144, 'vcodec': 'h264', 'acodec': 'none'},
+ '212': {'ext': 'mp4', 'height': 480, 'vcodec': 'h264', 'acodec': 'none'},
+ '264': {'ext': 'mp4', 'height': 1440, 'vcodec': 'h264', 'acodec': 'none'},
+ '298': {'ext': 'mp4', 'height': 720, 'vcodec': 'h264', 'fps': 60, 'acodec': 'none'},
+ '299': {'ext': 'mp4', 'height': 1080, 'vcodec': 'h264', 'fps': 60, 'acodec': 'none'},
+ '266': {'ext': 'mp4', 'height': 2160, 'vcodec': 'h264', 'acodec': 'none'},
+
+ # Dash mp4 audio
+ '139': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
+ '140': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
+ '141': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
+ '256': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
+ '258': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
+ '325': {'ext': 'm4a', 'acodec': 'dtse', 'vcodec': 'none'},
+ '328': {'ext': 'm4a', 'acodec': 'ec-3', 'vcodec': 'none'},
+
+ # Dash webm
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'vcodec': 'vp8'},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'vcodec': 'vp8'},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'vcodec': 'vp8'},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
+ '278': {'ext': 'webm', 'height': 144, 'vcodec': 'vp9', 'acodec': 'none'},
+ '242': {'ext': 'webm', 'height': 240, 'vcodec': 'vp9', 'acodec': 'none'},
+ '243': {'ext': 'webm', 'height': 360, 'vcodec': 'vp9', 'acodec': 'none'},
+ '244': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
+ '245': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
+ '246': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
+ '247': {'ext': 'webm', 'height': 720, 'vcodec': 'vp9', 'acodec': 'none'},
+ '248': {'ext': 'webm', 'height': 1080, 'vcodec': 'vp9', 'acodec': 'none'},
+ '271': {'ext': 'webm', 'height': 1440, 'vcodec': 'vp9', 'acodec': 'none'},
+ # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
+ '272': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'acodec': 'none'},
+ '302': {'ext': 'webm', 'height': 720, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
+ '303': {'ext': 'webm', 'height': 1080, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
+ '308': {'ext': 'webm', 'height': 1440, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
+ '313': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'acodec': 'none'},
+ '315': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
+
+ # Dash webm audio
+ '171': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none'},
+ '172': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none'},
+
+ # Dash webm audio with opus inside
+ '249': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
+ '250': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
+ '251': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
+
+ # av01 video only formats sometimes served with "unknown" codecs
+ '394': {'ext': 'mp4', 'height': 144, 'vcodec': 'av01.0.00M.08', 'acodec': 'none'},
+ '395': {'ext': 'mp4', 'height': 240, 'vcodec': 'av01.0.00M.08', 'acodec': 'none'},
+ '396': {'ext': 'mp4', 'height': 360, 'vcodec': 'av01.0.01M.08', 'acodec': 'none'},
+ '397': {'ext': 'mp4', 'height': 480, 'vcodec': 'av01.0.04M.08', 'acodec': 'none'},
+ '398': {'ext': 'mp4', 'height': 720, 'vcodec': 'av01.0.05M.08', 'acodec': 'none'},
+ '399': {'ext': 'mp4', 'height': 1080, 'vcodec': 'av01.0.08M.08', 'acodec': 'none'},
+ '400': {'ext': 'mp4', 'height': 1440, 'vcodec': 'av01.0.12M.08', 'acodec': 'none'},
+ '401': {'ext': 'mp4', 'height': 2160, 'vcodec': 'av01.0.12M.08', 'acodec': 'none'},
+ }
+
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
query = {
@@ -933,23 +1064,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
url_date = url_date or url_date_2
- urlh = None
- retry_manager = self.RetryManager(fatal=False)
- for retry in retry_manager:
- try:
- urlh = self._request_webpage(
- HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
- video_id, note='Fetching archived video file url', expected_status=True)
- except ExtractorError as e:
- # HTTP Error 404 is expected if the video is not saved.
- if isinstance(e.cause, HTTPError) and e.cause.status == 404:
- self.raise_no_formats(
- 'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
- else:
- retry.error = e
+ video_info = self._download_json(
+ 'https://web.archive.org/__wb/videoinfo', video_id,
+ query={'vtype': 'youtube', 'vid': video_id})
- if retry_manager.error:
- self.raise_no_formats(retry_manager.error, expected=True, video_id=video_id)
+ if not traverse_obj(video_info, 'formats'):
+ self.raise_no_formats(
+ 'The requested video is not archived or indexed', expected=True)
capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
@@ -968,25 +1089,18 @@ class YoutubeWebArchiveIE(InfoExtractor):
info['thumbnails'] = self._extract_thumbnails(video_id)
- if urlh:
- url = urllib.parse.unquote(urlh.url)
- video_file_url_qs = parse_qs(url)
- # Attempt to recover any ext & format info from playback url & response headers
- fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
- itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
- if itag and itag in YoutubeIE._formats:
- fmt.update(YoutubeIE._formats[itag])
- fmt.update({'format_id': itag})
- else:
- mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
- ext = (mimetype2ext(mime)
- or urlhandle_detect_ext(urlh)
- or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
- fmt.update({'ext': ext})
- info['formats'] = [fmt]
- if not info.get('duration'):
- info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
+ formats = []
+ for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
+ format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
+ formats.append({
+ 'format_id': format_id,
+ **self._FORMATS.get(format_id, {}),
+ **traverse_obj(fmt, {
+ 'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
+ 'ext': ('ext', {str}),
+ 'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
+ }),
+ })
+ info['formats'] = formats
- if not info.get('title'):
- info['title'] = video_id
return info
diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 89d3299213..5bcf74e1d0 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -1,4 +1,5 @@
import functools
+import json
import re
from .common import InfoExtractor
@@ -15,11 +16,12 @@ from ..utils import (
remove_start,
str_or_none,
unified_strdate,
+ update_url,
update_url_query,
url_or_none,
xpath_text,
)
-from ..utils.traversal import traverse_obj
+from ..utils.traversal import traverse_obj, value
class ARDMediathekBaseIE(InfoExtractor):
@@ -601,3 +603,163 @@ class ARDMediathekCollectionIE(InfoExtractor):
return self.playlist_result(
OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
title=page_data.get('title'), description=page_data.get('synopsis'))
+
+
+class ARDAudiothekBaseIE(InfoExtractor):
+ def _graphql_query(self, urn, query):
+ return self._download_json(
+ 'https://api.ardaudiothek.de/graphql', urn,
+ data=json.dumps({
+ 'query': query,
+ 'variables': {'id': urn},
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ })['data']
+
+
+class ARDAudiothekIE(ARDAudiothekBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ardaudiothek\.de/episode/(?P<id>urn:ard:(?:episode|section|extra):[a-f0-9]{16})'
+
+ _TESTS = [{
+ 'url': 'https://www.ardaudiothek.de/episode/urn:ard:episode:eabead1add170e93/',
+ 'info_dict': {
+ 'id': 'urn:ard:episode:eabead1add170e93',
+ 'ext': 'mp3',
+ 'upload_date': '20240717',
+ 'duration': 3339,
+ 'title': 'CAIMAN CLUB (S04E04): Cash Out',
+ 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:ed64411a07a4b405',
+ 'description': 'md5:0e5d127a3832ae59e8bab40a91a5dadc',
+ 'display_id': 'urn:ard:episode:eabead1add170e93',
+ 'timestamp': 1721181641,
+ 'series': '1LIVE Caiman Club',
+ 'channel': 'WDR',
+ 'episode': 'Episode 4',
+ 'episode_number': 4,
+ },
+ }, {
+ 'url': 'https://www.ardaudiothek.de/episode/urn:ard:section:855c7a53dac72e0a/',
+ 'info_dict': {
+ 'id': 'urn:ard:section:855c7a53dac72e0a',
+ 'ext': 'mp4',
+ 'upload_date': '20241231',
+ 'duration': 3304,
+ 'title': 'Illegaler DDR-Detektiv: Doberschütz und die letzte Staatsjagd (1/2) - Wendezeit',
+ 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:b9b4f1e8b93da4dd',
+ 'description': 'md5:3552d571e1959754cff66c1da6c0fdae',
+ 'display_id': 'urn:ard:section:855c7a53dac72e0a',
+ 'timestamp': 1735629900,
+ 'series': 'Auf der Spur – Die ARD Ermittlerkrimis',
+ 'channel': 'ARD',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ }, {
+ 'url': 'https://www.ardaudiothek.de/episode/urn:ard:extra:d2fe7303d2dcbf5d/',
+ 'info_dict': {
+ 'id': 'urn:ard:extra:d2fe7303d2dcbf5d',
+ 'ext': 'mp3',
+ 'title': 'Trailer: Fanta Vier Forever, Baby!?!',
+ 'description': 'md5:b64a586f2e976b8bb5ea0a79dbd8751c',
+ 'channel': 'SWR',
+ 'duration': 62,
+ 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:48d3c255969be803',
+ 'series': 'Fanta Vier Forever, Baby!?!',
+ 'timestamp': 1732108217,
+ 'upload_date': '20241120',
+ },
+ }]
+
+ _QUERY_ITEM = '''\
+ query($id: ID!) {
+ item(id: $id) {
+ audioList {
+ href
+ distributionType
+ audioBitrate
+ audioCodec
+ }
+ show {
+ title
+ }
+ image {
+ url1X1
+ }
+ programSet {
+ publicationService {
+ organizationName
+ }
+ }
+ description
+ title
+ duration
+ startDate
+ episodeNumber
+ }
+ }'''
+
+ def _real_extract(self, url):
+ urn = self._match_id(url)
+ item = self._graphql_query(urn, self._QUERY_ITEM)['item']
+ return {
+ 'id': urn,
+ **traverse_obj(item, {
+ 'formats': ('audioList', lambda _, v: url_or_none(v['href']), {
+ 'url': 'href',
+ 'format_id': ('distributionType', {str}),
+ 'abr': ('audioBitrate', {int_or_none}),
+ 'acodec': ('audioCodec', {str}),
+ 'vcodec': {value('none')},
+ }),
+ 'channel': ('programSet', 'publicationService', 'organizationName', {str}),
+ 'description': ('description', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'series': ('show', 'title', {str}),
+ 'episode_number': ('episodeNumber', {int_or_none}),
+ 'thumbnail': ('image', 'url1X1', {url_or_none}, {update_url(query=None)}),
+ 'timestamp': ('startDate', {parse_iso8601}),
+ 'title': ('title', {str}),
+ }),
+ }
+
+
+class ARDAudiothekPlaylistIE(ARDAudiothekBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ardaudiothek\.de/sendung/(?P<playlist>[\w-]+)/(?P<id>urn:ard:show:[a-f0-9]{16})'
+
+ _TESTS = [{
+ 'url': 'https://www.ardaudiothek.de/sendung/mia-insomnia/urn:ard:show:c405aa26d9a4060a/',
+ 'info_dict': {
+ 'display_id': 'mia-insomnia',
+ 'title': 'Mia Insomnia',
+ 'id': 'urn:ard:show:c405aa26d9a4060a',
+ 'description': 'md5:d9ceb7a6b4d26a4db3316573bb564292',
+ },
+ 'playlist_mincount': 37,
+ }, {
+ 'url': 'https://www.ardaudiothek.de/sendung/100-berlin/urn:ard:show:4d248e0806ce37bc/',
+ 'only_matching': True,
+ }]
+
+ _QUERY_PLAYLIST = '''
+ query($id: ID!) {
+ show(id: $id) {
+ title
+ description
+ items(filter: { isPublished: { equalTo: true } }) {
+ nodes {
+ url
+ }
+ }
+ }
+ }'''
+
+ def _real_extract(self, url):
+ urn, playlist = self._match_valid_url(url).group('id', 'playlist')
+ playlist_info = self._graphql_query(urn, self._QUERY_PLAYLIST)['show']
+ entries = []
+ for url in traverse_obj(playlist_info, ('items', 'nodes', ..., 'url', {url_or_none})):
+ entries.append(self.url_result(url, ie=ARDAudiothekIE))
+ return self.playlist_result(entries, urn, display_id=playlist, **traverse_obj(playlist_info, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ }))
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 86950b2445..9868f0e4d2 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -13,6 +13,7 @@ from ..utils import (
try_get,
unified_timestamp,
)
+from ..utils.traversal import traverse_obj
class DPlayBaseIE(InfoExtractor):
@@ -1053,7 +1054,7 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de)/(?:programme|show|sendungen)/(?P<programme>[^/?#]+)/(?:video/)?(?P<alternate_id>[^/?#]+)'
_TESTS = [{
'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold',
@@ -1062,7 +1063,7 @@ class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
'ext': 'mp4',
'title': 'German Gold',
'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6',
- 'display_id': 'goldrausch-in-australien/german-gold',
+ 'display_id': 'german-gold',
'episode': 'Episode 1',
'episode_number': 1,
'season': 'Season 5',
@@ -1074,6 +1075,7 @@ class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
'creators': ['DMAX'],
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg',
'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'],
+ 'categories': ['Gold', 'Schatzsucher'],
},
'params': {'skip_download': 'm3u8'},
}, {
@@ -1100,20 +1102,95 @@ class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
}, {
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
'only_matching': True,
- }, {
- 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
- 'only_matching': True,
}, {
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
'only_matching': True,
+ }, {
+ 'url': 'https://dmax.de/sendungen/feuerwache-3-alarm-in-muenchen/24-stunden-auf-der-feuerwache-3',
+ 'info_dict': {
+ 'id': '8873549',
+ 'ext': 'mp4',
+ 'title': '24 Stunden auf der Feuerwache 3',
+ 'description': 'md5:f3084ef6170bfb79f9a6e0c030e09330',
+ 'display_id': '24-stunden-auf-der-feuerwache-3',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'Feuerwache 3 - Alarm in München',
+ 'duration': 2632.0,
+ 'upload_date': '20251016',
+ 'timestamp': 1760645100,
+ 'creators': ['DMAX'],
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2025/10/14/0bdee68c-a8d8-33d9-9204-16eb61108552.jpeg',
+ 'tags': [],
+ 'categories': ['DMAX Originals', 'Jobs', 'Blaulicht'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://tlc.de/sendungen/ghost-adventures/der-poltergeist-im-kostumladen',
+ 'info_dict': {
+ 'id': '4550602',
+ 'ext': 'mp4',
+ 'title': 'Der Poltergeist im Kostümladen',
+ 'description': 'md5:20b52b9736a0a3a7873d19a238fad7fc',
+ 'display_id': 'der-poltergeist-im-kostumladen',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'season': 'Season 25',
+ 'season_number': 25,
+ 'series': 'Ghost Adventures',
+ 'duration': 2493.0,
+ 'upload_date': '20241223',
+ 'timestamp': 1734948900,
+ 'creators': ['TLC'],
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/04/05/59941d26-a81b-365f-829f-69d8cd81fd0f.jpeg',
+ 'tags': [],
+ 'categories': ['Paranormal', 'Gruselig!'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://tlc.de/sendungen/evil-gesichter-des-boesen/das-geheimnis-meines-bruders',
+ 'info_dict': {
+ 'id': '7792288',
+ 'ext': 'mp4',
+ 'title': 'Das Geheimnis meines Bruders',
+ 'description': 'md5:3167550bb582eb9c92875c86a0a20882',
+ 'display_id': 'das-geheimnis-meines-bruders',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'Evil - Gesichter des Bösen',
+ 'duration': 2626.0,
+ 'upload_date': '20240926',
+ 'timestamp': 1727388000,
+ 'creators': ['TLC'],
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/11/29/e9f3e3ae-74ec-3631-81b7-fc7bbe844741.jpeg',
+ 'tags': 'count:13',
+ 'categories': ['True Crime', 'Mord'],
+ },
+ 'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
domain, programme, alternate_id = self._match_valid_url(url).groups()
- country = 'GB' if domain == 'dplay.co.uk' else 'DE'
- realm = 'questuk' if country == 'GB' else domain.replace('.', '')
- return self._get_disco_api_info(
- url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country)
+ meta = self._download_json(
+ f'https://de-api.loma-cms.com/feloma/videos/{alternate_id}/',
+ alternate_id, query={
+ 'environment': domain.split('.')[0],
+ 'v': '2',
+ 'filter[show.slug]': programme,
+ }, fatal=False)
+ video_id = traverse_obj(meta, ('uid', {str}, {lambda s: s[-7:]})) or alternate_id
+
+ disco_api_info = self._get_disco_api_info(
+ url, video_id, 'eu1-prod.disco-api.com', domain.replace('.', ''), 'DE')
+ disco_api_info['display_id'] = alternate_id
+ disco_api_info['categories'] = traverse_obj(meta, (
+ 'taxonomies', lambda _, v: v['category'] == 'genre', 'title', {str.strip}, filter, all, filter))
+
+ return disco_api_info
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers.update({
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index 0c84f0b241..91c9f60cd8 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -1,21 +1,20 @@
import re
-import urllib.parse
from .common import InfoExtractor
-from .youtube import YoutubeIE
from ..utils import (
- ExtractorError,
- bug_reports_message,
determine_ext,
extract_attributes,
+ filter_dict,
get_element_by_class,
get_element_html_by_id,
int_or_none,
- lowercase_escape,
- parse_qs,
- try_get,
+ mimetype2ext,
+ parse_duration,
+ str_or_none,
update_url_query,
+ url_or_none,
)
+from ..utils.traversal import traverse_obj, value
class GoogleDriveIE(InfoExtractor):
@@ -38,8 +37,8 @@ class GoogleDriveIE(InfoExtractor):
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
- 'duration': 45,
- 'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ 'duration': 45.069,
+ 'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
},
}, {
# has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
@@ -49,8 +48,29 @@ class GoogleDriveIE(InfoExtractor):
'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
'ext': 'mp3',
'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
- 'duration': 184,
- 'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+ 'duration': 184.68,
+ },
+ }, {
+ # Has subtitle track
+ 'url': 'https://drive.google.com/file/d/1RAGWRgzn85TXCaCk4gxnwF6TGUaZatzE/view',
+ 'md5': '05488c528da6ef737ec8c962bfa9724e',
+ 'info_dict': {
+ 'id': '1RAGWRgzn85TXCaCk4gxnwF6TGUaZatzE',
+ 'ext': 'mp4',
+ 'title': 'test.mp4',
+ 'duration': 9.999,
+ 'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
+ },
+ }, {
+ # Has subtitle track with kind 'asr'
+ 'url': 'https://drive.google.com/file/d/1Prvv9-mtDDfN_gkJgtt1OFvIULK8c3Ev/view',
+ 'md5': 'ccae12d07f18b5988900b2c8b92801fc',
+ 'info_dict': {
+ 'id': '1Prvv9-mtDDfN_gkJgtt1OFvIULK8c3Ev',
+ 'ext': 'mp4',
+ 'title': 'LEE NA GYUNG-3410-VOICE_MESSAGE.mp4',
+ 'duration': 8.766,
+ 'thumbnail': r're:https://lh3\.googleusercontent\.com/drive-storage/',
},
}, {
# video can't be watched anonymously due to view count limit reached,
@@ -71,17 +91,6 @@ class GoogleDriveIE(InfoExtractor):
'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'only_matching': True,
}]
- _FORMATS_EXT = {
- **{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
- '50': 'm4a',
- }
- _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
- _CAPTIONS_ENTRY_TAG = {
- 'subtitles': 'track',
- 'automatic_captions': 'target',
- }
- _caption_formats_ext = []
- _captions_xml = None
@classmethod
def _extract_embed_urls(cls, url, webpage):
@@ -91,129 +100,73 @@ class GoogleDriveIE(InfoExtractor):
if mobj:
yield 'https://drive.google.com/file/d/{}'.format(mobj.group('id'))
- def _download_subtitles_xml(self, video_id, subtitles_id, hl):
- if self._captions_xml:
- return
- self._captions_xml = self._download_xml(
- self._BASE_URL_CAPTIONS, video_id, query={
- 'id': video_id,
- 'vid': subtitles_id,
- 'hl': hl,
+ @staticmethod
+ def _construct_subtitle_url(base_url, video_id, language, fmt, kind):
+ return update_url_query(
+ base_url, filter_dict({
+ 'hl': 'en-US',
'v': video_id,
+ 'type': 'track',
+ 'lang': language,
+ 'fmt': fmt,
+ 'kind': kind,
+ }))
+
+ def _get_subtitles(self, video_id, video_info):
+ subtitles = {}
+ timed_text_base_url = traverse_obj(video_info, ('timedTextDetails', 'timedTextBaseUrl', {url_or_none}))
+ if not timed_text_base_url:
+ return subtitles
+ subtitle_data = self._download_xml(
+ timed_text_base_url, video_id, 'Downloading subtitles XML', fatal=False, query={
+ 'hl': 'en-US',
'type': 'list',
- 'tlangs': '1',
- 'fmts': '1',
- 'vssids': '1',
- }, note='Downloading subtitles XML',
- errnote='Unable to download subtitles XML', fatal=False)
- if self._captions_xml:
- for f in self._captions_xml.findall('format'):
- if f.attrib.get('fmt_code') and not f.attrib.get('default'):
- self._caption_formats_ext.append(f.attrib['fmt_code'])
-
- def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
- origin_lang_code=None, origin_lang_name=None):
- if not subtitles_id or not caption_type:
- return
- captions = {}
- for caption_entry in self._captions_xml.findall(
- self._CAPTIONS_ENTRY_TAG[caption_type]):
- caption_lang_code = caption_entry.attrib.get('lang_code')
- caption_name = caption_entry.attrib.get('name') or origin_lang_name
- if not caption_lang_code or not caption_name:
- self.report_warning(f'Missing necessary caption metadata. '
- f'Need lang_code and name attributes. '
- f'Found: {caption_entry.attrib}')
- continue
- caption_format_data = []
- for caption_format in self._caption_formats_ext:
- query = {
- 'vid': subtitles_id,
- 'v': video_id,
- 'fmt': caption_format,
- 'lang': (caption_lang_code if origin_lang_code is None
- else origin_lang_code),
- 'type': 'track',
- 'name': caption_name,
- 'kind': '',
- }
- if origin_lang_code is not None:
- query.update({'tlang': caption_lang_code})
- caption_format_data.append({
- 'url': update_url_query(self._BASE_URL_CAPTIONS, query),
- 'ext': caption_format,
- })
- captions[caption_lang_code] = caption_format_data
- return captions
-
- def _get_subtitles(self, video_id, subtitles_id, hl):
- if not subtitles_id or not hl:
- return
- self._download_subtitles_xml(video_id, subtitles_id, hl)
- if not self._captions_xml:
- return
- return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
-
- def _get_automatic_captions(self, video_id, subtitles_id, hl):
- if not subtitles_id or not hl:
- return
- self._download_subtitles_xml(video_id, subtitles_id, hl)
- if not self._captions_xml:
- return
- track = next((t for t in self._captions_xml.findall('track') if t.attrib.get('cantran') == 'true'), None)
- if track is None:
- return
- origin_lang_code = track.attrib.get('lang_code')
- origin_lang_name = track.attrib.get('name')
- if not origin_lang_code or not origin_lang_name:
- return
- return self._get_captions_by_type(
- video_id, subtitles_id, 'automatic_captions', origin_lang_code, origin_lang_name)
+ 'tlangs': 1,
+ 'v': video_id,
+ 'vssids': 1,
+ })
+ subtitle_formats = traverse_obj(subtitle_data, (lambda _, v: v.tag == 'format', {lambda x: x.get('fmt_code')}, {str}))
+ for track in traverse_obj(subtitle_data, (lambda _, v: v.tag == 'track' and v.get('lang_code'))):
+ language = track.get('lang_code')
+ subtitles.setdefault(language, []).extend([{
+ 'url': self._construct_subtitle_url(
+ timed_text_base_url, video_id, language, sub_fmt, track.get('kind')),
+ 'name': track.get('lang_original'),
+ 'ext': sub_fmt,
+ } for sub_fmt in subtitle_formats])
+ return subtitles
def _real_extract(self, url):
video_id = self._match_id(url)
- video_info = urllib.parse.parse_qs(self._download_webpage(
- 'https://drive.google.com/get_video_info',
- video_id, 'Downloading video webpage', query={'docid': video_id}))
-
- def get_value(key):
- return try_get(video_info, lambda x: x[key][0])
-
- reason = get_value('reason')
- title = get_value('title')
+ video_info = self._download_json(
+ f'https://content-workspacevideo-pa.googleapis.com/v1/drive/media/{video_id}/playback',
+ video_id, 'Downloading video webpage', query={'key': 'AIzaSyDVQw45DwoYh632gvsP5vPDqEKvb-Ywnb8'},
+ headers={'Referer': 'https://drive.google.com/'})
formats = []
- fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
- fmt_list = (get_value('fmt_list') or '').split(',')
- if fmt_stream_map and fmt_list:
- resolutions = {}
- for fmt in fmt_list:
- mobj = re.search(
- r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
- if mobj:
- resolutions[mobj.group('format_id')] = (
- int(mobj.group('width')), int(mobj.group('height')))
+ for fmt in traverse_obj(video_info, (
+ 'mediaStreamingData', 'formatStreamingData', ('adaptiveTranscodes', 'progressiveTranscodes'),
+ lambda _, v: url_or_none(v['url']))):
+ formats.append({
+ **traverse_obj(fmt, {
+ 'url': 'url',
+ 'format_id': ('itag', {int}, {str_or_none}),
+ }),
+ **traverse_obj(fmt, ('transcodeMetadata', {
+ 'ext': ('mimeType', {mimetype2ext}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'fps': ('videoFps', {int_or_none}),
+ 'filesize': ('contentLength', {int_or_none}),
+ 'vcodec': ((('videoCodecString', {str}), {value('none')}), any),
+ 'acodec': ((('audioCodecString', {str}), {value('none')}), any),
+ })),
+ 'downloader_options': {
+ 'http_chunk_size': 10 << 20,
+ },
+ })
- for fmt_stream in fmt_stream_map:
- fmt_stream_split = fmt_stream.split('|')
- if len(fmt_stream_split) < 2:
- continue
- format_id, format_url = fmt_stream_split[:2]
- ext = self._FORMATS_EXT.get(format_id)
- if not ext:
- self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
- f = {
- 'url': lowercase_escape(format_url),
- 'format_id': format_id,
- 'ext': ext,
- }
- resolution = resolutions.get(format_id)
- if resolution:
- f.update({
- 'width': resolution[0],
- 'height': resolution[1],
- })
- formats.append(f)
+ title = traverse_obj(video_info, ('mediaMetadata', 'title', {str}))
source_url = update_url_query(
'https://drive.usercontent.google.com/download', {
@@ -264,30 +217,20 @@ class GoogleDriveIE(InfoExtractor):
or get_element_by_class('uc-error-caption', confirmation_webpage)
or 'unable to extract confirmation code')
- if not formats and reason:
- if title:
- self.raise_no_formats(reason, expected=True)
- else:
- raise ExtractorError(reason, expected=True)
-
- hl = get_value('hl')
- subtitles_id = None
- ttsurl = get_value('ttsurl')
- if ttsurl:
- # the subtitles ID is the vid param of the ttsurl query
- subtitles_id = parse_qs(ttsurl).get('vid', [None])[-1]
-
- self.cookiejar.clear(domain='.google.com', path='/', name='NID')
-
return {
'id': video_id,
'title': title,
- 'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
- 'duration': int_or_none(get_value('length_seconds')),
+ **traverse_obj(video_info, {
+ 'duration': ('mediaMetadata', 'duration', {parse_duration}),
+ 'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['url']), {
+ 'url': 'url',
+ 'ext': ('mimeType', {mimetype2ext}),
+ 'width': ('width', {int}),
+ 'height': ('height', {int}),
+ }),
+ }),
'formats': formats,
- 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
- 'automatic_captions': self.extract_automatic_captions(
- video_id, subtitles_id, hl),
+ 'subtitles': self.extract_subtitles(video_id, video_info),
}
diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py
index 2e959cead2..fb9b046126 100644
--- a/yt_dlp/extractor/goplay.py
+++ b/yt_dlp/extractor/goplay.py
@@ -13,12 +13,14 @@ from ..utils.traversal import get_first, traverse_obj
class GoPlayIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
+ IE_NAME = 'play.tv'
+ IE_DESC = 'PLAY (formerly goplay.be)'
+ _VALID_URL = r'https?://(www\.)?play\.tv/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
_NETRC_MACHINE = 'goplay'
_TESTS = [{
- 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
+ 'url': 'https://www.play.tv/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
'info_dict': {
'id': '2baa4560-87a0-421b-bffc-359914e3c387',
'ext': 'mp4',
@@ -33,7 +35,7 @@ class GoPlayIE(InfoExtractor):
'params': {'skip_download': True},
'skip': 'This video is only available for registered users',
}, {
- 'url': 'https://www.goplay.be/video/1917',
+ 'url': 'https://www.play.tv/video/1917',
'info_dict': {
'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
'ext': 'mp4',
@@ -43,7 +45,7 @@ class GoPlayIE(InfoExtractor):
'params': {'skip_download': True},
'skip': 'This video is only available for registered users',
}, {
- 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
+ 'url': 'https://www.play.tv/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': {
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
'ext': 'mp4',
@@ -101,7 +103,7 @@ class GoPlayIE(InfoExtractor):
break
api = self._download_json(
- f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
+ f'https://api.play.tv/web/v1/videos/long-form/{video_id}',
video_id, headers={
'Authorization': f'Bearer {self._id_token}',
**self.geo_verification_headers(),
diff --git a/yt_dlp/extractor/kika.py b/yt_dlp/extractor/kika.py
index e277564524..94798b9ac3 100644
--- a/yt_dlp/extractor/kika.py
+++ b/yt_dlp/extractor/kika.py
@@ -17,57 +17,60 @@ class KikaIE(InfoExtractor):
_GEO_COUNTRIES = ['DE']
_TESTS = [{
- 'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
- 'md5': 'fbfc8da483719ef06f396e5e5b938c69',
+ # Video without season/episode info
+ 'url': 'https://www.kika.de/logo/videos/logo-vom-dienstag-achtundzwanzig-oktober-zweitausendfuenfundzwanzig-100',
+ 'md5': '4a9f6e0f9c6bfcc82394c294f186d6db',
'info_dict': {
- 'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
+ 'id': 'logo-vom-dienstag-achtundzwanzig-oktober-zweitausendfuenfundzwanzig-100',
'ext': 'mp4',
- 'upload_date': '20240831',
- 'timestamp': 1725126600,
- 'season_number': 2024,
- 'modified_date': '20240831',
- 'episode': 'Episode 476',
- 'episode_number': 476,
- 'season': 'Season 2024',
- 'duration': 634,
- 'title': 'logo! vom Samstag, 31. August 2024',
- 'modified_timestamp': 1725129983,
+ 'title': 'logo! vom Dienstag, 28. Oktober 2025',
+ 'description': 'md5:4d28b92cef423bec99740ffaa3e7ec04',
+ 'duration': 651,
+ 'timestamp': 1761678000,
+ 'upload_date': '20251028',
+ 'modified_timestamp': 1761682624,
+ 'modified_date': '20251028',
},
}, {
+ # Video with season/episode info
+ # Also: Video with subtitles
'url': 'https://www.kika.de/kaltstart/videos/video92498',
- 'md5': '710ece827e5055094afeb474beacb7aa',
+ 'md5': 'e58073070acb195906c55c4ad31dceb3',
'info_dict': {
'id': 'video92498',
'ext': 'mp4',
'title': '7. Wo ist Leo?',
'description': 'md5:fb48396a5b75068bcac1df74f1524920',
'duration': 436,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Episode 7',
+ 'episode_number': 7,
'timestamp': 1702926876,
'upload_date': '20231218',
- 'episode_number': 7,
- 'modified_date': '20240319',
'modified_timestamp': 1710880610,
- 'episode': 'Episode 7',
- 'season_number': 1,
- 'season': 'Season 1',
+ 'modified_date': '20240319',
+ 'subtitles': 'count:1',
},
}, {
- 'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
- 'md5': 'ffd1b700d7de0a6616a1d08544c77294',
+ # Video without subtitles
+ 'url': 'https://www.kika.de/die-pfefferkoerner/videos/abgezogen-102',
+ 'md5': '62e97961ce5343c19f0f330a1b6dd736',
'info_dict': {
- 'id': 'video90088',
+ 'id': 'abgezogen-102',
'ext': 'mp4',
- 'upload_date': '20221102',
- 'timestamp': 1667390580,
- 'duration': 197,
- 'modified_timestamp': 1711093771,
- 'episode_number': 8,
- 'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
- 'modified_date': '20240322',
- 'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
- 'season_number': 1,
- 'episode': 'Episode 8',
+ 'title': '1. Abgezogen',
+ 'description': 'md5:42d87963364391f9f8eba8affcb30bd2',
+ 'duration': 1574,
'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'timestamp': 1735382700,
+ 'upload_date': '20241228',
+ 'modified_timestamp': 1757344051,
+ 'modified_date': '20250908',
+ 'subtitles': 'count:0',
},
}]
@@ -78,16 +81,19 @@ class KikaIE(InfoExtractor):
video_assets = self._download_json(doc['assets']['url'], video_id)
subtitles = {}
- if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
- subtitles['de'] = [{
- 'url': ttml_resource,
- 'ext': 'ttml',
- }]
- if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
- subtitles.setdefault('de', []).append({
- 'url': webvtt_resource,
- 'ext': 'vtt',
- })
+ # Subtitle API endpoints may be present in the JSON even if there are no subtitles.
+ # They then return HTTP 200 with invalid data. So we must check explicitly.
+ if doc.get('hasSubtitle'):
+ if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
+ subtitles['de'] = [{
+ 'url': ttml_resource,
+ 'ext': 'ttml',
+ }]
+ if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
+ subtitles.setdefault('de', []).append({
+ 'url': webvtt_resource,
+ 'ext': 'vtt',
+ })
return {
'id': video_id,
diff --git a/yt_dlp/extractor/nascar.py b/yt_dlp/extractor/nascar.py
new file mode 100644
index 0000000000..b14a3b0aa1
--- /dev/null
+++ b/yt_dlp/extractor/nascar.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ parse_iso8601,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NascarClassicsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?classics\.nascar\.com/video/(?P<id>[\w~-]+)'
+ _TESTS = [{
+ 'url': 'https://classics.nascar.com/video/Ka5qGuxzZ~SIvJii7uAC~wszPshklHN',
+ 'md5': '81d712eccffa7169c328281b8cc28f77',
+ 'info_dict': {
+ 'id': 'Ka5qGuxzZ~SIvJii7uAC~wszPshklHN',
+ 'ext': 'mp4',
+ 'title': 'Cook Out 400 2023',
+ 'thumbnail': 'https://va.aws.nascar.com/IMAGES/CUP_2023_22_RICHMOND_THUMB_NCD.jpg',
+ 'timestamp': 1690732800,
+ 'upload_date': '20230730',
+ 'tags': ['2023', 'race #22', 'richmond', 'chris buescher', 'cup'],
+ 'chapters': 'count:18',
+ },
+ }, {
+ 'url': 'https://classics.nascar.com/video/UASvPDOwEha~SIvJii7uAC~wszPshklHN',
+ 'md5': 'a5e8d6ec6005da3857d25ba2df5e7133',
+ 'info_dict': {
+ 'id': 'UASvPDOwEha~SIvJii7uAC~wszPshklHN',
+ 'ext': 'mp4',
+ 'title': 'I Love New York 355 at the Glen 2017',
+ 'thumbnail': 'https://va.aws.nascar.com/IMAGES/CUP_2017_22_WATKINSGLEN_THUMB_NCD.jpg',
+ 'timestamp': 1501995600,
+ 'upload_date': '20170806',
+ 'tags': ['watkins glen', 'race #22', '2017', 'martin truex jr.', 'cup'],
+ 'chapters': 'count:13',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ content_data = self._search_nextjs_data(
+ webpage, video_id)['props']['pageProps']['contentData']
+
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(content_data['input']['src'], video_id, 'mp4'),
+ **traverse_obj(content_data, {
+ 'title': ('input', 'name', {str}),
+ 'description': ('input', 'description', {str}, filter),
+ 'thumbnail': ('input', 'thumbnail', {url_or_none}),
+ 'tags': ('input', 'settings', 'tags', ..., {str}),
+ 'timestamp': ('input', 'start_time', {parse_iso8601}),
+ 'chapters': ('overlay', 'data', 'timelines', 0, 'events', lambda _, v: float(v['timestamp']) is not None, {
+ 'start_time': ('timestamp', {float_or_none}),
+ 'title': ('name', {str}),
+ }),
+ }),
+ }
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index caa9dc0175..41811b8a20 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -63,7 +63,7 @@ class NBCUniversalBaseIE(ThePlatformBaseIE):
# formats='mpeg4' will return either a working m3u8 URL or an m3u8 template for non-DRM HLS
# formats='m3u+none,mpeg4' may return DRM HLS but w/the "folders" needed for non-DRM template
query['formats'] = 'm3u+none,mpeg4'
- m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
+ orig_m3u8_url = m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
if mobj := re.fullmatch(self._M3U8_RE, m3u8_url):
query['formats'] = 'mpeg4'
@@ -76,7 +76,17 @@ class NBCUniversalBaseIE(ThePlatformBaseIE):
if '/mpeg_cenc' in m3u8_url or '/mpeg_cbcs' in m3u8_url:
self.report_drm(video_id)
- return self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+
+ if not formats and m3u8_url != orig_m3u8_url:
+ orig_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
+ orig_m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats = [f for f in orig_fmts if not f.get('has_drm')]
+ if orig_fmts and not formats:
+ self.report_drm(video_id)
+
+ return formats, subtitles
def _extract_nbcu_video(self, url, display_id, old_ie_key=None):
webpage = self._download_webpage(url, display_id)
diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py
index 79bb8a8055..a0ac2a0bc6 100644
--- a/yt_dlp/extractor/urplay.py
+++ b/yt_dlp/extractor/urplay.py
@@ -8,6 +8,7 @@ from ..utils import (
try_get,
unified_timestamp,
)
+from ..utils.traversal import traverse_obj
class URPlayIE(InfoExtractor):
@@ -25,7 +26,7 @@ class URPlayIE(InfoExtractor):
'upload_date': '20171214',
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
'duration': 2269,
- 'categories': ['Vetenskap & teknik'],
+ 'categories': ['Kultur & historia'],
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
'age_limit': 15,
@@ -78,7 +79,7 @@ class URPlayIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
urplayer_data = self._search_nextjs_data(webpage, video_id, fatal=False) or {}
if urplayer_data:
- urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
+ urplayer_data = traverse_obj(urplayer_data, ('props', 'pageProps', 'productData', {dict}))
if not urplayer_data:
raise ExtractorError('Unable to parse __NEXT_DATA__')
else:
diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py
index 17e942465d..9ecce15553 100644
--- a/yt_dlp/extractor/youtube/_base.py
+++ b/yt_dlp/extractor/youtube/_base.py
@@ -327,6 +327,17 @@ INNERTUBE_CLIENTS = {
# See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506
'AUTHENTICATED_USER_AGENT': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)',
},
+ 'tv_downgraded': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'TVHTML5',
+ 'clientVersion': '5.20251105',
+ 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
+ 'SUPPORTS_COOKIES': True,
+ },
'tv_simply': {
'INNERTUBE_CONTEXT': {
'client': {
@@ -380,11 +391,15 @@ def short_client_name(client_name):
return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
-def build_innertube_clients():
- THIRD_PARTY = {
+def _fix_embedded_ytcfg(ytcfg):
+ ytcfg['INNERTUBE_CONTEXT'].setdefault('thirdParty', {}).update({
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
- }
- BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android')
+ })
+
+
+def build_innertube_clients():
+ # From highest to lowest priority
+ BASE_CLIENTS = ('tv', 'web', 'mweb', 'android', 'ios')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@@ -405,10 +420,7 @@ def build_innertube_clients():
ytcfg['priority'] = 10 * priority(base_client)
if variant == 'embedded':
- ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
- ytcfg['priority'] -= 2
- elif variant:
- ytcfg['priority'] -= 3
+ _fix_embedded_ytcfg(ytcfg)
build_innertube_clients()
@@ -991,6 +1003,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
ytcfg = self.extract_ytcfg(video_id, webpage) or {}
+ # See https://github.com/yt-dlp/yt-dlp/issues/14826
+ if _split_innertube_client(client)[2] == 'embedded':
+ _fix_embedded_ytcfg(ytcfg)
+
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/12563
# But it's not effective when logged-in
if client == 'tv' and not self.is_authenticated:
diff --git a/yt_dlp/extractor/youtube/_tab.py b/yt_dlp/extractor/youtube/_tab.py
index 72a66e0a1a..f991d99759 100644
--- a/yt_dlp/extractor/youtube/_tab.py
+++ b/yt_dlp/extractor/youtube/_tab.py
@@ -340,8 +340,9 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
thumbnails=self._extract_thumbnails(view_model, (
'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
duration=traverse_obj(view_model, (
- 'contentImage', 'thumbnailViewModel', 'overlays', ..., 'thumbnailOverlayBadgeViewModel',
- 'thumbnailBadges', ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)),
+ 'contentImage', 'thumbnailViewModel', 'overlays', ...,
+ (('thumbnailBottomOverlayViewModel', 'badges'), ('thumbnailOverlayBadgeViewModel', 'thumbnailBadges')),
+ ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)),
timestamp=(traverse_obj(view_model, (
'metadata', 'lockupMetadataViewModel', 'metadata', 'contentMetadataViewModel', 'metadataRows',
..., 'metadataParts', ..., 'text', 'content', {lambda t: self._parse_time_text(t, report_failure=False)}, any))
diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py
index 1fc45dac6f..e7b8e2d5a2 100644
--- a/yt_dlp/extractor/youtube/_video.py
+++ b/yt_dlp/extractor/youtube/_video.py
@@ -4,9 +4,7 @@ import collections
import datetime as dt
import functools
import itertools
-import json
import math
-import os.path
import random
import re
import sys
@@ -26,10 +24,11 @@ from ._base import (
_split_innertube_client,
short_client_name,
)
+from .jsc._builtin.ejs import _EJS_WIKI_URL
+from .jsc._director import initialize_jsc_director
+from .jsc.provider import JsChallengeRequest, JsChallengeType, NChallengeInput, SigChallengeInput
from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest
-from ..openload import PhantomJSwrapper
-from ...jsinterp import JSInterpreter, LocalNameSpace
from ...networking.exceptions import HTTPError
from ...utils import (
NO_DEFAULT,
@@ -39,13 +38,11 @@ from ...utils import (
clean_html,
datetime_from_str,
filesize_from_tbr,
- filter_dict,
float_or_none,
format_field,
get_first,
int_or_none,
join_nonempty,
- js_to_json,
mimetype2ext,
orderedSet,
parse_codecs,
@@ -147,120 +144,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
- _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
- '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
- '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
- '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
- '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
- '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
- '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
-
-
- # 3D videos
- '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
- '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
- '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
-
- # Apple HTTP Live Streaming
- '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
-
- # DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
- '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
-
- # Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
- '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
- '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
-
- # Dash webm
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
- '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
-
- # Dash webm audio
- '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
- '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
-
- # Dash webm audio with opus inside
- '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
- '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
- '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
-
- # RTMP (unnamed)
- '_rtmp': {'protocol': 'rtmp'},
-
- # av01 video only formats sometimes served with "unknown" codecs
- '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
- '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
- '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
- '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
- '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
- '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
- '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
- '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
- }
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
- _DEFAULT_CLIENTS = ('android_sdkless', 'tv', 'web_safari', 'web')
- _DEFAULT_AUTHED_CLIENTS = ('tv', 'web_safari', 'web')
+ _DEFAULT_CLIENTS = ('tv', 'android_sdkless', 'web')
+ _DEFAULT_JSLESS_CLIENTS = ('android_sdkless', 'web_safari', 'web')
+ _DEFAULT_AUTHED_CLIENTS = ('tv_downgraded', 'web_safari', 'web')
# Premium does not require POT (except for subtitles)
- _DEFAULT_PREMIUM_CLIENTS = ('tv', 'web_creator', 'web_safari', 'web')
+ _DEFAULT_PREMIUM_CLIENTS = ('tv_downgraded', 'web_creator', 'web')
_GEO_BYPASS = False
@@ -1667,6 +1556,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'view_count': int,
},
'params': {'skip_download': True},
+ }, {
+ # Youtube Music Auto-generated description with dot in artist name
+ 'url': 'https://music.youtube.com/watch?v=DbCvuSGfR3Y',
+ 'info_dict': {
+ 'id': 'DbCvuSGfR3Y',
+ 'ext': 'mp4',
+ 'title': 'Back Around',
+ 'artists': ['half·alive'],
+ 'track': 'Back Around',
+ 'album': 'Conditions Of A Punk',
+ 'release_date': '20221202',
+ 'release_year': 2021,
+ 'alt_title': 'Back Around',
+ 'description': 'md5:bfc0e2b3cc903a608d8a85a13cb50f95',
+ 'media_type': 'video',
+ 'uploader': 'half•alive',
+ 'channel': 'half•alive',
+ 'channel_id': 'UCYQrYophdVI3nVDPOnXyIng',
+ 'channel_url': 'https://www.youtube.com/channel/UCYQrYophdVI3nVDPOnXyIng',
+ 'channel_is_verified': True,
+ 'channel_follower_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 0,
+ 'duration': 223,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/DbCvuSGfR3Y/maxresdefault.webp',
+ 'heatmap': 'count:100',
+ 'categories': ['Music'],
+ 'tags': ['half·alive', 'Conditions Of A Punk', 'Back Around'],
+ 'creators': ['half·alive'],
+ 'timestamp': 1669889281,
+ 'upload_date': '20221201',
+ 'playable_in_embed': True,
+ 'availability': 'public',
+ 'live_status': 'not_live',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
_WEBPAGE_TESTS = [{
#