diff --git a/.github/actionlint.yml b/.github/actionlint.yml index bdd3901a37..ee319fef54 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -1,5 +1,4 @@ config-variables: - - KEEP_CACHE_WARM - PUSH_VERSION_COMMIT - UPDATE_TO_VERIFICATION - PYPI_PROJECT diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9b58653bd7..ea19bdfb13 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -74,11 +74,11 @@ on: default: true type: boolean -permissions: - contents: read +permissions: {} jobs: process: + name: Process runs-on: ubuntu-latest outputs: origin: ${{ steps.process_inputs.outputs.origin }} @@ -146,7 +146,6 @@ jobs: 'runner': 'ubuntu-24.04-arm', 'qemu_platform': 'linux/arm/v7', 'onefile': False, - 'cache_requirements': True, 'update_to': 'yt-dlp/yt-dlp@2023.03.04', }], 'musllinux': [{ @@ -175,7 +174,6 @@ jobs: exe.setdefault('qemu_platform', None) exe.setdefault('onefile', True) exe.setdefault('onedir', True) - exe.setdefault('cache_requirements', False) exe.setdefault('python_version', os.environ['PYTHON_VERSION']) exe.setdefault('update_to', os.environ['UPDATE_TO']) if not any(INPUTS.get(key) for key in EXE_MAP): @@ -186,8 +184,11 @@ jobs: f.write(f'matrix={json.dumps(matrix)}') unix: - needs: process + name: unix + needs: [process] if: inputs.unix + permissions: + contents: read runs-on: ubuntu-latest env: CHANNEL: ${{ inputs.channel }} @@ -196,11 +197,12 @@ jobs: UPDATE_TO: yt-dlp/yt-dlp@2025.09.05 steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 # Needed for changelog + persist-credentials: false - - uses: actions/setup-python@v6 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.10" @@ -229,7 +231,7 @@ jobs: [[ "${version}" != "${downgraded_version}" ]] - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: build-bin-${{ github.job }} path: | @@ -239,8 +241,10 @@ jobs: linux: name: ${{ matrix.os }} (${{ matrix.arch }}) + needs: [process] if: inputs.linux || inputs.linux_armv7l || inputs.musllinux - needs: process + permissions: + contents: read runs-on: ${{ matrix.runner }} strategy: fail-fast: false @@ -257,26 +261,16 @@ jobs: SKIP_ONEFILE_BUILD: ${{ (!matrix.onefile && '1') || '' }} steps: - - uses: actions/checkout@v6 - - - name: Cache requirements - if: matrix.cache_requirements - id: cache-venv - uses: actions/cache@v4 - env: - SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: - path: | - venv - key: cache-reqs-${{ matrix.os }}_${{ matrix.arch }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }} - restore-keys: | - cache-reqs-${{ matrix.os }}_${{ matrix.arch }}-${{ github.ref }}- - cache-reqs-${{ matrix.os }}_${{ matrix.arch }}- + persist-credentials: false - name: Set up QEMU if: matrix.qemu_platform - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0 with: + image: tonistiigi/binfmt:qemu-v10.0.4-56@sha256:30cc9a4d03765acac9be2ed0afc23af1ad018aed2c28ea4be8c2eb9afe03fbd1 + cache-image: false platforms: ${{ matrix.qemu_platform }} - name: Build executable @@ -300,7 +294,7 @@ jobs: docker compose up --build --exit-code-from "${SERVICE}" "${SERVICE}" - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: 
actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: build-bin-${{ matrix.os }}_${{ matrix.arch }} path: | @@ -308,7 +302,8 @@ jobs: compression-level: 0 macos: - needs: process + name: macos + needs: [process] if: inputs.macos permissions: contents: read @@ -320,21 +315,11 @@ jobs: UPDATE_TO: yt-dlp/yt-dlp@2025.09.05 steps: - - uses: actions/checkout@v6 - # NB: Building universal2 does not work with python from actions/setup-python - - - name: Cache requirements - id: cache-venv - uses: actions/cache@v4 - env: - SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: - path: | - ~/yt-dlp-build-venv - key: cache-reqs-${{ github.job }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }} - restore-keys: | - cache-reqs-${{ github.job }}-${{ github.ref }}- - cache-reqs-${{ github.job }}- + persist-credentials: false + + # NB: Building universal2 does not work with python from actions/setup-python - name: Install Requirements run: | @@ -399,7 +384,7 @@ jobs: [[ "$version" != "$downgraded_version" ]] - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: build-bin-${{ github.job }} path: | @@ -409,7 +394,7 @@ jobs: windows: name: windows (${{ matrix.arch }}) - needs: process + needs: [process] if: inputs.windows permissions: contents: read @@ -450,26 +435,15 @@ jobs: PYI_WHEEL: pyinstaller-${{ matrix.pyi_version }}-py3-none-${{ matrix.platform_tag }}.whl steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + persist-credentials: false + + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: ${{ matrix.python_version }} architecture: ${{ matrix.arch }} - - name: Cache requirements - id: cache-venv - if: matrix.arch == 'arm64' - uses: actions/cache@v4 - env: - SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1 - with: - path: | - /yt-dlp-build-venv - key: ${{ env.BASE_CACHE_KEY }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }} - restore-keys: | - ${{ env.BASE_CACHE_KEY }}-${{ github.ref }}- - ${{ env.BASE_CACHE_KEY }}- - - name: Install Requirements env: ARCH: ${{ matrix.arch }} @@ -477,6 +451,8 @@ jobs: PYI_HASH: ${{ matrix.pyi_hash }} shell: pwsh run: | + $ErrorActionPreference = "Stop" + $PSNativeCommandUseErrorActionPreference = $true python -m venv /yt-dlp-build-venv /yt-dlp-build-venv/Scripts/Activate.ps1 python -m pip install -U pip @@ -494,12 +470,16 @@ jobs: - name: Prepare shell: pwsh run: | + $ErrorActionPreference = "Stop" + $PSNativeCommandUseErrorActionPreference = $true python devscripts/update-version.py -c "${Env:CHANNEL}" -r "${Env:ORIGIN}" "${Env:VERSION}" python devscripts/make_lazy_extractors.py - name: Build shell: pwsh run: | + $ErrorActionPreference = "Stop" + $PSNativeCommandUseErrorActionPreference = $true /yt-dlp-build-venv/Scripts/Activate.ps1 python -m bundle.pyinstaller python -m bundle.pyinstaller --onedir @@ -509,6 +489,8 @@ jobs: if: vars.UPDATE_TO_VERIFICATION shell: pwsh run: | + $ErrorActionPreference = "Stop" + $PSNativeCommandUseErrorActionPreference = $true $name = "yt-dlp${Env:SUFFIX}" Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe" $version = & "./dist/${name}.exe" --version @@ -519,7 +501,7 @@ jobs: } - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: 
actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: build-bin-${{ github.job }}-${{ matrix.arch }} path: | @@ -528,23 +510,25 @@ jobs: compression-level: 0 meta_files: - if: always() && !cancelled() + name: Metadata files needs: - process - unix - linux - macos - windows + if: always() && !failure() && !cancelled() runs-on: ubuntu-latest steps: - name: Download artifacts - uses: actions/download-artifact@v5 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: artifact pattern: build-bin-* merge-multiple: true - name: Make SHA2-SUMS files + shell: bash run: | cd ./artifact/ # make sure SHA sums are also printed to stdout @@ -600,13 +584,13 @@ jobs: GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} if: env.GPG_SIGNING_KEY run: | - gpg --batch --import <<< "${{ secrets.GPG_SIGNING_KEY }}" + gpg --batch --import <<< "${GPG_SIGNING_KEY}" for signfile in ./SHA*SUMS; do gpg --batch --detach-sign "$signfile" done - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: build-${{ github.job }} path: | diff --git a/.github/workflows/cache-warmer.yml b/.github/workflows/cache-warmer.yml deleted file mode 100644 index 00ec1e1f96..0000000000 --- a/.github/workflows/cache-warmer.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Keep cache warm -on: - workflow_dispatch: - schedule: - - cron: '0 22 1,6,11,16,21,27 * *' - -jobs: - build: - if: | - vars.KEEP_CACHE_WARM || github.event_name == 'workflow_dispatch' - uses: ./.github/workflows/build.yml - with: - version: '999999' - channel: stable - origin: ${{ github.repository }} - unix: false - linux: false - linux_armv7l: true - musllinux: false - macos: true - windows: true - permissions: - contents: read diff --git a/.github/workflows/challenge-tests.yml b/.github/workflows/challenge-tests.yml index 68fe117191..8a98545750 100644 --- a/.github/workflows/challenge-tests.yml +++ b/.github/workflows/challenge-tests.yml @@ -16,8 +16,8 @@ on: - yt_dlp/extractor/youtube/jsc/**.py - yt_dlp/extractor/youtube/pot/**.py - yt_dlp/utils/_jsruntime.py -permissions: - contents: read + +permissions: {} concurrency: group: challenge-tests-${{ github.event.pull_request.number || github.ref }} @@ -26,6 +26,8 @@ concurrency: jobs: tests: name: Challenge Tests + permissions: + contents: read runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -35,26 +37,30 @@ jobs: env: QJS_VERSION: '2025-04-26' # Earliest version with rope strings steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + persist-credentials: false - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: ${{ matrix.python-version }} - name: Install Deno - uses: denoland/setup-deno@v2 + uses: denoland/setup-deno@e95548e56dfa95d4e1a28d6f422fafe75c4c26fb # v2.0.3 with: deno-version: '2.0.0' # minimum supported version - name: Install Bun - uses: oven-sh/setup-bun@v2 + uses: oven-sh/setup-bun@735343b667d3e6f658f44d0eca948eb6282f2b76 # v2.0.2 with: # minimum supported version is 1.0.31 but earliest available Windows version is 1.1.0 bun-version: ${{ (matrix.os == 'windows-latest' && '1.1.0') || '1.0.31' }} + no-cache: true - name: Install Node - uses: actions/setup-node@v6 + uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0 with: 
node-version: '20.0' # minimum supported version - name: Install QuickJS (Linux) if: matrix.os == 'ubuntu-latest' + shell: bash run: | wget "https://bellard.org/quickjs/binary_releases/quickjs-linux-x86_64-${QJS_VERSION}.zip" -O quickjs.zip unzip quickjs.zip qjs @@ -63,15 +69,19 @@ jobs: if: matrix.os == 'windows-latest' shell: pwsh run: | + $ErrorActionPreference = "Stop" + $PSNativeCommandUseErrorActionPreference = $true Invoke-WebRequest "https://bellard.org/quickjs/binary_releases/quickjs-win-x86_64-${Env:QJS_VERSION}.zip" -OutFile quickjs.zip unzip quickjs.zip - name: Install test requirements + shell: bash run: | python ./devscripts/install_deps.py --print --omit-default --include-extra test > requirements.txt python ./devscripts/install_deps.py --print -c certifi -c requests -c urllib3 -c yt-dlp-ejs >> requirements.txt python -m pip install -U -r requirements.txt - name: Run tests timeout-minutes: 15 + shell: bash run: | python -m yt_dlp -v --js-runtimes node --js-runtimes bun --js-runtimes quickjs || true python ./devscripts/run_tests.py test/test_jsc -k download diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index fda5351c08..c9eb40df41 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -9,14 +9,20 @@ on: schedule: - cron: '59 11 * * 5' +permissions: {} + +concurrency: + group: codeql-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + jobs: analyze: name: Analyze (${{ matrix.language }}) runs-on: ubuntu-latest permissions: - actions: read + actions: read # Needed by github/codeql-action if repository is private contents: read - security-events: write + security-events: write # Needed to use github/codeql-action with Github Advanced Security strategy: fail-fast: false @@ -25,15 +31,17 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v6 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + persist-credentials: false - name: Initialize CodeQL - uses: github/codeql-action/init@v4 + uses: github/codeql-action/init@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 with: languages: ${{ matrix.language }} build-mode: none - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v4 + uses: github/codeql-action/analyze@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 16c2b92b40..2d0dfae8a0 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -22,8 +22,8 @@ on: - yt_dlp/extractor/__init__.py - yt_dlp/extractor/common.py - yt_dlp/extractor/extractors.py -permissions: - contents: read + +permissions: {} concurrency: group: core-${{ github.event.pull_request.number || github.ref }} @@ -33,6 +33,8 @@ jobs: tests: name: Core Tests if: "!contains(github.event.head_commit.message, 'ci skip')" + permissions: + contents: read runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -55,11 +57,12 @@ jobs: - os: windows-latest python-version: pypy-3.11 steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 + persist-credentials: false - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: ${{ matrix.python-version }} - name: Install test requirements diff --git 
a/.github/workflows/download.yml b/.github/workflows/download.yml deleted file mode 100644 index 62a2cf9ba0..0000000000 --- a/.github/workflows/download.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Download Tests -on: [push, pull_request] -permissions: - contents: read - -jobs: - quick: - name: Quick Download Tests - if: "contains(github.event.head_commit.message, 'ci run dl')" - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.10' - - name: Install test requirements - run: python ./devscripts/install_deps.py --include-extra dev - - name: Run tests - continue-on-error: true - run: python ./devscripts/run_tests.py download - - full: - name: Full Download Tests - if: "contains(github.event.head_commit.message, 'ci run dl all')" - runs-on: ${{ matrix.os }} - strategy: - fail-fast: true - matrix: - os: [ubuntu-latest] - python-version: ['3.11', '3.12', '3.13', '3.14', pypy-3.11] - include: - # atleast one of each CPython/PyPy tests must be in windows - - os: windows-latest - python-version: '3.10' - - os: windows-latest - python-version: pypy-3.11 - steps: - - uses: actions/checkout@v6 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - name: Install test requirements - run: python ./devscripts/install_deps.py --include-extra dev - - name: Run tests - continue-on-error: true - run: python ./devscripts/run_tests.py download diff --git a/.github/workflows/issue-lockdown.yml b/.github/workflows/issue-lockdown.yml index 4b973e2e61..09f47ee622 100644 --- a/.github/workflows/issue-lockdown.yml +++ b/.github/workflows/issue-lockdown.yml @@ -3,13 +3,14 @@ on: issues: types: [opened] -permissions: - issues: write +permissions: {} jobs: lockdown: name: Issue Lockdown if: vars.ISSUE_LOCKDOWN + permissions: + issues: write # Needed to lock issues runs-on: ubuntu-latest steps: - name: "Lock new issue" diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index f72f6a5651..7584790d7f 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -1,33 +1,47 @@ name: Quick Test on: [push, pull_request] -permissions: - contents: read + +permissions: {} + +concurrency: + group: quick-test-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: tests: name: Core Test if: "!contains(github.event.head_commit.message, 'ci skip all')" + permissions: + contents: read runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + persist-credentials: false - name: Set up Python 3.10 - uses: actions/setup-python@v6 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3.10' - name: Install test requirements + shell: bash run: python ./devscripts/install_deps.py --omit-default --include-extra test - name: Run tests timeout-minutes: 15 + shell: bash run: | python3 -m yt_dlp -v || true python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core check: name: Code check if: "!contains(github.event.head_commit.message, 'ci skip all')" + permissions: + contents: read runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + persist-credentials: 
false + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3.10' - name: Install dev dependencies @@ -39,4 +53,5 @@ jobs: - name: Run autopep8 run: autopep8 --diff . - name: Check file mode + shell: bash run: git ls-files --format="%(objectmode) %(path)" yt_dlp/ | ( ! grep -v "^100644" ) diff --git a/.github/workflows/release-master.yml b/.github/workflows/release-master.yml index f44da792f8..da8e75d696 100644 --- a/.github/workflows/release-master.yml +++ b/.github/workflows/release-master.yml @@ -14,35 +14,39 @@ on: - ".github/workflows/release-master.yml" concurrency: group: release-master -permissions: - contents: read + +permissions: {} jobs: release: + name: Publish Github release if: vars.BUILD_MASTER + permissions: + contents: write # May be needed to publish release + id-token: write # Needed for trusted publishing uses: ./.github/workflows/release.yml with: prerelease: true source: ${{ (github.repository != 'yt-dlp/yt-dlp' && vars.MASTER_ARCHIVE_REPO) || 'master' }} target: 'master' - permissions: - contents: write - id-token: write # mandatory for trusted publishing - secrets: inherit + secrets: + ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }} + GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} publish_pypi: + name: Publish to PyPI needs: [release] if: vars.MASTER_PYPI_PROJECT - runs-on: ubuntu-latest permissions: - id-token: write # mandatory for trusted publishing + id-token: write # Needed for trusted publishing + runs-on: ubuntu-latest steps: - name: Download artifacts - uses: actions/download-artifact@v5 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: dist name: build-pypi - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: verbose: true diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml index ac7e8cc675..9bbcc75a78 100644 --- a/.github/workflows/release-nightly.yml +++ b/.github/workflows/release-nightly.yml @@ -2,21 +2,43 @@ name: Release (nightly) on: schedule: - cron: '23 23 * * *' -permissions: - contents: read + workflow_dispatch: + +permissions: {} jobs: check_nightly: + name: Check for new commits if: vars.BUILD_NIGHTLY + permissions: + contents: read runs-on: ubuntu-latest outputs: commit: ${{ steps.check_for_new_commits.outputs.commit }} steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 + persist-credentials: false + + - name: Retrieve HEAD commit hash + id: head + shell: bash + run: echo "head=$(git rev-parse HEAD)" | tee -a "${GITHUB_OUTPUT}" + + - name: Cache nightly commit hash + uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1 + env: + SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1 + with: + path: .nightly_commit_hash + key: release-nightly-${{ steps.head.outputs.head }} + restore-keys: | + release-nightly- + - name: Check for new commits id: check_for_new_commits + shell: bash run: | relevant_files=( "yt_dlp/*.py" @@ -30,34 +52,54 @@ jobs: ".github/workflows/release.yml" ".github/workflows/release-nightly.yml" ) - echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT" + if [[ -f .nightly_commit_hash ]]; then + limit_args=( + "$(cat .nightly_commit_hash)..HEAD" + ) + else + limit_args=( + --since="24 hours ago" + ) + fi + echo "commit=$(git log --format=%H -1 
"${limit_args[@]}" -- "${relevant_files[@]}")" | tee -a "${GITHUB_OUTPUT}" + + - name: Record new nightly commit hash + env: + HEAD: ${{ steps.head.outputs.head }} + shell: bash + run: echo "${HEAD}" | tee .nightly_commit_hash release: + name: Publish Github release needs: [check_nightly] if: ${{ needs.check_nightly.outputs.commit }} + permissions: + contents: write # May be needed to publish release + id-token: write # Needed for trusted publishing uses: ./.github/workflows/release.yml with: prerelease: true source: ${{ (github.repository != 'yt-dlp/yt-dlp' && vars.NIGHTLY_ARCHIVE_REPO) || 'nightly' }} target: 'nightly' - permissions: - contents: write - id-token: write # mandatory for trusted publishing - secrets: inherit + secrets: + ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }} + GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} publish_pypi: + name: Publish to PyPI needs: [release] if: vars.NIGHTLY_PYPI_PROJECT - runs-on: ubuntu-latest permissions: - id-token: write # mandatory for trusted publishing + id-token: write # Needed for trusted publishing + runs-on: ubuntu-latest steps: - name: Download artifacts - uses: actions/download-artifact@v5 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: dist name: build-pypi + - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: verbose: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e9facc0430..1ef565723d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,6 +22,11 @@ on: required: false default: true type: boolean + secrets: + ARCHIVE_REPO_TOKEN: + required: false + GPG_SIGNING_KEY: + required: false workflow_dispatch: inputs: source: @@ -56,30 +61,30 @@ on: default: false type: boolean -permissions: - contents: read +permissions: {} jobs: prepare: + name: Prepare permissions: - contents: write + contents: write # Needed to git-push the release commit runs-on: ubuntu-latest outputs: channel: ${{ steps.setup_variables.outputs.channel }} version: ${{ steps.setup_variables.outputs.version }} target_repo: ${{ steps.setup_variables.outputs.target_repo }} - target_repo_token: ${{ steps.setup_variables.outputs.target_repo_token }} target_tag: ${{ steps.setup_variables.outputs.target_tag }} pypi_project: ${{ steps.setup_variables.outputs.pypi_project }} pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }} head_sha: ${{ steps.get_target.outputs.head_sha }} steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 + persist-credentials: true # Needed to git-push the release commit - - uses: actions/setup-python@v6 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.10" # Keep this in sync with test-workflows.yml @@ -104,8 +109,6 @@ jobs: TARGET_PYPI_SUFFIX: ${{ vars[format('{0}_pypi_suffix', steps.process_inputs.outputs.target_repo)] }} SOURCE_ARCHIVE_REPO: ${{ vars[format('{0}_archive_repo', steps.process_inputs.outputs.source_repo)] }} TARGET_ARCHIVE_REPO: ${{ vars[format('{0}_archive_repo', steps.process_inputs.outputs.target_repo)] }} - HAS_SOURCE_ARCHIVE_REPO_TOKEN: ${{ !!secrets[format('{0}_archive_repo_token', steps.process_inputs.outputs.source_repo)] }} - HAS_TARGET_ARCHIVE_REPO_TOKEN: ${{ !!secrets[format('{0}_archive_repo_token', steps.process_inputs.outputs.target_repo)] 
}} HAS_ARCHIVE_REPO_TOKEN: ${{ !!secrets.ARCHIVE_REPO_TOKEN }} run: | python -m devscripts.setup_variables @@ -150,30 +153,34 @@ jobs: run: git push origin "${GITHUB_EVENT_REF}" build: - needs: prepare + name: Build + needs: [prepare] + permissions: + contents: read uses: ./.github/workflows/build.yml with: version: ${{ needs.prepare.outputs.version }} channel: ${{ needs.prepare.outputs.channel }} origin: ${{ needs.prepare.outputs.target_repo }} linux_armv7l: ${{ inputs.linux_armv7l }} - permissions: - contents: read secrets: GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} publish_pypi: + name: Publish to PyPI needs: [prepare, build] if: ${{ needs.prepare.outputs.pypi_project }} - runs-on: ubuntu-latest permissions: - id-token: write # mandatory for trusted publishing + contents: read + id-token: write # Needed for trusted publishing + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: - fetch-depth: 0 - - uses: actions/setup-python@v6 + fetch-depth: 0 # Needed for changelog + persist-credentials: false + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.10" @@ -208,8 +215,8 @@ jobs: python -m build --no-isolation . - name: Upload artifacts - if: github.event_name != 'workflow_dispatch' - uses: actions/upload-artifact@v4 + if: github.event.workflow != '.github/workflows/release.yml' # Reusable workflow_call + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: build-pypi path: | @@ -217,15 +224,16 @@ jobs: compression-level: 0 - name: Publish to PyPI - if: github.event_name == 'workflow_dispatch' - uses: pypa/gh-action-pypi-publish@release/v1 + if: github.event.workflow == '.github/workflows/release.yml' # Direct workflow_dispatch + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: verbose: true publish: + name: Publish Github release needs: [prepare, build] permissions: - contents: write + contents: write # Needed by gh to publish release to Github runs-on: ubuntu-latest env: TARGET_REPO: ${{ needs.prepare.outputs.target_repo }} @@ -233,15 +241,16 @@ jobs: VERSION: ${{ needs.prepare.outputs.version }} HEAD_SHA: ${{ needs.prepare.outputs.head_sha }} steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 - - uses: actions/download-artifact@v5 + persist-credentials: false + - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: path: artifact pattern: build-* merge-multiple: true - - uses: actions/setup-python@v6 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.10" @@ -282,7 +291,7 @@ jobs: - name: Publish to archive repo env: - GH_TOKEN: ${{ secrets[needs.prepare.outputs.target_repo_token] }} + GH_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }} GH_REPO: ${{ needs.prepare.outputs.target_repo }} TITLE_PREFIX: ${{ startswith(env.TARGET_REPO, 'yt-dlp/') && 'yt-dlp ' || '' }} TITLE: ${{ inputs.target != env.TARGET_REPO && inputs.target || needs.prepare.outputs.channel }} diff --git a/.github/workflows/sanitize-comment.yml b/.github/workflows/sanitize-comment.yml index 45c87cdd47..5faf4cb3f7 100644 --- a/.github/workflows/sanitize-comment.yml +++ b/.github/workflows/sanitize-comment.yml @@ -4,14 +4,15 @@ on: issue_comment: types: [created, edited] -permissions: - issues: write +permissions: {} jobs: 
sanitize-comment: name: Sanitize comment if: vars.SANITIZE_COMMENT && !github.event.issue.pull_request + permissions: + issues: write # Needed by yt-dlp/sanitize-comment to edit comments runs-on: ubuntu-latest steps: - name: Sanitize comment - uses: yt-dlp/sanitize-comment@v1 + uses: yt-dlp/sanitize-comment@4536c691101b89f5373d50fe8a7980cae146346b # v1.0.0 diff --git a/.github/workflows/test-workflows.yml b/.github/workflows/test-workflows.yml index e1a125461a..8e0eba5ddd 100644 --- a/.github/workflows/test-workflows.yml +++ b/.github/workflows/test-workflows.yml @@ -1,21 +1,30 @@ name: Test and lint workflows on: push: + branches: [master] paths: + - .github/*.yml - .github/workflows/* - bundle/docker/linux/*.sh - devscripts/setup_variables.py - devscripts/setup_variables_tests.py - devscripts/utils.py pull_request: + branches: [master] paths: + - .github/*.yml - .github/workflows/* - bundle/docker/linux/*.sh - devscripts/setup_variables.py - devscripts/setup_variables_tests.py - devscripts/utils.py -permissions: - contents: read + +permissions: {} + +concurrency: + group: test-workflows-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + env: ACTIONLINT_VERSION: "1.7.9" ACTIONLINT_SHA256SUM: 233b280d05e100837f4af1433c7b40a5dcb306e3aa68fb4f17f8a7f45a7df7b4 @@ -24,15 +33,20 @@ env: jobs: check: name: Check workflows + permissions: + contents: read runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + persist-credentials: false + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.10" # Keep this in sync with release.yml's prepare job - name: Install requirements env: ACTIONLINT_TARBALL: ${{ format('actionlint_{0}_linux_amd64.tar.gz', env.ACTIONLINT_VERSION) }} + shell: bash run: | python -m devscripts.install_deps --omit-default --include-extra test sudo apt -y install shellcheck @@ -50,3 +64,20 @@ jobs: - name: Test GHA devscripts run: | pytest -Werror --tb=short --color=yes devscripts/setup_variables_tests.py + + zizmor: + name: Run zizmor + permissions: + contents: read + actions: read # Needed by zizmorcore/zizmor-action if repository is private + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + persist-credentials: false + - name: Run zizmor + uses: zizmorcore/zizmor-action@e639db99335bc9038abc0e066dfcd72e23d26fb4 # v0.3.0 + with: + advanced-security: false + persona: pedantic + version: v1.19.0 diff --git a/.github/zizmor.yml b/.github/zizmor.yml new file mode 100644 index 0000000000..01645c87e8 --- /dev/null +++ b/.github/zizmor.yml @@ -0,0 +1,15 @@ +rules: + concurrency-limits: + ignore: + - build.yml # Can only be triggered by maintainers or cronjob + - issue-lockdown.yml # It *should* run for *every* new issue + - release-nightly.yml # Can only be triggered by once-daily cronjob + - release.yml # Can only be triggered by maintainers or cronjob + - sanitize-comment.yml # It *should* run for *every* new comment/edit + obfuscation: + ignore: + - release.yml # Not actual obfuscation + unpinned-uses: + config: + policies: + "*": hash-pin diff --git a/README.md b/README.md index 7b67986b46..c4f85a5e39 100644 --- a/README.md +++ b/README.md @@ -1352,6 +1352,7 @@ The available fields are: - `repost_count` (numeric): Number of reposts of the video - `average_rating` (numeric): 
Average rating given by users, the scale used depends on the webpage - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used) + - `save_count` (numeric): Number of times the video has been saved or bookmarked - `age_limit` (numeric): Age restriction for the video (years) - `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed) - `is_live` (boolean): Whether this video is a live stream or a fixed-length video @@ -1821,6 +1822,9 @@ $ yt-dlp --parse-metadata "title:%(artist)s - %(title)s" # Regex example $ yt-dlp --parse-metadata "description:Artist - (?P<artist>.+)" +# Copy the episode field to the title field (with FROM and TO as single fields) +$ yt-dlp --parse-metadata "episode:title" + # Set title as "Series name S01E05" $ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s" @@ -1860,8 +1864,9 @@ The following extractors use this feature: * `player_js_variant`: The player javascript variant to use for n/sig deciphering. The known variants are: `main`, `tcc`, `tce`, `es5`, `es6`, `tv`, `tv_es6`, `phone`, `tablet`. The default is `main`, and the others are for debugging purposes. You can use `actual` to go with what is prescribed by the site * `player_js_version`: The player javascript version to use for n/sig deciphering, in the format of `signature_timestamp@hash` (e.g. `20348@0004de42`). The default is to use what is prescribed by the site, and can be selected with `actual` * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) -* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` - * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total +* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread,max-depth`. Default is `all,all,all,all,all` + * A `max-depth` value of `1` will discard all replies, regardless of the `max-replies` or `max-replies-per-thread` values given + * E.g. `all,all,1000,10,2` will get a maximum of 1000 replies total, with up to 10 replies per thread, and only 2 levels of depth (i.e. top-level comments plus their immediate replies). `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests. 
By default, no API key is used diff --git a/bundle/docker/compose.yml b/bundle/docker/compose.yml index 19a011d7a2..ee78eb4fcc 100644 --- a/bundle/docker/compose.yml +++ b/bundle/docker/compose.yml @@ -26,7 +26,7 @@ services: platforms: - "linux/amd64" args: - VERIFYIMAGE: quay.io/pypa/manylinux2014_x86_64:latest + VERIFYIMAGE: quay.io/pypa/manylinux2014_x86_64:2025.12.19-1@sha256:b716645f9aecd0c1418283af930804bbdbd68a73d855a60101c5aab8548d737d environment: EXE_NAME: ${EXE_NAME:?} UPDATE_TO: @@ -61,7 +61,7 @@ services: platforms: - "linux/arm64" args: - VERIFYIMAGE: quay.io/pypa/manylinux2014_aarch64:latest + VERIFYIMAGE: quay.io/pypa/manylinux2014_aarch64:2025.12.19-1@sha256:36cbe6638c7c605c2b44a92e35751baa537ec8902112f790139d89c7e1ccd2a4 environment: EXE_NAME: ${EXE_NAME:?} UPDATE_TO: @@ -97,7 +97,7 @@ services: platforms: - "linux/arm/v7" args: - VERIFYIMAGE: arm32v7/debian:bullseye + VERIFYIMAGE: arm32v7/debian:bullseye@sha256:9d544bf6ff73e36b8df1b7e415f6c8ee40ed84a0f3a26970cac8ea88b0ccf2ac environment: EXE_NAME: ${EXE_NAME:?} UPDATE_TO: @@ -132,7 +132,7 @@ services: platforms: - "linux/amd64" args: - VERIFYIMAGE: alpine:3.22 + VERIFYIMAGE: alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62 environment: EXE_NAME: ${EXE_NAME:?} UPDATE_TO: @@ -168,7 +168,7 @@ services: platforms: - "linux/arm64" args: - VERIFYIMAGE: alpine:3.22 + VERIFYIMAGE: alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62 environment: EXE_NAME: ${EXE_NAME:?} UPDATE_TO: diff --git a/devscripts/setup_variables.py b/devscripts/setup_variables.py index a45a36835c..a5bde4701c 100644 --- a/devscripts/setup_variables.py +++ b/devscripts/setup_variables.py @@ -21,8 +21,6 @@ def setup_variables(environment): SOURCE_PYPI_PROJECT, SOURCE_PYPI_SUFFIX, TARGET_PYPI_PROJECT, TARGET_PYPI_SUFFIX, SOURCE_ARCHIVE_REPO, TARGET_ARCHIVE_REPO, - HAS_SOURCE_ARCHIVE_REPO_TOKEN, - HAS_TARGET_ARCHIVE_REPO_TOKEN, HAS_ARCHIVE_REPO_TOKEN `INPUTS` must contain these keys: @@ -37,8 +35,6 @@ def setup_variables(environment): PROCESSED = json.loads(environment['PROCESSED']) source_channel = None - does_not_have_needed_token = False - target_repo_token = None pypi_project = None pypi_suffix = None @@ -81,28 +77,19 @@ def setup_variables(environment): target_repo = REPOSITORY if target_repo != REPOSITORY: target_repo = environment['TARGET_ARCHIVE_REPO'] - target_repo_token = f'{PROCESSED["target_repo"].upper()}_ARCHIVE_REPO_TOKEN' - if not json.loads(environment['HAS_TARGET_ARCHIVE_REPO_TOKEN']): - does_not_have_needed_token = True pypi_project = environment['TARGET_PYPI_PROJECT'] or None pypi_suffix = environment['TARGET_PYPI_SUFFIX'] or None else: target_tag = source_tag or version if source_channel: target_repo = source_channel - target_repo_token = f'{PROCESSED["source_repo"].upper()}_ARCHIVE_REPO_TOKEN' - if not json.loads(environment['HAS_SOURCE_ARCHIVE_REPO_TOKEN']): - does_not_have_needed_token = True pypi_project = environment['SOURCE_PYPI_PROJECT'] or None pypi_suffix = environment['SOURCE_PYPI_SUFFIX'] or None else: target_repo = REPOSITORY - if does_not_have_needed_token: - if not json.loads(environment['HAS_ARCHIVE_REPO_TOKEN']): - print(f'::error::Repository access secret {target_repo_token} not found') - return None - target_repo_token = 'ARCHIVE_REPO_TOKEN' + if target_repo != REPOSITORY and not json.loads(environment['HAS_ARCHIVE_REPO_TOKEN']): + return None if target_repo == REPOSITORY and not INPUTS['prerelease']: pypi_project = environment['PYPI_PROJECT'] or None 
@@ -111,7 +98,6 @@ def setup_variables(environment): 'channel': resolved_source, 'version': version, 'target_repo': target_repo, - 'target_repo_token': target_repo_token, 'target_tag': target_tag, 'pypi_project': pypi_project, 'pypi_suffix': pypi_suffix, @@ -147,6 +133,7 @@ if __name__ == '__main__': outputs = setup_variables(dict(os.environ)) if not outputs: + print('::error::Repository access secret ARCHIVE_REPO_TOKEN not found') sys.exit(1) print('::group::Output variables') diff --git a/devscripts/setup_variables_tests.py b/devscripts/setup_variables_tests.py index 42abba9d1f..22efe0a804 100644 --- a/devscripts/setup_variables_tests.py +++ b/devscripts/setup_variables_tests.py @@ -9,8 +9,10 @@ import json from devscripts.setup_variables import STABLE_REPOSITORY, process_inputs, setup_variables from devscripts.utils import calculate_version +GENERATE_TEST_DATA = object() -def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected=None, ignore_revision=False): + +def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected, ignore_revision=False): inp = inputs.copy() inp.setdefault('linux_armv7l', True) inp.setdefault('prerelease', False) @@ -33,16 +35,19 @@ def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected=Non 'TARGET_PYPI_SUFFIX': variables.get(f'{target_repo}_PYPI_SUFFIX') or '', 'SOURCE_ARCHIVE_REPO': variables.get(f'{source_repo}_ARCHIVE_REPO') or '', 'TARGET_ARCHIVE_REPO': variables.get(f'{target_repo}_ARCHIVE_REPO') or '', - 'HAS_SOURCE_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get(f'{source_repo}_ARCHIVE_REPO_TOKEN'))), - 'HAS_TARGET_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get(f'{target_repo}_ARCHIVE_REPO_TOKEN'))), 'HAS_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get('ARCHIVE_REPO_TOKEN'))), } result = setup_variables(env) - if not expected: + + if expected is GENERATE_TEST_DATA: print(' {\n' + '\n'.join(f' {k!r}: {v!r},' for k, v in result.items()) + '\n }') return + if expected is None: + assert result is None, f'expected error/None but got dict: {github_repository} {note}' + return + exp = expected.copy() if ignore_revision: assert len(result['version']) == len(exp['version']), f'revision missing: {github_repository} {note}' @@ -77,7 +82,6 @@ def test_setup_variables(): 'channel': 'stable', 'version': DEFAULT_VERSION, 'target_repo': STABLE_REPOSITORY, - 'target_repo_token': None, 'target_tag': DEFAULT_VERSION, 'pypi_project': 'yt-dlp', 'pypi_suffix': None, @@ -91,7 +95,6 @@ def test_setup_variables(): 'channel': 'nightly', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': 'yt-dlp/yt-dlp-nightly-builds', - 'target_repo_token': 'ARCHIVE_REPO_TOKEN', 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': 'yt-dlp', 'pypi_suffix': 'dev', @@ -106,7 +109,6 @@ def test_setup_variables(): 'channel': 'nightly', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': 'yt-dlp/yt-dlp-nightly-builds', - 'target_repo_token': 'ARCHIVE_REPO_TOKEN', 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': 'yt-dlp', 'pypi_suffix': 'dev', @@ -120,7 +122,6 @@ def test_setup_variables(): 'channel': 'master', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': 'yt-dlp/yt-dlp-master-builds', - 'target_repo_token': 'ARCHIVE_REPO_TOKEN', 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': None, 'pypi_suffix': None, @@ -135,7 +136,6 @@ def test_setup_variables(): 'channel': 'master', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': 'yt-dlp/yt-dlp-master-builds', - 'target_repo_token': 
'ARCHIVE_REPO_TOKEN', 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': None, 'pypi_suffix': None, @@ -149,7 +149,6 @@ def test_setup_variables(): 'channel': 'stable', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': STABLE_REPOSITORY, - 'target_repo_token': None, 'target_tag': 'experimental', 'pypi_project': None, 'pypi_suffix': None, @@ -163,7 +162,6 @@ def test_setup_variables(): 'channel': 'stable', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': STABLE_REPOSITORY, - 'target_repo_token': None, 'target_tag': 'experimental', 'pypi_project': None, 'pypi_suffix': None, @@ -175,7 +173,6 @@ def test_setup_variables(): 'channel': FORK_REPOSITORY, 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': None, 'pypi_suffix': None, @@ -186,7 +183,6 @@ def test_setup_variables(): 'channel': FORK_REPOSITORY, 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': None, 'pypi_suffix': None, @@ -201,7 +197,6 @@ def test_setup_variables(): 'channel': f'{FORK_REPOSITORY}@nightly', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': 'nightly', 'pypi_project': None, 'pypi_suffix': None, @@ -216,7 +211,6 @@ def test_setup_variables(): 'channel': f'{FORK_REPOSITORY}@master', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': 'master', 'pypi_project': None, 'pypi_suffix': None, @@ -227,7 +221,6 @@ def test_setup_variables(): 'channel': FORK_REPOSITORY, 'version': f'{DEFAULT_VERSION[:10]}.123', 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': f'{DEFAULT_VERSION[:10]}.123', 'pypi_project': None, 'pypi_suffix': None, @@ -239,7 +232,6 @@ def test_setup_variables(): 'channel': FORK_REPOSITORY, 'version': DEFAULT_VERSION, 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': DEFAULT_VERSION, 'pypi_project': None, 'pypi_suffix': None, @@ -250,19 +242,16 @@ def test_setup_variables(): 'channel': FORK_REPOSITORY, 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': None, 'pypi_suffix': None, }, ignore_revision=True) _test( - FORK_REPOSITORY, 'fork w/NIGHTLY_ARCHIVE_REPO_TOKEN, nightly', { + FORK_REPOSITORY, 'fork, nightly', { 'NIGHTLY_ARCHIVE_REPO': f'{FORK_ORG}/yt-dlp-nightly-builds', 'PYPI_PROJECT': 'yt-dlp-test', - }, { - 'NIGHTLY_ARCHIVE_REPO_TOKEN': '1', - }, { + }, BASE_REPO_SECRETS, { 'source': f'{FORK_ORG}/yt-dlp-nightly-builds', 'target': 'nightly', 'prerelease': True, @@ -270,19 +259,16 @@ def test_setup_variables(): 'channel': f'{FORK_ORG}/yt-dlp-nightly-builds', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': f'{FORK_ORG}/yt-dlp-nightly-builds', - 'target_repo_token': 'NIGHTLY_ARCHIVE_REPO_TOKEN', 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': None, 'pypi_suffix': None, }, ignore_revision=True) _test( - FORK_REPOSITORY, 'fork w/MASTER_ARCHIVE_REPO_TOKEN, master', { + FORK_REPOSITORY, 'fork, master', { 'MASTER_ARCHIVE_REPO': f'{FORK_ORG}/yt-dlp-master-builds', 'MASTER_PYPI_PROJECT': 'yt-dlp-test', 'MASTER_PYPI_SUFFIX': 'dev', - }, { - 'MASTER_ARCHIVE_REPO_TOKEN': '1', - }, { + }, BASE_REPO_SECRETS, { 'source': f'{FORK_ORG}/yt-dlp-master-builds', 'target': 
'master', 'prerelease': True, @@ -290,7 +276,6 @@ def test_setup_variables(): 'channel': f'{FORK_ORG}/yt-dlp-master-builds', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': f'{FORK_ORG}/yt-dlp-master-builds', - 'target_repo_token': 'MASTER_ARCHIVE_REPO_TOKEN', 'target_tag': DEFAULT_VERSION_WITH_REVISION, 'pypi_project': 'yt-dlp-test', 'pypi_suffix': 'dev', @@ -302,7 +287,6 @@ def test_setup_variables(): 'channel': f'{FORK_REPOSITORY}@experimental', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': 'experimental', 'pypi_project': None, 'pypi_suffix': None, @@ -317,8 +301,15 @@ def test_setup_variables(): 'channel': 'stable', 'version': DEFAULT_VERSION_WITH_REVISION, 'target_repo': FORK_REPOSITORY, - 'target_repo_token': None, 'target_tag': 'experimental', 'pypi_project': None, 'pypi_suffix': None, }, ignore_revision=True) + + _test( + STABLE_REPOSITORY, 'official vars but no ARCHIVE_REPO_TOKEN, nightly', + BASE_REPO_VARS, {}, { + 'source': 'nightly', + 'target': 'nightly', + 'prerelease': True, + }, None) diff --git a/test/helper.py b/test/helper.py index e96835fc46..adc5f455e5 100644 --- a/test/helper.py +++ b/test/helper.py @@ -261,9 +261,9 @@ def sanitize_got_info_dict(got_dict): def expect_info_dict(self, got_dict, expected_dict): ALLOWED_KEYS_SORT_ORDER = ( # NB: Keep in sync with the docstring of extractor/common.py - 'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type', + 'ie_key', 'url', 'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type', 'uploader', 'uploader_id', 'uploader_url', 'channel', 'channel_id', 'channel_url', 'channel_is_verified', - 'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count', + 'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count', 'save_count', 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'age_limit', 'duration', 'thumbnail', 'heatmap', 'chapters', 'chapter', 'chapter_number', 'chapter_id', 'start_time', 'end_time', 'section_start', 'section_end', 'categories', 'tags', 'cast', 'composers', 'artists', 'album_artists', 'creators', 'genres', diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py deleted file mode 100644 index 4e41007c82..0000000000 --- a/test/test_iqiyi_sdk_interpreter.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 - -# Allow direct execution -import os -import sys -import unittest - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -from test.helper import FakeYDL, is_download_test -from yt_dlp.extractor import IqiyiIE - - -class WarningLogger: - def __init__(self): - self.messages = [] - - def warning(self, msg): - self.messages.append(msg) - - def debug(self, msg): - pass - - def error(self, msg): - pass - - -@is_download_test -class TestIqiyiSDKInterpreter(unittest.TestCase): - def test_iqiyi_sdk_interpreter(self): - """ - Test the functionality of IqiyiSDKInterpreter by trying to log in - - If `sign` is incorrect, /validate call throws an HTTP 556 error - """ - logger = WarningLogger() - ie = IqiyiIE(FakeYDL({'logger': logger})) - ie._perform_login('foo', 'bar') - self.assertTrue('unable to log in:' in logger.messages[0]) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 3911567066..d58a97fc6e 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ 
-29,6 +29,11 @@ class TestMetadataFromField(unittest.TestCase): MetadataParserPP.format_to_regex('%(title)s - %(artist)s'), r'(?P<title>.+)\ \-\ (?P<artist>.+)') self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)') + self.assertEqual(MetadataParserPP.format_to_regex(r'text (?P<x>.+)'), r'text (?P<x>.+)') + self.assertEqual(MetadataParserPP.format_to_regex('x'), r'(?s)(?P<x>.+)') + self.assertEqual(MetadataParserPP.format_to_regex('Field_Name1'), r'(?s)(?P<Field_Name1>.+)') + self.assertEqual(MetadataParserPP.format_to_regex('é'), r'(?s)(?P<é>.+)') + self.assertEqual(MetadataParserPP.format_to_regex('invalid '), 'invalid ') def test_field_to_template(self): self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s') diff --git a/test/test_utils.py b/test/test_utils.py index 72f0eb7f76..0ee5c58fa8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -489,6 +489,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86) self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78) + self.assertEqual(unified_timestamp('2026-01-01 00:00:00', tz_offset=0), 1767225600) + self.assertEqual(unified_timestamp('2026-01-01 00:00:00', tz_offset=8), 1767196800) + self.assertEqual(unified_timestamp('2026-01-01 00:00:00 +0800', tz_offset=-5), 1767196800) + def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) @@ -1276,6 +1280,9 @@ class TestUtil(unittest.TestCase): on = js_to_json('[new Date("spam"), \'("eggs")\']') self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string') + on = js_to_json('[0.077, 7.06, 29.064, 169.0072]') + self.assertEqual(json.loads(on), [0.077, 7.06, 29.064, 169.0072]) + def test_js_to_json_malformed(self): self.assertEqual(js_to_json('42a1'), '42"a1"') self.assertEqual(js_to_json('42a-1'), '42"a"-1') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 539b10fe29..aceaa59eb8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -595,7 +595,7 @@ class YoutubeDL: 'width', 'height', 'asr', 'audio_channels', 'fps', 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx', 'timestamp', 'release_timestamp', 'available_at', - 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', + 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'save_count', 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', @@ -3026,6 +3026,10 @@ class YoutubeDL: format_selector = self.format_selector while True: if interactive_format_selection: + if not formats: + # Bypass interactive format selection if no formats & --ignore-no-formats-error + formats_to_download = None + break req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS) + '(Press ENTER for default, or Ctrl+C to quit)' + self._format_screen(': ', self.Styles.EMPHASIS)) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9129b5e68f..ea49a25b23 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -431,6 +431,7 @@ from .cpac import ( ) from .cracked import CrackedIE from .craftsy import CraftsyIE +from .croatianfilm import CroatianFilmIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( CrowdBunkerChannelIE, @@ 
-638,6 +639,7 @@ from .fc2 import ( ) from .fczenit import FczenitIE from .fifa import FifaIE +from .filmarchiv import FilmArchivIE from .filmon import ( FilmOnChannelIE, FilmOnIE, @@ -1086,11 +1088,6 @@ from .mangomolo import ( MangomoloLiveIE, MangomoloVideoIE, ) -from .manoto import ( - ManotoTVIE, - ManotoTVLiveIE, - ManotoTVShowIE, -) from .manyvids import ManyVidsIE from .maoritv import MaoriTVIE from .markiza import ( @@ -1278,6 +1275,7 @@ from .nebula import ( NebulaChannelIE, NebulaClassIE, NebulaIE, + NebulaSeasonIE, NebulaSubscriptionsIE, ) from .nekohacker import NekoHackerIE @@ -1312,12 +1310,6 @@ from .newgrounds import ( ) from .newspicks import NewsPicksIE from .newsy import NewsyIE -from .nextmedia import ( - AppleDailyIE, - NextMediaActionNewsIE, - NextMediaIE, - NextTVIE, -) from .nexx import ( NexxEmbedIE, NexxIE, @@ -1486,6 +1478,7 @@ from .palcomp3 import ( PalcoMP3IE, PalcoMP3VideoIE, ) +from .pandatv import PandaTvIE from .panopto import ( PanoptoIE, PanoptoListIE, @@ -1834,10 +1827,6 @@ from .scrippsnetworks import ( ScrippsNetworksWatchIE, ) from .scrolller import ScrolllerIE -from .scte import ( - SCTEIE, - SCTECourseIE, -) from .sejmpl import SejmIE from .sen import SenIE from .senalcolombia import SenalColombiaLiveIE @@ -2019,6 +2008,11 @@ from .taptap import ( TapTapMomentIE, TapTapPostIntlIE, ) +from .tarangplus import ( + TarangPlusEpisodesIE, + TarangPlusPlaylistIE, + TarangPlusVideoIE, +) from .tass import TassIE from .tbs import TBSIE from .tbsjp import ( diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 02c39beb68..7bf5199bc9 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -279,7 +279,7 @@ class ArchiveOrgIE(InfoExtractor): 'url': 'https://archive.org/' + track['file'].lstrip('/'), } - metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier) + metadata = self._download_json(f'https://archive.org/metadata/{identifier}', identifier) m = metadata['metadata'] identifier = m['identifier'] diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 0a8f88fa8c..510fc5f91d 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -5,16 +5,18 @@ import time from .common import InfoExtractor from ..utils import ( - KNOWN_EXTENSIONS, ExtractorError, clean_html, extract_attributes, float_or_none, + format_field, int_or_none, + join_nonempty, parse_filesize, + parse_qs, str_or_none, + strftime_or_none, try_get, - unified_strdate, unified_timestamp, update_url_query, url_or_none, @@ -411,70 +413,67 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE IE_NAME = 'Bandcamp:weekly' - _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/radio/?\?(?:[^#]+&)?show=(?P<id>\d+)' _TESTS = [{ - 'url': 'https://bandcamp.com/?show=224', + 'url': 'https://bandcamp.com/radio?show=224', 'md5': '61acc9a002bed93986b91168aa3ab433', 'info_dict': { 'id': '224', 'ext': 'mp3', - 'title': 'BC Weekly April 4th 2017 - Magic Moments', + 'title': 'Bandcamp Weekly, 2017-04-04', 'description': 'md5:5d48150916e8e02d030623a48512c874', - 'duration': 5829.77, - 'release_date': '20170404', + 'thumbnail': 'https://f4.bcbits.com/img/9982549_0.jpg', 'series': 'Bandcamp Weekly', - 'episode': 'Magic Moments', 'episode_id': '224', + 'release_timestamp': 1491264000, + 'release_date': '20170404', + 
'duration': 5829.77, }, 'params': { 'format': 'mp3-128', }, }, { - 'url': 'https://bandcamp.com/?blah/blah@&show=228', + 'url': 'https://bandcamp.com/radio/?foo=bar&show=224', 'only_matching': True, }] def _real_extract(self, url): show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) + audio_data = self._download_json( + 'https://bandcamp.com/api/bcradio_api/1/get_show', + show_id, 'Downloading radio show JSON', + data=json.dumps({'id': show_id}).encode(), + headers={'Content-Type': 'application/json'})['radioShowAudio'] - blob = self._extract_data_attr(webpage, show_id, 'blob') + stream_url = audio_data['streamUrl'] + format_id = traverse_obj(stream_url, ({parse_qs}, 'enc', -1)) + encoding, _, bitrate_str = (format_id or '').partition('-') - show = blob['bcw_data'][show_id] + webpage = self._download_webpage(url, show_id, fatal=False) + metadata = traverse_obj( + self._extract_data_attr(webpage, show_id, 'blob', fatal=False), + ('appData', 'shows', lambda _, v: str(v['showId']) == show_id, any)) or {} - formats = [] - for format_id, format_url in show['audio_stream'].items(): - if not url_or_none(format_url): - continue - for known_ext in KNOWN_EXTENSIONS: - if known_ext in format_id: - ext = known_ext - break - else: - ext = None - formats.append({ - 'format_id': format_id, - 'url': format_url, - 'ext': ext, - 'vcodec': 'none', - }) - - title = show.get('audio_title') or 'Bandcamp Weekly' - subtitle = show.get('subtitle') - if subtitle: - title += f' - {subtitle}' + series_title = audio_data.get('title') or metadata.get('title') + release_timestamp = unified_timestamp(audio_data.get('date')) or unified_timestamp(metadata.get('date')) return { 'id': show_id, - 'title': title, - 'description': show.get('desc') or show.get('short_desc'), - 'duration': float_or_none(show.get('audio_duration')), - 'is_live': False, - 'release_date': unified_strdate(show.get('published_date')), - 'series': 'Bandcamp Weekly', - 'episode': show.get('subtitle'), 'episode_id': show_id, - 'formats': formats, + 'title': join_nonempty(series_title, strftime_or_none(release_timestamp, '%Y-%m-%d'), delim=', '), + 'series': series_title, + 'thumbnail': format_field(metadata, 'imageId', 'https://f4.bcbits.com/img/%s_0.jpg', default=None), + 'description': metadata.get('desc') or metadata.get('short_desc'), + 'duration': float_or_none(audio_data.get('duration')), + 'release_timestamp': release_timestamp, + 'formats': [{ + 'url': stream_url, + 'format_id': format_id, + 'ext': encoding or 'mp3', + 'acodec': encoding or None, + 'vcodec': 'none', + 'abr': int_or_none(bitrate_str), + }], } diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index b1c230f357..3e5117329e 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import ExtractorError, urlencode_postdata +from ..utils import ExtractorError, UserNotLive, urlencode_postdata class BigoIE(InfoExtractor): @@ -40,7 +40,7 @@ class BigoIE(InfoExtractor): info = info_raw.get('data') or {} if not info.get('alive'): - raise ExtractorError('This user is offline.', expected=True) + raise UserNotLive(video_id=user_id) formats, subs = self._extract_m3u8_formats_and_subtitles( info.get('hls_src'), user_id, 'mp4', 'm3u8') diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 027b37d448..f8fb60644f 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -27,7 +27,7 @@ from ..utils.traversal import traverse_obj class CDAIE(InfoExtractor): - 
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' + _VALID_URL = r'https?://(?:(?:(?:www|m)\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' _NETRC_MACHINE = 'cdapl' _BASE_URL = 'https://www.cda.pl' @@ -110,6 +110,9 @@ class CDAIE(InfoExtractor): }, { 'url': 'http://ebd.cda.pl/0x0/5749950c', 'only_matching': True, + }, { + 'url': 'https://m.cda.pl/video/617297677', + 'only_matching': True, }] def _download_age_confirm_page(self, url, video_id, *args, **kwargs): @@ -367,35 +370,35 @@ class CDAIE(InfoExtractor): class CDAFolderIE(InfoExtractor): _MAX_PAGE_SIZE = 36 - _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)' - _TESTS = [ - { - 'url': 'https://www.cda.pl/domino264/folder/31188385', - 'info_dict': { - 'id': '31188385', - 'title': 'SERIA DRUGA', - }, - 'playlist_mincount': 13, + _VALID_URL = r'https?://(?:(?:www|m)\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.cda.pl/domino264/folder/31188385', + 'info_dict': { + 'id': '31188385', + 'title': 'SERIA DRUGA', }, - { - 'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm', - 'info_dict': { - 'id': '2664592', - 'title': 'VideoDowcipy - wszystkie odcinki', - }, - 'playlist_mincount': 71, + 'playlist_mincount': 13, + }, { + 'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm', + 'info_dict': { + 'id': '2664592', + 'title': 'VideoDowcipy - wszystkie odcinki', }, - { - 'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm', - 'info_dict': { - 'id': '19129979', - 'title': 'TESTY KOSMETYKÓW', - }, - 'playlist_mincount': 139, - }, { - 'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422', - 'only_matching': True, - }] + 'playlist_mincount': 71, + }, { + 'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm', + 'info_dict': { + 'id': '19129979', + 'title': 'TESTY KOSMETYKÓW', + }, + 'playlist_mincount': 139, + }, { + 'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422', + 'only_matching': True, + }, { + 'url': 'https://m.cda.pl/smiechawaTV/folder/2664592/vfilm', + 'only_matching': True, + }] def _real_extract(self, url): folder_id, channel = self._match_valid_url(url).group('id', 'channel') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 80e52366ff..7704758763 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -349,6 +349,7 @@ class InfoExtractor: duration: Length of the video in seconds, as an integer or float. view_count: How many users have watched the video on the platform. concurrent_view_count: How many users are currently watching the video on the platform. 
+ save_count: Number of times the video has been saved or bookmarked like_count: Number of positive ratings of the video dislike_count: Number of negative ratings of the video repost_count: Number of reposts of the video diff --git a/yt_dlp/extractor/croatianfilm.py b/yt_dlp/extractor/croatianfilm.py new file mode 100644 index 0000000000..de68829bd7 --- /dev/null +++ b/yt_dlp/extractor/croatianfilm.py @@ -0,0 +1,79 @@ +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..utils import ( + ExtractorError, + join_nonempty, +) +from ..utils.traversal import traverse_obj + + +class CroatianFilmIE(InfoExtractor): + IE_NAME = 'croatian.film' + _VALID_URL = r'https://?(?:www\.)?croatian\.film/[a-z]{2}/[^/?#]+/(?P<id>\d+)' + _GEO_COUNTRIES = ['HR'] + + _TESTS = [{ + 'url': 'https://www.croatian.film/hr/films/72472', + 'info_dict': { + 'id': '1078340774', + 'ext': 'mp4', + 'title': '“ŠKAFETIN”, r. Paško Vukasović', + 'uploader': 'croatian.film', + 'uploader_id': 'user94192658', + 'uploader_url': 'https://vimeo.com/user94192658', + 'duration': 1357, + 'thumbnail': 'https://i.vimeocdn.com/video/2008556407-40eb1315ec11be5fcb8dda4d7059675b0881e182b9fc730892e267db72cb57f5-d', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + # geo-restricted but works with xff + 'url': 'https://www.croatian.film/en/films/77144', + 'info_dict': { + 'id': '1144997795', + 'ext': 'mp4', + 'title': '“ROKO” r. Ivana Marinić Kragić', + 'uploader': 'croatian.film', + 'uploader_id': 'user94192658', + 'uploader_url': 'https://vimeo.com/user94192658', + 'duration': 1023, + 'thumbnail': 'https://i.vimeocdn.com/video/2093793231-11c2928698ff8347489e679b4d563a576e7acd0681ce95b383a9a25f6adb5e8f-d', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + 'url': 'https://www.croatian.film/en/films/75904/watch', + 'info_dict': { + 'id': '1134883757', + 'ext': 'mp4', + 'title': '"CARPE DIEM" r. 
Nina Damjanović', + 'uploader': 'croatian.film', + 'uploader_id': 'user94192658', + 'uploader_url': 'https://vimeo.com/user94192658', + 'duration': 1123, + 'thumbnail': 'https://i.vimeocdn.com/video/2080022187-bb691c470c28c4d979258cf235e594bf9a11c14b837a0784326c25c95edd83f9-d', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + api_data = self._download_json( + f'https://api.croatian.film/api/videos/{display_id}', + display_id) + + if errors := traverse_obj(api_data, ('errors', lambda _, v: v['code'])): + codes = traverse_obj(errors, (..., 'code', {str})) + if 'INVALID_COUNTRY' in codes: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + raise ExtractorError(join_nonempty( + *(traverse_obj(errors, (..., 'details', {str})) or codes), + delim='; ')) + + vimeo_id = self._search_regex( + r'/videos/(\d+)', api_data['video']['vimeoURL'], 'vimeo ID') + + return self.url_result( + VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', url), + VimeoIE, vimeo_id) diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py index ce8435c8cb..2c1f1a40e5 100644 --- a/yt_dlp/extractor/dropbox.py +++ b/yt_dlp/extractor/dropbox.py @@ -14,7 +14,7 @@ from ..utils import ( class DropboxIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)' + _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/f[io]|sh?)/(?P<id>\w+)' _TESTS = [ { 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0', @@ -35,6 +35,9 @@ class DropboxIE(InfoExtractor): }, { 'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h', 'only_matching': True, + }, { + 'url': 'https://www.dropbox.com/scl/fo/zjfqse5txqfd7twa8iewj/AOfZzSYWUSKle2HD7XF7kzQ/A-BEAT%20C.mp4?rlkey=6tg3jkp4tv6a5vt58a6dag0mm&dl=0', + 'only_matching': True, }, ] diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 2c35013faa..99f64272b2 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -4,8 +4,6 @@ import urllib.parse from .common import InfoExtractor from ..compat import compat_etree_fromstring -from ..networking import Request -from ..networking.exceptions import network_exceptions from ..utils import ( ExtractorError, clean_html, @@ -64,9 +62,6 @@ class FacebookIE(InfoExtractor): class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+ data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', ] - _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' - _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' - _NETRC_MACHINE = 'facebook' IE_NAME = 'facebook' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' @@ -469,65 +464,6 @@ class FacebookIE(InfoExtractor): 'graphURI': '/api/graphql/', } - def _perform_login(self, username, password): - login_page_req = Request(self._LOGIN_URL) - self._set_cookie('facebook.com', 'locale', 'en_US') - login_page = self._download_webpage(login_page_req, None, - note='Downloading login page', - errnote='Unable to download login page') - lsd = self._search_regex( - r'<input type="hidden" name="lsd" value="([^"]*)"', - login_page, 'lsd') - lgnrnd = self._search_regex(r'name="lgnrnd" 
value="([^"]*?)"', login_page, 'lgnrnd') - - login_form = { - 'email': username, - 'pass': password, - 'lsd': lsd, - 'lgnrnd': lgnrnd, - 'next': 'http://facebook.com/home.php', - 'default_persistent': '0', - 'legacy_return': '1', - 'timezone': '-60', - 'trynum': '1', - } - request = Request(self._LOGIN_URL, urlencode_postdata(login_form)) - request.headers['Content-Type'] = 'application/x-www-form-urlencoded' - try: - login_results = self._download_webpage(request, None, - note='Logging in', errnote='unable to fetch login page') - if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: - error = self._html_search_regex( - r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>', - login_results, 'login error', default=None, group='error') - if error: - raise ExtractorError(f'Unable to login: {error}', expected=True) - self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.') - return - - fb_dtsg = self._search_regex( - r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None) - h = self._search_regex( - r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None) - - if not fb_dtsg or not h: - return - - check_form = { - 'fb_dtsg': fb_dtsg, - 'h': h, - 'name_action_selected': 'dont_save', - } - check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) - check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded' - check_response = self._download_webpage(check_req, None, - note='Confirming login') - if re.search(r'id="checkpointSubmitButton"', check_response) is not None: - self.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.') - except network_exceptions as err: - self.report_warning(f'unable to log in: {err}') - return - def _extract_from_url(self, url, video_id): webpage = self._download_webpage( url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) diff --git a/yt_dlp/extractor/filmarchiv.py b/yt_dlp/extractor/filmarchiv.py new file mode 100644 index 0000000000..50fde2aff4 --- /dev/null +++ b/yt_dlp/extractor/filmarchiv.py @@ -0,0 +1,52 @@ +from .common import InfoExtractor +from ..utils import clean_html +from ..utils.traversal import ( + find_element, + find_elements, + traverse_obj, +) + + +class FilmArchivIE(InfoExtractor): + IE_DESC = 'FILMARCHIV ON' + _VALID_URL = r'https?://(?:www\.)?filmarchiv\.at/de/filmarchiv-on/video/(?P<id>f_[0-9a-zA-Z]{5,})' + _TESTS = [{ + 'url': 'https://www.filmarchiv.at/de/filmarchiv-on/video/f_0305p7xKrXUPBwoNE9x6mh', + 'md5': '54a6596f6a84624531866008a77fa27a', + 'info_dict': { + 'id': 'f_0305p7xKrXUPBwoNE9x6mh', + 'ext': 'mp4', + 'title': 'Der Wurstelprater zur Kaiserzeit', + 'description': 'md5:9843f92df5cc9a4975cee7aabcf6e3b2', + 'thumbnail': r're:https://cdn\.filmarchiv\.at/f_0305/p7xKrXUPBwoNE9x6mh_v1/poster\.jpg', + }, + }, { + 'url': 'https://www.filmarchiv.at/de/filmarchiv-on/video/f_0306vI3wO0tJIsfrqYFQXF', + 'md5': '595385d7f54cb6529140ee8de7d1c3c7', + 'info_dict': { + 'id': 'f_0306vI3wO0tJIsfrqYFQXF', + 'ext': 'mp4', + 'title': 'Vor 70 Jahren: Wettgehen der Briefträger in Wien', + 'description': 'md5:b2a2e4230923cd1969d471c552e62811', + 'thumbnail': r're:https://cdn\.filmarchiv\.at/f_0306/vI3wO0tJIsfrqYFQXF_v1/poster\.jpg', + }, + }] + + def _real_extract(self, url): + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) + path = 
'/'.join((media_id[:6], media_id[6:])) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://cdn.filmarchiv.at/{path}_v1_sv1/playlist.m3u8', media_id) + + return { + 'id': media_id, + 'title': traverse_obj(webpage, ({find_element(tag='title-div')}, {clean_html})), + 'description': traverse_obj(webpage, ( + {find_elements(tag='div', attr='class', value=r'.*\bborder-base-content\b', regex=True)}, ..., + {find_elements(tag='div', attr='class', value=r'.*\bprose\b', html=False, regex=True)}, ..., + {clean_html}, any)), + 'thumbnail': f'https://cdn.filmarchiv.at/{path}_v1/poster.jpg', + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d44e6d3c4b..7c7bb71a74 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -821,13 +821,17 @@ class GenericIE(InfoExtractor): 'Referer': smuggled_data.get('referer'), }), impersonate=impersonate) except ExtractorError as e: - if not (isinstance(e.cause, HTTPError) and e.cause.status == 403 - and e.cause.response.get_header('cf-mitigated') == 'challenge' - and e.cause.response.extensions.get('impersonate') is None): + if not isinstance(e.cause, HTTPError) or e.cause.status != 403: + raise + res = e.cause.response + already_impersonating = res.extensions.get('impersonate') is not None + if already_impersonating or ( + res.get_header('cf-mitigated') != 'challenge' + and b'<title>Attention Required! | Cloudflare' not in res.read() + ): raise cf_cookie_domain = traverse_obj( - LenientSimpleCookie(e.cause.response.get_header('set-cookie')), - ('__cf_bm', 'domain')) + LenientSimpleCookie(res.get_header('set-cookie')), ('__cf_bm', 'domain')) if cf_cookie_domain: self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}') self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm') diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index a9777a5946..e2144d7eca 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -46,6 +46,7 @@ class GofileIE(InfoExtractor): 'videopassword': 'password', }, }] + _STATIC_TOKEN = '4fd6sg89d7s6' # From https://gofile.io/dist/js/config.js _TOKEN = None def _real_initialize(self): @@ -60,13 +61,16 @@ class GofileIE(InfoExtractor): self._set_cookie('.gofile.io', 'accountToken', self._TOKEN) def _entries(self, file_id): - query_params = {'wt': '4fd6sg89d7s6'} # From https://gofile.io/dist/js/alljs.js - password = self.get_param('videopassword') - if password: + query_params = {} + if password := self.get_param('videopassword'): query_params['password'] = hashlib.sha256(password.encode()).hexdigest() + files = self._download_json( f'https://api.gofile.io/contents/{file_id}', file_id, 'Getting filelist', - query=query_params, headers={'Authorization': f'Bearer {self._TOKEN}'}) + query=query_params, headers={ + 'Authorization': f'Bearer {self._TOKEN}', + 'X-Website-Token': self._STATIC_TOKEN, + }) status = files['status'] if status == 'error-passwordRequired': diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 2ae527a59e..6bbb538e7b 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -27,7 +27,7 @@ class HotStarBaseIE(InfoExtractor): _TOKEN_NAME = 'userUP' _BASE_URL = 'https://www.hotstar.com' _API_URL = 'https://api.hotstar.com' - _API_URL_V2 = 'https://apix.hotstar.com/v2' + _API_URL_V2 = 'https://www.hotstar.com/api/internal/bff/v2' _AKAMAI_ENCRYPTION_KEY = 
b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' _FREE_HEADERS = { diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 735b44637c..f8b4afa9f9 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -9,14 +9,12 @@ from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, clean_html, - decode_packed_codes, float_or_none, format_field, get_element_by_attribute, get_element_by_id, int_or_none, js_to_json, - ohdave_rsa_encrypt, parse_age_limit, parse_duration, parse_iso8601, @@ -33,143 +31,12 @@ def md5_text(text): return hashlib.md5(text.encode()).hexdigest() -class IqiyiSDK: - def __init__(self, target, ip, timestamp): - self.target = target - self.ip = ip - self.timestamp = timestamp - - @staticmethod - def split_sum(data): - return str(sum(int(p, 16) for p in data)) - - @staticmethod - def digit_sum(num): - if isinstance(num, int): - num = str(num) - return str(sum(map(int, num))) - - def even_odd(self): - even = self.digit_sum(str(self.timestamp)[::2]) - odd = self.digit_sum(str(self.timestamp)[1::2]) - return even, odd - - def preprocess(self, chunksize): - self.target = md5_text(self.target) - chunks = [] - for i in range(32 // chunksize): - chunks.append(self.target[chunksize * i:chunksize * (i + 1)]) - if 32 % chunksize: - chunks.append(self.target[32 - 32 % chunksize:]) - return chunks, list(map(int, self.ip.split('.'))) - - def mod(self, modulus): - chunks, ip = self.preprocess(32) - self.target = chunks[0] + ''.join(str(p % modulus) for p in ip) - - def split(self, chunksize): - modulus_map = { - 4: 256, - 5: 10, - 8: 100, - } - - chunks, ip = self.preprocess(chunksize) - ret = '' - for i in range(len(chunks)): - ip_part = str(ip[i] % modulus_map[chunksize]) if i < 4 else '' - if chunksize == 8: - ret += ip_part + chunks[i] - else: - ret += chunks[i] + ip_part - self.target = ret - - def handle_input16(self): - self.target = md5_text(self.target) - self.target = self.split_sum(self.target[:16]) + self.target + self.split_sum(self.target[16:]) - - def handle_input8(self): - self.target = md5_text(self.target) - ret = '' - for i in range(4): - part = self.target[8 * i:8 * (i + 1)] - ret += self.split_sum(part) + part - self.target = ret - - def handleSum(self): - self.target = md5_text(self.target) - self.target = self.split_sum(self.target) + self.target - - def date(self, scheme): - self.target = md5_text(self.target) - d = time.localtime(self.timestamp) - strings = { - 'y': str(d.tm_year), - 'm': '%02d' % d.tm_mon, - 'd': '%02d' % d.tm_mday, - } - self.target += ''.join(strings[c] for c in scheme) - - def split_time_even_odd(self): - even, odd = self.even_odd() - self.target = odd + md5_text(self.target) + even - - def split_time_odd_even(self): - even, odd = self.even_odd() - self.target = even + md5_text(self.target) + odd - - def split_ip_time_sum(self): - chunks, ip = self.preprocess(32) - self.target = str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp) - - def split_time_ip_sum(self): - chunks, ip = self.preprocess(32) - self.target = self.digit_sum(self.timestamp) + chunks[0] + str(sum(ip)) - - -class IqiyiSDKInterpreter: - def __init__(self, sdk_code): - self.sdk_code = sdk_code - - def run(self, target, ip, timestamp): - self.sdk_code = decode_packed_codes(self.sdk_code) - - functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code) - - sdk = IqiyiSDK(target, ip, timestamp) - - other_functions = { - 'handleSum': sdk.handleSum, - 'handleInput8': sdk.handle_input8, - 
'handleInput16': sdk.handle_input16, - 'splitTimeEvenOdd': sdk.split_time_even_odd, - 'splitTimeOddEven': sdk.split_time_odd_even, - 'splitIpTimeSum': sdk.split_ip_time_sum, - 'splitTimeIpSum': sdk.split_time_ip_sum, - } - for function in functions: - if re.match(r'mod\d+', function): - sdk.mod(int(function[3:])) - elif re.match(r'date[ymd]{3}', function): - sdk.date(function[4:]) - elif re.match(r'split\d+', function): - sdk.split(int(function[5:])) - elif function in other_functions: - other_functions[function]() - else: - raise ExtractorError(f'Unknown function {function}') - - return sdk.target - - class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html' - _NETRC_MACHINE = 'iqiyi' - _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', # MD5 checksum differs on my machine and Travis CI @@ -234,57 +101,6 @@ class IqiyiIE(InfoExtractor): '18': 7, # 1080p } - @staticmethod - def _rsa_fun(data): - # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js - N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd - e = 65537 - - return ohdave_rsa_encrypt(data, e, N) - - def _perform_login(self, username, password): - - data = self._download_json( - 'http://kylin.iqiyi.com/get_token', None, - note='Get token for logging', errnote='Unable to get token for logging') - sdk = data['sdk'] - timestamp = int(time.time()) - target = ( - f'/apis/reglogin/login.action?lang=zh_TW&area_code=null&email={username}' - f'&passwd={self._rsa_fun(password.encode())}&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1') - - interp = IqiyiSDKInterpreter(sdk) - sign = interp.run(target, data['ip'], timestamp) - - validation_params = { - 'target': target, - 'server': 'BEA3AA1908656AABCCFF76582C4C6660', - 'token': data['token'], - 'bird_src': 'f8d91d57af224da7893dd397d52d811a', - 'sign': sign, - 'bird_t': timestamp, - } - validation_result = self._download_json( - 'http://kylin.iqiyi.com/validate?' 
+ urllib.parse.urlencode(validation_params), None, - note='Validate credentials', errnote='Unable to validate credentials') - - MSG_MAP = { - 'P00107': 'please login via the web interface and enter the CAPTCHA code', - 'P00117': 'bad username or password', - } - - code = validation_result['code'] - if code != 'A00000': - msg = MSG_MAP.get(code) - if not msg: - msg = f'error {code}' - if validation_result.get('msg'): - msg += ': ' + validation_result['msg'] - self.report_warning('unable to log in: ' + msg) - return False - - return True - def get_raw_data(self, tvid, video_id): tm = int(time.time() * 1000) diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py deleted file mode 100644 index 1dd0b15c1e..0000000000 --- a/yt_dlp/extractor/manoto.py +++ /dev/null @@ -1,128 +0,0 @@ -from .common import InfoExtractor -from ..utils import clean_html, int_or_none, traverse_obj - -_API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}' - - -class ManotoTVIE(InfoExtractor): - IE_DESC = 'Manoto TV (Episode)' - _VALID_URL = r'https?://(?:www\.)?manototv\.com/episode/(?P[0-9]+)' - _TESTS = [{ - 'url': 'https://www.manototv.com/episode/8475', - 'info_dict': { - 'id': '8475', - 'series': 'خانه های رویایی با برادران اسکات', - 'season_number': 7, - 'episode_number': 25, - 'episode_id': 'My Dream Home S7: Carol & John', - 'duration': 3600, - 'categories': ['سرگرمی'], - 'title': 'کارول و جان', - 'description': 'md5:d0fff1f8ba5c6775d312a00165d1a97e', - 'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$', - 'ext': 'mp4', - }, - 'params': { - 'skip_download': 'm3u8', - }, - }, { - 'url': 'https://www.manototv.com/episode/12576', - 'info_dict': { - 'id': '12576', - 'series': 'فیلم های ایرانی', - 'episode_id': 'Seh Mah Taatili', - 'duration': 5400, - 'view_count': int, - 'categories': ['سرگرمی'], - 'title': 'سه ماه تعطیلی', - 'description': 'سه ماه تعطیلی فیلمی به کارگردانی و نویسندگی شاپور قریب ساختهٔ سال ۱۳۵۶ است.', - 'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$', - 'ext': 'mp4', - }, - 'params': { - 'skip_download': 'm3u8', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - episode_json = self._download_json(_API_URL.format('showmodule', 'episodedetails', video_id), video_id) - details = episode_json.get('details', {}) - formats = self._extract_m3u8_formats(details.get('videoM3u8Url'), video_id, 'mp4') - return { - 'id': video_id, - 'series': details.get('showTitle'), - 'season_number': int_or_none(details.get('analyticsSeasonNumber')), - 'episode_number': int_or_none(details.get('episodeNumber')), - 'episode_id': details.get('analyticsEpisodeTitle'), - 'duration': int_or_none(details.get('durationInMinutes'), invscale=60), - 'view_count': details.get('viewCount'), - 'categories': [details.get('videoCategory')], - 'title': details.get('episodeTitle'), - 'description': clean_html(details.get('episodeDescription')), - 'thumbnail': details.get('episodelandscapeImgIxUrl'), - 'formats': formats, - } - - -class ManotoTVShowIE(InfoExtractor): - IE_DESC = 'Manoto TV (Show)' - _VALID_URL = r'https?://(?:www\.)?manototv\.com/show/(?P[0-9]+)' - _TESTS = [{ - 'url': 'https://www.manototv.com/show/2526', - 'playlist_mincount': 68, - 'info_dict': { - 'id': '2526', - 'title': 'فیلم های ایرانی', - 'description': 'مجموعه ای از فیلم های سینمای کلاسیک ایران', - }, - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - show_json = self._download_json(_API_URL.format('showmodule', 'details', show_id), show_id) - show_details = 
show_json.get('details', {}) - title = show_details.get('showTitle') - description = show_details.get('showSynopsis') - - series_json = self._download_json(_API_URL.format('showmodule', 'serieslist', show_id), show_id) - playlist_id = str(traverse_obj(series_json, ('details', 'list', 0, 'id'))) - - playlist_json = self._download_json(_API_URL.format('showmodule', 'episodelist', playlist_id), playlist_id) - playlist = traverse_obj(playlist_json, ('details', 'list')) or [] - - entries = [ - self.url_result( - 'https://www.manototv.com/episode/{}'.format(item['slideID']), ie=ManotoTVIE.ie_key(), video_id=item['slideID']) - for item in playlist] - return self.playlist_result(entries, show_id, title, description) - - -class ManotoTVLiveIE(InfoExtractor): - IE_DESC = 'Manoto TV (Live)' - _VALID_URL = r'https?://(?:www\.)?manototv\.com/live/' - _TEST = { - 'url': 'https://www.manototv.com/live/', - 'info_dict': { - 'id': 'live', - 'title': 'Manoto TV Live', - 'ext': 'mp4', - 'is_live': True, - }, - 'params': { - 'skip_download': 'm3u8', - }, - } - - def _real_extract(self, url): - video_id = 'live' - json = self._download_json(_API_URL.format('livemodule', 'details', ''), video_id) - details = json.get('details', {}) - video_url = details.get('liveUrl') - formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True) - return { - 'id': video_id, - 'title': 'Manoto TV Live', - 'is_live': True, - 'formats': formats, - } diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 6ced19d6a2..2708c722f3 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -478,3 +478,64 @@ class NebulaChannelIE(NebulaBaseIE): playlist_id=collection_slug, playlist_title=channel.get('title'), playlist_description=channel.get('description')) + + +class NebulaSeasonIE(NebulaBaseIE): + IE_NAME = 'nebula:season' + _VALID_URL = rf'{_BASE_URL_RE}/(?P[\w-]+)/season/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://nebula.tv/jetlag/season/15', + 'info_dict': { + 'id': 'jetlag_15', + 'title': 'Tag: All Stars', + 'description': 'md5:5aa5b8abf3de71756448dc44ffebb674', + }, + 'playlist_count': 8, + }, { + 'url': 'https://nebula.tv/jetlag/season/14', + 'info_dict': { + 'id': 'jetlag_14', + 'title': 'Snake', + 'description': 'md5:6da9040f1c2ac559579738bfb6919d1e', + }, + 'playlist_count': 8, + }, { + 'url': 'https://nebula.tv/jetlag/season/13-5', + 'info_dict': { + 'id': 'jetlag_13-5', + 'title': 'Hide + Seek Across NYC', + 'description': 'md5:5b87bb9acc6dcdff289bb4c71a2ad59f', + }, + 'playlist_count': 3, + }] + + def _build_url_result(self, item): + url = ( + traverse_obj(item, ('share_url', {url_or_none})) + or urljoin('https://nebula.tv/', item.get('app_path')) + or f'https://nebula.tv/videos/{item["slug"]}') + return self.url_result( + smuggle_url(url, {'id': item['id']}), + NebulaIE, url_transparent=True, + **self._extract_video_metadata(item)) + + def _entries(self, data): + for episode in traverse_obj(data, ('episodes', lambda _, v: v['video']['id'], 'video')): + yield self._build_url_result(episode) + for extra in traverse_obj(data, ('extras', ..., 'items', lambda _, v: v['id'])): + yield self._build_url_result(extra) + for trailer in traverse_obj(data, ('trailers', lambda _, v: v['id'])): + yield self._build_url_result(trailer) + + def _real_extract(self, url): + series, season_id = self._match_valid_url(url).group('series', 'season_number') + playlist_id = f'{series}_{season_id}' + data = self._call_api( + f'https://content.api.nebula.app/content/{series}/season/{season_id}', 
playlist_id) + + return self.playlist_result( + self._entries(data), playlist_id, + **traverse_obj(data, { + 'title': ('title', {str}), + 'description': ('description', {str}), + })) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 6c47086b9b..8f3a7d2358 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -528,7 +528,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE): class NetEaseMusicProgramIE(NetEaseMusicBaseIE): IE_NAME = 'netease:program' IE_DESC = '网易云音乐 - 电台节目' - _VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P[0-9]+)' + _VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:dj|program)\?id=(?P[0-9]+)' _TESTS = [{ 'url': 'http://music.163.com/#/program?id=10109055', 'info_dict': { @@ -572,6 +572,9 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): 'params': { 'noplaylist': True, }, + }, { + 'url': 'https://music.163.com/#/dj?id=3706179315', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/netzkino.py b/yt_dlp/extractor/netzkino.py index c07b1715af..05f6c23a31 100644 --- a/yt_dlp/extractor/netzkino.py +++ b/yt_dlp/extractor/netzkino.py @@ -2,84 +2,59 @@ from .common import InfoExtractor from ..utils import ( clean_html, int_or_none, - js_to_json, - parse_iso8601, + url_or_none, + urljoin, ) +from ..utils.traversal import traverse_obj class NetzkinoIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P[^/]+)' - + _GEO_COUNTRIES = ['DE'] + _VALID_URL = r'https?://(?:www\.)?netzkino\.de/details/(?P[^/?#]+)' _TESTS = [{ - 'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond', - 'md5': '92a3f8b76f8d7220acce5377ea5d4873', + 'url': 'https://www.netzkino.de/details/snow-beast', + 'md5': '1a4c90fe40d3ccabce163287e45e56dd', 'info_dict': { - 'id': 'rakete-zum-mond', + 'id': 'snow-beast', 'ext': 'mp4', - 'title': 'Rakete zum Mond \u2013 Jules Verne', - 'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60', - 'upload_date': '20120813', - 'thumbnail': r're:https?://.*\.jpg$', - 'timestamp': 1344858571, + 'title': 'Snow Beast', 'age_limit': 12, - }, - 'params': { - 'skip_download': 'Download only works from Germany', - }, - }, { - 'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2', - 'md5': 'c7728b2dadd04ff6727814847a51ef03', - 'info_dict': { - 'id': 'dr-jekyll-mrs-hyde-2', - 'ext': 'mp4', - 'title': 'Dr. Jekyll & Mrs. 
Hyde 2', - 'description': 'md5:c2e9626ebd02de0a794b95407045d186', - 'upload_date': '20190130', - 'thumbnail': r're:https?://.*\.jpg$', - 'timestamp': 1548849437, - 'age_limit': 18, - }, - 'params': { - 'skip_download': 'Download only works from Germany', + 'alt_title': 'Snow Beast', + 'cast': 'count:3', + 'categories': 'count:7', + 'creators': 'count:2', + 'description': 'md5:e604a954a7f827a80e96a3a97d48b269', + 'location': 'US', + 'release_year': 2011, + 'thumbnail': r're:https?://.+\.jpg', }, }] def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + next_js_data = self._search_nextjs_data(webpage, video_id) - api_url = f'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/{video_id}.json?d=www' - info = self._download_json(api_url, video_id) - custom_fields = info['custom_fields'] - - production_js = self._download_webpage( - 'http://www.netzkino.de/beta/dist/production.min.js', video_id, - note='Downloading player code') - avo_js = self._search_regex( - r'var urlTemplate=(\{.*?"\})', - production_js, 'URL templates') - templates = self._parse_json( - avo_js, video_id, transform_source=js_to_json) - - suffix = { - 'hds': '.mp4/manifest.f4m', - 'hls': '.mp4/master.m3u8', - 'pmd': '.mp4', - } - film_fn = custom_fields['Streaming'][0] - formats = [{ - 'format_id': key, - 'ext': 'mp4', - 'url': tpl.replace('{}', film_fn) + suffix[key], - } for key, tpl in templates.items()] + query = traverse_obj(next_js_data, ( + 'props', '__dehydratedState', 'queries', ..., 'state', + 'data', 'data', lambda _, v: v['__typename'] == 'CmsMovie', any)) + if 'DRM' in traverse_obj(query, ('licenses', 'nodes', ..., 'properties', {str})): + self.report_drm(video_id) return { 'id': video_id, - 'formats': formats, - 'title': info['title'], - 'age_limit': int_or_none(custom_fields.get('FSK')[0]), - 'timestamp': parse_iso8601(info.get('date'), delimiter=' '), - 'description': clean_html(info.get('content')), - 'thumbnail': info.get('thumbnail'), + **traverse_obj(query, { + 'title': ('originalTitle', {clean_html}), + 'age_limit': ('fskRating', {int_or_none}), + 'alt_title': ('originalTitle', {clean_html}, filter), + 'cast': ('cast', 'nodes', ..., 'person', 'name', {clean_html}, filter), + 'creators': (('directors', 'writers'), 'nodes', ..., 'person', 'name', {clean_html}, filter), + 'categories': ('categories', 'nodes', ..., 'category', 'title', {clean_html}, filter), + 'description': ('longSynopsis', {clean_html}, filter), + 'duration': ('runtimeInSeconds', {int_or_none}), + 'location': ('productionCountry', {clean_html}, filter), + 'release_year': ('productionYear', {int_or_none}), + 'thumbnail': ('coverImage', 'masterUrl', {url_or_none}), + 'url': ('videoSource', 'pmdUrl', {urljoin('https://pmd.netzkino-seite.netzkino.de/')}), + }), } diff --git a/yt_dlp/extractor/nextmedia.py b/yt_dlp/extractor/nextmedia.py deleted file mode 100644 index 81da3ffde3..0000000000 --- a/yt_dlp/extractor/nextmedia.py +++ /dev/null @@ -1,238 +0,0 @@ -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - clean_html, - get_element_by_class, - int_or_none, - parse_iso8601, - remove_start, - unified_timestamp, -) - - -class NextMediaIE(InfoExtractor): - IE_DESC = '蘋果日報' - _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P\d+)/(?P\d+)' - _TESTS = [{ - 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199', - 'md5': 'dff9fad7009311c421176d1ac90bfe4f', - 
'info_dict': { - 'id': '53109199', - 'ext': 'mp4', - 'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:28222b9912b6665a21011b034c70fcc7', - 'timestamp': 1415456273, - 'upload_date': '20141108', - }, - }] - - _URL_PATTERN = r'\{ url: \'(.+)\' \}' - - def _real_extract(self, url): - news_id = self._match_id(url) - page = self._download_webpage(url, news_id) - return self._extract_from_nextmedia_page(news_id, url, page) - - def _extract_from_nextmedia_page(self, news_id, url, page): - redirection_url = self._search_regex( - r'window\.location\.href\s*=\s*([\'"])(?P(?!\1).+)\1', - page, 'redirection URL', default=None, group='url') - if redirection_url: - return self.url_result(urllib.parse.urljoin(url, redirection_url)) - - title = self._fetch_title(page) - video_url = self._search_regex(self._URL_PATTERN, page, 'video url') - - attrs = { - 'id': news_id, - 'title': title, - 'url': video_url, # ext can be inferred from url - 'thumbnail': self._fetch_thumbnail(page), - 'description': self._fetch_description(page), - } - - timestamp = self._fetch_timestamp(page) - if timestamp: - attrs['timestamp'] = timestamp - else: - attrs['upload_date'] = self._fetch_upload_date(url) - - return attrs - - def _fetch_title(self, page): - return self._og_search_title(page) - - def _fetch_thumbnail(self, page): - return self._og_search_thumbnail(page) - - def _fetch_timestamp(self, page): - date_created = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time') - return parse_iso8601(date_created) - - def _fetch_upload_date(self, url): - return self._search_regex(self._VALID_URL, url, 'upload date', group='date') - - def _fetch_description(self, page): - return self._og_search_property('description', page) - - -class NextMediaActionNewsIE(NextMediaIE): # XXX: Do not subclass from concrete IE - IE_DESC = '蘋果日報 - 動新聞' - _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P\d+)/(?P\d+)/\d+' - _TESTS = [{ - 'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460', - 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201', - 'info_dict': { - 'id': '19009428', - 'ext': 'mp4', - 'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659', - 'timestamp': 1421791200, - 'upload_date': '20150120', - }, - }] - - def _real_extract(self, url): - news_id = self._match_id(url) - actionnews_page = self._download_webpage(url, news_id) - article_url = self._og_search_url(actionnews_page) - article_page = self._download_webpage(article_url, news_id) - return self._extract_from_nextmedia_page(news_id, url, article_page) - - -class AppleDailyIE(NextMediaIE): # XXX: Do not subclass from concrete IE - IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' 
- _TESTS = [{ - 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', - 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', - 'info_dict': { - 'id': '36354694', - 'ext': 'mp4', - 'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4', - 'upload_date': '20150128', - }, - }, { - 'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A', - 'md5': '86b4e9132d158279c7883822d94ccc49', - 'info_dict': { - 'id': '550549', - 'ext': 'mp4', - 'title': '不滿被踩腳 山東兩大媽一路打下車', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:175b4260c1d7c085993474217e4ab1b4', - 'upload_date': '20150128', - }, - }, { - 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671', - 'md5': '03df296d95dedc2d5886debbb80cb43f', - 'info_dict': { - 'id': '5003671', - 'ext': 'mp4', - 'title': '20正妹熱舞 《刀龍傳說Online》火辣上市', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd', - 'upload_date': '20150128', - }, - 'skip': 'redirect to http://www.appledaily.com.tw/animation/', - }, { - # No thumbnail - 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/', - 'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb', - 'info_dict': { - 'id': '5003673', - 'ext': 'mp4', - 'title': '半夜尿尿 好像會看到___', - 'description': 'md5:61d2da7fe117fede148706cdb85ac066', - 'upload_date': '20150128', - }, - 'expected_warnings': [ - 'video thumbnail', - ], - 'skip': 'redirect to http://www.appledaily.com.tw/animation/', - }, { - 'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/', - 'md5': 'eaa20e6b9df418c912d7f5dec2ba734d', - 'info_dict': { - 'id': '35770334', - 'ext': 'mp4', - 'title': '咖啡占卜測 XU裝熟指數', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748', - 'upload_date': '20140417', - }, - }, { - 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', - 'only_matching': True, - }, { - # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694 - 'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694', - 'only_matching': True, - }] - - _URL_PATTERN = r'\{url: \'(.+)\'\}' - - def _fetch_title(self, page): - return (self._html_search_regex(r'
<h1 id="h1">([^<>]+)</h1>
', page, 'news title', default=None) - or self._html_search_meta('description', page, 'news title')) - - def _fetch_thumbnail(self, page): - return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False) - - def _fetch_timestamp(self, page): - return None - - def _fetch_description(self, page): - return self._html_search_meta('description', page, 'news description') - - -class NextTVIE(InfoExtractor): - _WORKING = False - _ENABLED = None # XXX: pass through to GenericIE - IE_DESC = '壹電視' - _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P\d+)' - - _TEST = { - 'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671', - 'info_dict': { - 'id': '11779671', - 'ext': 'mp4', - 'title': '「超收稅」近4千億! 藍議員籲發消費券', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1484825400, - 'upload_date': '20170119', - 'view_count': int, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r']*>([^<]+)', webpage, 'title') - - data = self._hidden_inputs(webpage) - - video_url = data['ntt-vod-src-detailview'] - - date_str = get_element_by_class('date', webpage) - timestamp = unified_timestamp(date_str + '+0800') if date_str else None - - view_count = int_or_none(remove_start( - clean_html(get_element_by_class('click', webpage)), '點閱:')) - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - 'thumbnail': data.get('ntt-vod-img-src'), - 'timestamp': timestamp, - 'view_count': view_count, - } diff --git a/yt_dlp/extractor/pandatv.py b/yt_dlp/extractor/pandatv.py new file mode 100644 index 0000000000..00ac49a3ce --- /dev/null +++ b/yt_dlp/extractor/pandatv.py @@ -0,0 +1,83 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + UserNotLive, + filter_dict, + int_or_none, + join_nonempty, + parse_iso8601, + url_or_none, + urlencode_postdata, +) +from ..utils.traversal import traverse_obj + + +class PandaTvIE(InfoExtractor): + IE_DESC = 'pandalive.co.kr (팬더티비)' + _VALID_URL = r'https?://(?:www\.|m\.)?pandalive\.co\.kr/play/(?P\w+)' + _TESTS = [{ + 'url': 'https://www.pandalive.co.kr/play/bebenim', + 'info_dict': { + 'id': 'bebenim', + 'ext': 'mp4', + 'channel': '릴리ෆ', + 'title': r're:앙앙❤ \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'thumbnail': r're:https://cdn\.pandalive\.co\.kr/ivs/v1/.+/thumb\.jpg', + 'concurrent_view_count': int, + 'like_count': int, + 'live_status': 'is_live', + 'upload_date': str, + }, + 'skip': 'The channel is not currently live', + }] + + def _real_extract(self, url): + channel_id = self._match_id(url) + video_meta = self._download_json( + 'https://api.pandalive.co.kr/v1/live/play', channel_id, + 'Downloading video meta data', 'Unable to download video meta data', + data=urlencode_postdata(filter_dict({ + 'action': 'watch', + 'userId': channel_id, + 'password': self.get_param('videopassword'), + })), expected_status=400) + + if error_code := traverse_obj(video_meta, ('errorData', 'code', {str})): + if error_code == 'castEnd': + raise UserNotLive(video_id=channel_id) + elif error_code == 'needAdult': + self.raise_login_required('Adult verification is required for this stream') + elif error_code == 'needLogin': + self.raise_login_required('Login is required for this stream') + elif error_code == 'needCoinPurchase': + raise ExtractorError('Coin purchase is required for this stream', expected=True) + elif error_code == 'needUnlimitItem': + raise ExtractorError('Ticket purchase is required for 
this stream', expected=True) + elif error_code == 'needPw': + raise ExtractorError('Password protected video, use --video-password ', expected=True) + elif error_code == 'wrongPw': + raise ExtractorError('Wrong password', expected=True) + else: + error_msg = video_meta.get('message') + raise ExtractorError(join_nonempty( + 'API returned error code', error_code, + error_msg and 'with error message:', error_msg, + delim=' ')) + + http_headers = {'Origin': 'https://www.pandalive.co.kr'} + + return { + 'id': channel_id, + 'is_live': True, + 'formats': self._extract_m3u8_formats( + video_meta['PlayList']['hls'][0]['url'], channel_id, 'mp4', headers=http_headers, live=True), + 'http_headers': http_headers, + **traverse_obj(video_meta, ('media', { + 'title': ('title', {str}), + 'release_timestamp': ('startTime', {parse_iso8601(delim=' ')}), + 'thumbnail': ('ivsThumbnail', {url_or_none}), + 'channel': ('userNick', {str}), + 'concurrent_view_count': ('user', {int_or_none}), + 'like_count': ('likeCnt', {int_or_none}), + })), + } diff --git a/yt_dlp/extractor/parti.py b/yt_dlp/extractor/parti.py index acadefc4e4..225bc36ee4 100644 --- a/yt_dlp/extractor/parti.py +++ b/yt_dlp/extractor/parti.py @@ -6,7 +6,10 @@ from ..utils.traversal import traverse_obj class PartiBaseIE(InfoExtractor): def _call_api(self, path, video_id, note=None): return self._download_json( - f'https://api-backend.parti.com/parti_v2/profile/{path}', video_id, note) + f'https://prod-api.parti.com/parti_v2/profile/{path}', video_id, note, headers={ + 'Origin': 'https://parti.com', + 'Referer': 'https://parti.com/', + }) class PartiVideoIE(PartiBaseIE): @@ -20,7 +23,7 @@ class PartiVideoIE(PartiBaseIE): 'title': 'NOW LIVE ', 'upload_date': '20250327', 'categories': ['Gaming'], - 'thumbnail': 'https://assets.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png', + 'thumbnail': 'https://media.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png', 'channel': 'ItZTMGG', 'timestamp': 1743044379, }, @@ -34,7 +37,7 @@ class PartiVideoIE(PartiBaseIE): return { 'id': video_id, 'formats': self._extract_m3u8_formats( - urljoin('https://watch.parti.com', data['livestream_recording']), video_id, 'mp4'), + urljoin('https://media.parti.com/', data['livestream_recording']), video_id, 'mp4'), **traverse_obj(data, { 'title': ('event_title', {str}), 'channel': ('user_name', {str}), @@ -47,32 +50,27 @@ class PartiVideoIE(PartiBaseIE): class PartiLivestreamIE(PartiBaseIE): IE_NAME = 'parti:livestream' - _VALID_URL = r'https?://(?:www\.)?parti\.com/creator/(?P[\w]+)/(?P[\w/-]+)' + _VALID_URL = r'https?://(?:www\.)?parti\.com/(?!video/)(?P[\w/-]+)' _TESTS = [{ - 'url': 'https://parti.com/creator/parti/Capt_Robs_Adventures', + 'url': 'https://parti.com/247CryptoTracker', 'info_dict': { - 'id': 'Capt_Robs_Adventures', 'ext': 'mp4', + 'id': '247CryptoTracker', + 'description': 'md5:a78051f3d7e66e6a64c6b1eaf59fd364', 'title': r"re:I'm Live on Parti \d{4}-\d{2}-\d{2} \d{2}:\d{2}", - 'view_count': int, - 'thumbnail': r're:https://assets\.parti\.com/.+\.png', - 'timestamp': 1743879776, - 'upload_date': '20250405', + 'thumbnail': r're:https://media\.parti\.com/stream-screenshots/.+\.png', 'live_status': 'is_live', }, 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://parti.com/creator/discord/sazboxgaming/0', - 'only_matching': True, }] def _real_extract(self, url): - service, creator_slug = self._match_valid_url(url).group('service', 'id') + creator_slug = self._match_id(url) encoded_creator_slug = creator_slug.replace('/', '%23') creator_id 
= self._call_api( - f'get_user_by_social_media/{service}/{encoded_creator_slug}', - creator_slug, note='Fetching user ID') + f'user_id_from_name/{encoded_creator_slug}', + creator_slug, note='Fetching user ID')['user_id'] data = self._call_api( f'get_livestream_channel_info/{creator_id}', creator_id, @@ -85,11 +83,7 @@ class PartiLivestreamIE(PartiBaseIE): return { 'id': creator_slug, - 'formats': self._extract_m3u8_formats( - channel_info['playback_url'], creator_slug, live=True, query={ - 'token': channel_info['playback_auth_token'], - 'player_version': '1.17.0', - }), + 'formats': self._extract_m3u8_formats(channel_info['playback_url'], creator_slug, live=True), 'is_live': True, **traverse_obj(data, { 'title': ('livestream_event_info', 'event_name', {str}), diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py index 92431fa241..4db8425241 100644 --- a/yt_dlp/extractor/picarto.py +++ b/yt_dlp/extractor/picarto.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, str_or_none, + strip_or_none, traverse_obj, update_url, ) @@ -50,7 +51,6 @@ class PicartoIE(InfoExtractor): if metadata.get('online') == 0: raise ExtractorError('Stream is offline', expected=True) - title = metadata['title'] cdn_data = self._download_json(''.join(( update_url(data['getLoadBalancerUrl']['url'], scheme='https'), @@ -79,7 +79,7 @@ class PicartoIE(InfoExtractor): return { 'id': channel_id, - 'title': title.strip(), + 'title': strip_or_none(metadata.get('title')), 'is_live': True, 'channel': channel_id, 'channel_id': metadata.get('id'), @@ -159,7 +159,7 @@ class PicartoVodIE(InfoExtractor): 'id': video_id, **traverse_obj(data, { 'id': ('id', {str_or_none}), - 'title': ('title', {str}), + 'title': ('title', {str.strip}), 'thumbnail': 'video_recording_image_url', 'channel': ('channel', 'name', {str}), 'age_limit': ('adult', {lambda x: 18 if x else 0}), diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index e1e9777e8e..cdfa3f1b0f 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -24,6 +24,7 @@ from ..utils import ( url_or_none, urlencode_postdata, ) +from ..utils.traversal import find_elements, traverse_obj class PornHubBaseIE(InfoExtractor): @@ -137,23 +138,24 @@ class PornHubIE(PornHubBaseIE): _EMBED_REGEX = [r']+?src=["\'](?P(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)'] _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', - 'md5': 'a6391306d050e4547f62b3f485dd9ba9', + 'md5': '4d4a4e9178b655776f86cf89ecaf0edf', 'info_dict': { 'id': '648719015', 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', - 'uploader': 'Babes', + 'uploader': 'BABES-COM', + 'uploader_id': '/users/babes-com', 'upload_date': '20130628', 'timestamp': 1372447216, 'duration': 361, 'view_count': int, 'like_count': int, - 'dislike_count': int, 'comment_count': int, 'age_limit': 18, 'tags': list, 'categories': list, 'cast': list, + 'thumbnail': r're:https?://.+', }, }, { # non-ASCII title @@ -480,13 +482,6 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - def extract_list(meta_key): - div = self._search_regex( - rf'(?s)]+\bclass=["\'].*?\b{meta_key}Wrapper[^>]*>(.+?)', - webpage, meta_key, default=None) - if div: - return [clean_html(x).strip() for x in re.findall(r'(?s)]+\bhref=[^>]+>.+?', div)] - info = self._search_json_ld(webpage, video_id, default={}) # description provided in 
JSON-LD is irrelevant info['description'] = None @@ -505,9 +500,11 @@ class PornHubIE(PornHubBaseIE): 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, - 'tags': extract_list('tags'), - 'categories': extract_list('categories'), - 'cast': extract_list('pornstars'), + **traverse_obj(webpage, { + 'tags': ({find_elements(attr='data-label', value='tag')}, ..., {clean_html}), + 'categories': ({find_elements(attr='data-label', value='category')}, ..., {clean_html}), + 'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}), + }), 'subtitles': subtitles, }, info) diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py deleted file mode 100644 index 39711325fc..0000000000 --- a/yt_dlp/extractor/scte.py +++ /dev/null @@ -1,137 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - decode_packed_codes, - urlencode_postdata, -) - - -class SCTEBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx' - _NETRC_MACHINE = 'scte' - - def _perform_login(self, username, password): - login_popup = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login popup') - - def is_logged(webpage): - return any(re.search(p, webpage) for p in ( - r'class=["\']welcome\b', r'>Sign Out<')) - - # already logged in - if is_logged(login_popup): - return - - login_form = self._hidden_inputs(login_popup) - - login_form.update({ - 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username, - 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password, - 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on', - }) - - response = self._download_webpage( - self._LOGIN_URL, None, 'Logging in', - data=urlencode_postdata(login_form)) - - if '|pageRedirect|' not in response and not is_logged(response): - error = self._html_search_regex( - r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)\d+)' - _TESTS = [{ - 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484', - 'info_dict': { - 'title': 'Introduction to DOCSIS Engineering Professional', - 'id': '31484', - }, - 'playlist_count': 5, - 'skip': 'Requires account credentials', - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._search_regex(r'
<h1>(.+?)</h1>
', webpage, 'title') - - context_id = self._search_regex(r'context-(\d+)', webpage, video_id) - content_base = f'https://learning.scte.org/pluginfile.php/{context_id}/mod_scorm/content/8/' - context = decode_packed_codes(self._download_webpage( - f'{content_base}mobile/data.js', video_id)) - - data = self._parse_xml( - self._search_regex( - r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"), - video_id) - - entries = [] - for asset in data.findall('.//asset'): - asset_url = asset.get('url') - if not asset_url or not asset_url.endswith('.mp4'): - continue - asset_id = self._search_regex( - r'video_([^_]+)_', asset_url, 'asset id', default=None) - if not asset_id: - continue - entries.append({ - 'id': asset_id, - 'title': title, - 'url': content_base + asset_url, - }) - - return self.playlist_result(entries, video_id, title) - - -class SCTECourseIE(SCTEBaseIE): - _WORKING = False - _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P\d+)' - _TESTS = [{ - 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491', - 'only_matching': True, - }, { - 'url': 'https://learning.scte.org/course/view.php?id=3639', - 'only_matching': True, - }, { - 'url': 'https://learning.scte.org/course/view.php?id=3073', - 'only_matching': True, - }] - - def _real_extract(self, url): - course_id = self._match_id(url) - - webpage = self._download_webpage(url, course_id) - - title = self._search_regex( - r'
<h1>(.+?)</h1>
', webpage, 'title', default=None) - - entries = [] - for mobj in re.finditer( - r'''(?x) - ]+ - href=(["\']) - (?P - https?://learning\.scte\.org/mod/ - (?Pscorm|subcourse)/view\.php?(?:(?!\1).)*? - \bid=\d+ - ) - ''', - webpage): - item_url = mobj.group('url') - if item_url == url: - continue - ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm' - else SCTECourseIE.ie_key()) - entries.append(self.url_result(item_url, ie=ie)) - - return self.playlist_result(entries, course_id, title) diff --git a/yt_dlp/extractor/tarangplus.py b/yt_dlp/extractor/tarangplus.py new file mode 100644 index 0000000000..62f254dd6f --- /dev/null +++ b/yt_dlp/extractor/tarangplus.py @@ -0,0 +1,243 @@ +import base64 +import binascii +import functools +import re +import urllib.parse + +from .common import InfoExtractor +from ..dependencies import Cryptodome +from ..utils import ( + ExtractorError, + OnDemandPagedList, + clean_html, + extract_attributes, + urljoin, +) +from ..utils.traversal import ( + find_element, + find_elements, + require, + traverse_obj, +) + + +class TarangPlusBaseIE(InfoExtractor): + _BASE_URL = 'https://tarangplus.in' + + +class TarangPlusVideoIE(TarangPlusBaseIE): + IE_NAME = 'tarangplus:video' + _VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?:movies|[^#?/]+/[^#?/]+)/(?!episodes)(?P[^#?/]+)' + _TESTS = [{ + 'url': 'https://tarangplus.in/tarangaplus-originals/khitpit/khitpit-ep-10', + 'md5': '78ce056cee755687b8a48199909ecf53', + 'info_dict': { + 'id': '67b8206719521d054c0059b7', + 'display_id': 'khitpit-ep-10', + 'ext': 'mp4', + 'title': 'Khitpit Ep-10', + 'description': 'md5:a45b805cb628e15c853d78b0406eab48', + 'thumbnail': r're:https?://.+/.+\.jpg', + 'duration': 756.0, + 'timestamp': 1740355200, + 'upload_date': '20250224', + 'media_type': 'episode', + 'categories': ['Originals'], + }, + }, { + 'url': 'https://tarangplus.in/tarang-serials/bada-bohu/bada-bohu-ep-233', + 'md5': 'b4f9beb15172559bb362203b4f48382e', + 'info_dict': { + 'id': '680b9d6c19521d054c007782', + 'display_id': 'bada-bohu-ep-233', + 'ext': 'mp4', + 'title': 'Bada Bohu | Ep -233', + 'description': 'md5:e6b8e7edc9e60b92c1b390f8789ecd69', + 'thumbnail': r're:https?://.+/.+\.jpg', + 'duration': 1392.0, + 'timestamp': 1745539200, + 'upload_date': '20250425', + 'media_type': 'episode', + 'categories': ['Prime'], + }, + }, { + # Decrypted m3u8 URL has trailing control characters that need to be stripped + 'url': 'https://tarangplus.in/tarangaplus-originals/ichha/ichha-teaser-1', + 'md5': '16ee43fe21ad8b6e652ec65eba38a64e', + 'info_dict': { + 'id': '5f0f252d3326af0720000342', + 'ext': 'mp4', + 'display_id': 'ichha-teaser-1', + 'title': 'Ichha Teaser', + 'description': 'md5:c724b0b0669a2cefdada3711cec792e6', + 'media_type': 'episode', + 'duration': 21.0, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'categories': ['Originals'], + 'timestamp': 1758153600, + 'upload_date': '20250918', + }, + }, { + 'url': 'https://tarangplus.in/short/ai-maa/ai-maa', + 'only_matching': True, + }, { + 'url': 'https://tarangplus.in/shows/tarang-cine-utsav-2024/tarang-cine-utsav-2024-seg-1', + 'only_matching': True, + }, { + 'url': 'https://tarangplus.in/music-videos/chori-chori-bohu-chori-songs/nijara-laguchu-dhire-dhire', + 'only_matching': True, + }, { + 'url': 'https://tarangplus.in/kids-shows/chhota-jaga/chhota-jaga-ep-33-jamidar-ra-khajana-adaya', + 'only_matching': True, + }, { + 'url': 'https://tarangplus.in/movies/swayambara', + 'only_matching': True, + }] + + def decrypt(self, data, key): + if not Cryptodome.AES: + raise 
+        iv = binascii.unhexlify('00000000000000000000000000000000')
+        cipher = Cryptodome.AES.new(base64.b64decode(key), Cryptodome.AES.MODE_CBC, iv)
+        return cipher.decrypt(base64.b64decode(data)).decode('utf-8')
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        hidden_inputs_data = self._hidden_inputs(webpage)
+        json_ld_data = self._search_json_ld(webpage, display_id)
+        json_ld_data.pop('url', None)
+
+        iframe_url = traverse_obj(webpage, (
+            {find_element(tag='iframe', attr='src', value=r'.+[?&]contenturl=.+', html=True, regex=True)},
+            {extract_attributes}, 'src', {require('iframe URL')}))
+        # Can't use parse_qs here since it would decode the encrypted base64 `+` chars to spaces
+        content = self._search_regex(r'[?&]contenturl=(.+)', iframe_url, 'content')
+        encrypted_data, _, attrs = content.partition('|')
+        metadata = {
+            m.group('k'): m.group('v')
+            for m in re.finditer(r'(?:^|\|)(?P<k>[a-z_]+)=(?P<v>(?:(?!\|[a-z_]+=).)+)', attrs)
+        }
+        m3u8_url = urllib.parse.unquote(
+            self.decrypt(encrypted_data, metadata['key'])).rstrip('\x0e\x0f')
+
+        return {
+            'id': display_id,  # Fallback
+            'display_id': display_id,
+            **json_ld_data,
+            **traverse_obj(metadata, {
+                'id': ('content_id', {str}),
+                'title': ('title', {str}),
+                'thumbnail': ('image', {str}),
+            }),
+            **traverse_obj(hidden_inputs_data, {
+                'id': ('content_id', {str}),
+                'media_type': ('theme_type', {str}),
+                'categories': ('genre', {str}, filter, all, filter),
+            }),
+            'formats': self._extract_m3u8_formats(m3u8_url, display_id),
+        }
+
+
+class TarangPlusEpisodesIE(TarangPlusBaseIE):
+    IE_NAME = 'tarangplus:episodes'
+    _VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?P<type>[^#?/]+)/(?P<id>[^#?/]+)/episodes/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://tarangplus.in/tarangaplus-originals/balijatra/episodes',
+        'info_dict': {
+            'id': 'balijatra',
+            'title': 'Balijatra',
+        },
+        'playlist_mincount': 7,
+    }, {
+        'url': 'https://tarangplus.in/tarang-serials/bada-bohu/episodes',
+        'info_dict': {
+            'id': 'bada-bohu',
+            'title': 'Bada Bohu',
+        },
+        'playlist_mincount': 236,
+    }, {
+        'url': 'https://tarangplus.in/shows/dr-nonsense/episodes',
+        'info_dict': {
+            'id': 'dr-nonsense',
+            'title': 'Dr. Nonsense',
+        },
+        'playlist_mincount': 15,
+    }]
+    _PAGE_SIZE = 20
+
+    def _entries(self, playlist_url, playlist_id, page):
+        data = self._download_json(
+            playlist_url, playlist_id, f'Downloading playlist JSON page {page + 1}',
+            query={'page_no': page})
+        for item in traverse_obj(data, ('items', ..., {str})):
+            yield self.url_result(
+                urljoin(self._BASE_URL, item.split('$')[3]), TarangPlusVideoIE)
+
+    def _real_extract(self, url):
+        url_type, display_id = self._match_valid_url(url).group('type', 'id')
+        series_url = f'{self._BASE_URL}/{url_type}/{display_id}'
+        webpage = self._download_webpage(series_url, display_id)
+
+        entries = OnDemandPagedList(
+            functools.partial(self._entries, f'{series_url}/episodes', display_id),
+            self._PAGE_SIZE)
+        return self.playlist_result(
+            entries, display_id, self._hidden_inputs(webpage).get('title'))
+
+
+class TarangPlusPlaylistIE(TarangPlusBaseIE):
+    IE_NAME = 'tarangplus:playlist'
+    _VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?P<id>[^#?/]+)/all/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://tarangplus.in/chhota-jaga/all',
+        'info_dict': {
+            'id': 'chhota-jaga',
+            'title': 'Chhota Jaga',
+        },
+        'playlist_mincount': 33,
+    }, {
+        'url': 'https://tarangplus.in/kids-yali-show/all',
+        'info_dict': {
+            'id': 'kids-yali-show',
+            'title': 'Yali',
+        },
+        'playlist_mincount': 10,
+    }, {
+        'url': 'https://tarangplus.in/trailer/all',
+        'info_dict': {
+            'id': 'trailer',
+            'title': 'Trailer',
+        },
+        'playlist_mincount': 57,
+    }, {
+        'url': 'https://tarangplus.in/latest-songs/all',
+        'info_dict': {
+            'id': 'latest-songs',
+            'title': 'Latest Songs',
+        },
+        'playlist_mincount': 46,
+    }, {
+        'url': 'https://tarangplus.in/premium-serials-episodes/all',
+        'info_dict': {
+            'id': 'premium-serials-episodes',
+            'title': 'Primetime Latest Episodes',
+        },
+        'playlist_mincount': 100,
+    }]
+
+    def _entries(self, webpage):
+        for url_path in traverse_obj(webpage, (
+            {find_elements(cls='item')}, ...,
+            {find_elements(tag='a', attr='href', value='/.+', html=True, regex=True)},
+            ..., {extract_attributes}, 'href',
+        )):
+            yield self.url_result(urljoin(self._BASE_URL, url_path), TarangPlusVideoIE)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        return self.playlist_result(
+            self._entries(webpage), display_id,
+            traverse_obj(webpage, ({find_element(id='al_title')}, {clean_html})))
diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py
index bdcae3b774..6846191bcb 100644
--- a/yt_dlp/extractor/telecinco.py
+++ b/yt_dlp/extractor/telecinco.py
@@ -6,20 +6,21 @@ from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     clean_html,
+    extract_attributes,
     int_or_none,
     join_nonempty,
     str_or_none,
-    traverse_obj,
     update_url,
     url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class TelecincoBaseIE(InfoExtractor):
     def _parse_content(self, content, url):
-        video_id = content['dataMediaId']
+        video_id = content['dataMediaId'][1]
         config = self._download_json(
-            content['dataConfig'], video_id, 'Downloading config JSON')
+            content['dataConfig'][1], video_id, 'Downloading config JSON')
         services = config['services']
         caronte = self._download_json(services['caronte'], video_id)
         if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
@@ -57,9 +58,9 @@ class TelecincoBaseIE(InfoExtractor):
             'id': video_id,
             'title': traverse_obj(config, ('info', 'title', {str})),
             'formats': formats,
-            'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
+            'thumbnail': (traverse_obj(content, ('dataPoster', 1, {url_or_none}))
                           or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)),
-            'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
+            'duration': traverse_obj(content, ('dataDuration', 1, {int_or_none})),
             'http_headers': headers,
         }

@@ -137,30 +138,45 @@ class TelecincoIE(TelecincoBaseIE):
         'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
         'only_matching': True,
     }]
+    _ASTRO_ISLAND_RE = re.compile(r'<astro-island[^>]+>')

     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id, impersonate=True)
-        article = self._search_json(
-            r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
-            webpage, 'article', display_id)['article']
-        description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))
-        if article.get('editorialType') != 'VID':
+        props_list = traverse_obj(webpage, (
+            {self._ASTRO_ISLAND_RE.findall}, ...,
+            {extract_attributes}, 'props', {json.loads}))
+
+        description = traverse_obj(props_list, (..., 'leadParagraph', 1, {clean_html}, any, filter))
+        main_content = traverse_obj(props_list, (..., ('content', ('articleData', 1, 'opening')), 1, {dict}, any))
+
+        if traverse_obj(props_list, (..., 'editorialType', 1, {str}, any)) != 'VID':  # e.g. 'ART'
             entries = []
-            for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
-                content = p['content']
-                type_ = p.get('type')
-                if type_ == 'paragraph' and isinstance(content, str):
+            for p in traverse_obj(props_list, (..., 'articleData', 1, ('opening', ('body', 1, ...)), 1, {dict})):
+                type_ = traverse_obj(p, ('type', 1, {str}))
+                content = traverse_obj(p, ('content', 1, {str} if type_ == 'paragraph' else {dict}))
+                if not content:
+                    continue
+                if type_ == 'paragraph':
                     description = join_nonempty(description, content, delim='')
-                elif type_ == 'video' and isinstance(content, dict):
+                elif type_ == 'video':
                     entries.append(self._parse_content(content, url))
+                else:
+                    self.report_warning(
+                        f'Skipping unsupported content type "{type_}"', display_id, only_once=True)

             return self.playlist_result(
-                entries, str_or_none(article.get('id')),
-                traverse_obj(article, ('title', {str})), clean_html(description))
+                entries,
+                traverse_obj(props_list, (..., 'id', 1, {int}, {str_or_none}, any)) or display_id,
+                traverse_obj(main_content, ('dataTitle', 1, {str})),
+                clean_html(description))

-        info = self._parse_content(article['opening']['content'], url)
+        if not main_content:
+            raise ExtractorError('Unable to extract main content from webpage')
+
+        info = self._parse_content(main_content, url)
         info['description'] = description
+
         return info
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index b7e058ebe7..02ec2b2f45 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -454,6 +454,7 @@ class TikTokBaseIE(InfoExtractor):
                 'like_count': 'digg_count',
                 'repost_count': 'share_count',
                 'comment_count': 'comment_count',
+                'save_count': 'collect_count',
             }, expected_type=int_or_none),
             **author_info,
             'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None),
@@ -607,6 +608,7 @@ class TikTokBaseIE(InfoExtractor):
                 'like_count': 'diggCount',
                 'repost_count': 'shareCount',
                 'comment_count': 'commentCount',
+                'save_count': 'collectCount',
             }), expected_type=int_or_none),
             'thumbnails': [
                 {
@@ -646,6 +648,7 @@ class TikTokIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'save_count': int,
             'artist': 'Ysrbeats',
             'album': 'Lehanga',
             'track': 'Lehanga',
         },
@@ -675,6 +678,7 @@ class TikTokIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'save_count': int,
             'artists': ['Evan Todd', 'Jessica Keenan Wynn', 'Alice Lee', 'Barrett Wilbert Weed', 'Jon Eidson'],
             'track': 'Big Fun',
         },
@@ -702,6 +706,7 @@ class TikTokIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'save_count': int,
         },
     }, {
         # Sponsored video, only available with feed workaround
@@ -725,6 +730,7 @@ class TikTokIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'save_count': int,
         },
         'skip': 'This video is unavailable',
     }, {
@@ -751,6 +757,7 @@ class TikTokIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'save_count': int,
         },
     }, {
         # hydration JSON is sent in a