This commit is contained in:
Shaheer Sarfaraz 2025-12-05 20:34:50 +01:00 committed by GitHub
commit c62bc958ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 257 additions and 0 deletions

View file

@ -1837,4 +1837,6 @@ The only reliable way to check if a site is supported is to try it.
- **zingmp3:week-chart** - **zingmp3:week-chart**
- **zoom** - **zoom**
- **Zype** - **Zype**
- **Porndead**
- **SexDead**
- **generic**: Generic downloader that works on some sites - **generic**: Generic downloader that works on some sites

View file

@ -1583,6 +1583,7 @@ from .polskieradio import (
from .popcorntimes import PopcorntimesIE from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE from .popcorntv import PopcornTVIE
from .pornbox import PornboxIE from .pornbox import PornboxIE
from .porndead import PornDeadIE
from .pornflip import PornFlipIE from .pornflip import PornFlipIE
from .pornhub import ( from .pornhub import (
PornHubIE, PornHubIE,
@ -1847,6 +1848,7 @@ from .senategov import (
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from .servus import ServusIE from .servus import ServusIE
from .sevenplus import SevenPlusIE from .sevenplus import SevenPlusIE
from .sexdead import SexDeadIE
from .sexu import SexuIE from .sexu import SexuIE
from .seznamzpravy import ( from .seznamzpravy import (
SeznamZpravyArticleIE, SeznamZpravyArticleIE,

View file

@ -0,0 +1,127 @@
import re
import urllib.parse
from yt_dlp.utils._utils import int_or_none
from .common import InfoExtractor
from ..utils import ExtractorError
class PornDeadIE(InfoExtractor):
    """Extractor for porndead.org video pages.

    The page's JavaScript assigns a ``player_url`` variable; POSTing to that
    URL with ``type=1`` appended returns an HTML fragment whose
    ``<a class="href_mp4">`` anchors are direct MP4 links, one per quality.
    """

    _VALID_URL = r'https?://(?:www\.)?porndead\.org/video/(?P<id>[0-9a-f]+)'
    _TESTS = [
        {
            'url': 'https://porndead.org/video/65fefcb523810',
            'info_dict': {
                'id': '65fefcb523810',
                'ext': 'mp4',
                'title': 'Hysterical Literature - Isabel Love',
                'age_limit': 18,
            },
        },
    ]

    def _real_extract(self, url):
        # Lower-case so upper-case hex IDs still match _VALID_URL in _match_id.
        url = url.strip().lower()
        # If "www." is missing, add it: the relative URLs on the page appear
        # to depend on it.
        parsed = urllib.parse.urlparse(url)
        if parsed.netloc == 'porndead.org':
            parsed = parsed._replace(netloc='www.porndead.org')
            url = urllib.parse.urlunparse(parsed)
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Prefer the site's own title element, then OpenGraph, then a stub.
        title = (
            self._html_search_regex(
                r'<div[^>]+class=["\']title_video["\'][^>]*>([^<]+)</div>',
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or f'Video {video_id}')

        # Extract the player_url variable from <script> player_url = "..." </script>.
        player_rel = self._search_regex(
            r'(?is)player[_-]?url\s*=\s*(["\'])(?P<u>[^"\']+)\1',
            webpage, 'player url', default=None, group='u')
        if not player_rel:
            raise ExtractorError('Could not find player_url on page', expected=True)
        # Resolve the relative URL and append type=1 like the page's JS does.
        player_url = urllib.parse.urljoin(url, player_rel)
        player_endpoint = player_url + ('&type=1' if '?' in player_url else '?type=1')

        ajax_headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (compatible)',
            'Accept': '*/*',
        }
        # Fetch the quality-options HTML fragment.
        try:
            options_html = self._download_webpage(
                player_endpoint, video_id, headers=ajax_headers,
                data=b'')  # empty body forces a POST where supported
        except ExtractorError:
            # Already a well-formed extractor error; don't double-wrap it.
            raise
        except Exception as e:
            raise ExtractorError(
                f'Failed to download options from {player_endpoint}: {e}',
                expected=True)

        formats = []
        # Direct MP4 links are anchors with class href_mp4.
        links = re.findall(
            r'<a[^>]+class=["\']href_mp4["\'][^>]*href=["\']([^"\']+)["\'][^>]*>([^<]+)</a>',
            options_html or '', flags=re.IGNORECASE)
        for href, label in links:
            full_url = urllib.parse.urljoin(url, href)
            # Infer height from the label (e.g. '720p') or filename ('720P_').
            # Guard against no match: m_h may be None, and m_h.group(1) would
            # raise AttributeError and abort extraction of every format.
            m_h = re.search(r'(\d{3,4})[pP]', label) or re.search(r'(\d{3,4})P_', href)
            height = int_or_none(m_h.group(1)) if m_h else None
            # Infer bitrate from '4000K' in the path or 'rate=500k' in the query.
            m_k = re.search(r'([0-9]+)[kK]', href) or re.search(r'rate=([0-9]+)k', href)
            tbr = int_or_none(m_k.group(1)) if m_k else None
            fmt = {
                'format_id': f'{height}p' if height else label.strip(),
                'url': full_url,
                'ext': 'mp4',
                # The CDN checks the Referer; send one with each media request.
                'http_headers': {'Referer': url, 'User-Agent': 'Mozilla/5.0'},
            }
            if height:
                fmt['height'] = height
            if tbr:
                fmt['tbr'] = tbr
            formats.append(fmt)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'age_limit': 18,
        }

126
yt_dlp/extractor/sexdead.py Normal file
View file

@ -0,0 +1,126 @@
import re
import urllib.parse
from yt_dlp.utils._utils import int_or_none
from .common import InfoExtractor
from ..utils import ExtractorError
class SexDeadIE(InfoExtractor):
    """Extractor for sexdead.org video pages.

    The page's JavaScript assigns a ``player_url`` variable; POSTing to that
    URL with ``type=1`` appended returns an HTML fragment whose
    ``<a class="href_mp4">`` anchors are direct MP4 links, one per quality.
    """

    _VALID_URL = r'https?://(?:www\.)?sexdead\.org/video/(?P<id>[0-9a-f]+)'
    _TESTS = [
        {
            'url': 'https://sexdead.org/video/65fefcb523810',
            'info_dict': {
                'id': '65fefcb523810',
                'ext': 'mp4',
                'title': 'Hysterical Literature - Isabel Love',
                'age_limit': 18,
            },
        },
    ]

    def _real_extract(self, url):
        # Lower-case so upper-case hex IDs still match _VALID_URL in _match_id.
        url = url.strip().lower()
        # If "www." is missing, add it: the relative URLs on the page appear
        # to depend on it.
        parsed = urllib.parse.urlparse(url)
        if parsed.netloc == 'sexdead.org':
            parsed = parsed._replace(netloc='www.sexdead.org')
            url = urllib.parse.urlunparse(parsed)
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Prefer the site's own title element, then OpenGraph, then a stub.
        title = (
            self._html_search_regex(
                r'<div[^>]+class=["\']title_video["\'][^>]*>([^<]+)</div>',
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or f'Video {video_id}')

        # Extract the player_url variable from <script> player_url = "..." </script>.
        player_rel = self._search_regex(
            r'(?is)player[_-]?url\s*=\s*(["\'])(?P<u>[^"\']+)\1',
            webpage, 'player url', default=None, group='u')
        if not player_rel:
            raise ExtractorError('Could not find player_url on page', expected=True)
        # Resolve the relative URL and append type=1 like the page's JS does.
        player_url = urllib.parse.urljoin(url, player_rel)
        player_endpoint = player_url + ('&type=1' if '?' in player_url else '?type=1')

        ajax_headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (compatible)',
            'Accept': '*/*',
        }
        # Fetch the quality-options HTML fragment.
        try:
            options_html = self._download_webpage(
                player_endpoint, video_id, headers=ajax_headers,
                data=b'')  # empty body forces a POST where supported
        except ExtractorError:
            # Already a well-formed extractor error; don't double-wrap it.
            raise
        except Exception as e:
            raise ExtractorError(
                f'Failed to download options from {player_endpoint}: {e}',
                expected=True)

        formats = []
        # Direct MP4 links are anchors with class href_mp4.
        links = re.findall(
            r'<a[^>]+class=["\']href_mp4["\'][^>]*href=["\']([^"\']+)["\'][^>]*>([^<]+)</a>',
            options_html or '', flags=re.IGNORECASE)
        for href, label in links:
            full_url = urllib.parse.urljoin(url, href)
            # Infer height from the label (e.g. '720p') or filename ('720P_').
            # Guard against no match: m_h may be None, and m_h.group(1) would
            # raise AttributeError and abort extraction of every format.
            m_h = re.search(r'(\d{3,4})[pP]', label) or re.search(r'(\d{3,4})P_', href)
            height = int_or_none(m_h.group(1)) if m_h else None
            # Infer bitrate from '4000K' in the path or 'rate=500k' in the query.
            m_k = re.search(r'([0-9]+)[kK]', href) or re.search(r'rate=([0-9]+)k', href)
            tbr = int_or_none(m_k.group(1)) if m_k else None
            fmt = {
                'format_id': f'{height}p' if height else label.strip(),
                'url': full_url,
                'ext': 'mp4',
                # The CDN checks the Referer; send one with each media request.
                'http_headers': {'Referer': url, 'User-Agent': 'Mozilla/5.0'},
            }
            if height:
                fmt['height'] = height
            if tbr:
                fmt['tbr'] = tbr
            formats.append(fmt)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'age_limit': 18,
        }