mirror of https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-06 14:55:02 +01:00
Merge c2821d18aa into 7ec6b9bc40
This commit is contained in: commit c62bc958ca
4 changed files with 257 additions and 0 deletions

@@ -1837,4 +1837,6 @@ The only reliable way to check if a site is supported is to try it.
 - **zingmp3:week-chart**
 - **zoom**
 - **Zype**
+- **Porndead**
+- **SexDead**
 - **generic**: Generic downloader that works on some sites

@@ -1583,6 +1583,7 @@ from .polskieradio import (
 from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
 from .pornbox import PornboxIE
+from .porndead import PornDeadIE
 from .pornflip import PornFlipIE
 from .pornhub import (
     PornHubIE,

@@ -1847,6 +1848,7 @@ from .senategov import (
 from .sendtonews import SendtoNewsIE
 from .servus import ServusIE
 from .sevenplus import SevenPlusIE
+from .sexdead import SexDeadIE
 from .sexu import SexuIE
 from .seznamzpravy import (
     SeznamZpravyArticleIE,

127  yt_dlp/extractor/porndead.py  Normal file
@@ -0,0 +1,127 @@
import re
import urllib.parse

from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none


class PornDeadIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?porndead\.org/video/(?P<id>[0-9a-f]+)'
    _TESTS = [{
        'url': 'https://porndead.org/video/65fefcb523810',
        'info_dict': {
            'id': '65fefcb523810',
            'ext': 'mp4',
            'title': 'Hysterical Literature - Isabel Love',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        url = url.strip().lower()

        # If "www" is missing, add it, because the relative URLs on the page seem to depend on it
        parsed = urllib.parse.urlparse(url)
        if parsed.netloc == 'porndead.org':
            parsed = parsed._replace(netloc='www.porndead.org')
            url = urllib.parse.urlunparse(parsed)

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Prefer the on-page title; fall back to og:title, then a generic placeholder
        title = (
            self._html_search_regex(
                r'<div[^>]+class=["\']title_video["\'][^>]*>([^<]+)</div>',
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or f'Video {video_id}')

        # Extract the player_url variable from <script> player_url = "..." </script>
        player_rel = self._search_regex(
            r'(?is)player[_-]?url\s*=\s*(["\'])(?P<u>[^"\']+)\1',
            webpage, 'player url', default=None, group='u')
        if not player_rel:
            raise ExtractorError('Could not find player_url on page', expected=True)

        # Resolve the relative URL and append type=1, like the JS on the page does
        player_url = urllib.parse.urljoin(url, player_rel)
        player_endpoint = player_url + ('&type=1' if '?' in player_url else '?type=1')

        ajax_headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (compatible)',
            'Accept': '*/*',
        }

        # Fetch the HTML fragment that lists the quality options
        try:
            options_html = self._download_webpage(
                player_endpoint, video_id, headers=ajax_headers,
                data=b'')  # empty body to force a POST where supported
        except Exception as e:
            raise ExtractorError(
                f'Failed to download options from {player_endpoint}: {e}', expected=True)

        formats = []

        # Direct mp4 links are anchors with class href_mp4 in the returned HTML
        links = re.findall(
            r'<a[^>]+class=["\']href_mp4["\'][^>]*href=["\']([^"\']+)["\'][^>]*>([^<]+)</a>',
            options_html or '', flags=re.IGNORECASE)

        for href, label in links:
            full_url = urllib.parse.urljoin(url, href)

            # Infer the height from the label (e.g. '240p', '720p') or the filename (720P_)
            m_h = re.search(r'(\d{3,4})[pP]', label) or re.search(r'(\d{3,4})P_', href)
            height = int_or_none(m_h.group(1)) if m_h else None

            # Infer the bitrate from the URL (e.g. '4000K', or rate=500k in the query)
            m_k = re.search(r'([0-9]+)[kK]', href) or re.search(r'rate=([0-9]+)k', href)
            tbr = int_or_none(m_k.group(1)) if m_k else None

            fmt = {
                'format_id': f'{height}p' if height else label.strip(),
                'url': full_url,
                'ext': 'mp4',
                'http_headers': {'Referer': url, 'User-Agent': 'Mozilla/5.0'},
            }
            if height:
                fmt['height'] = height
            if tbr:
                fmt['tbr'] = tbr
            formats.append(fmt)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'age_limit': 18,
        }
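For reference, a minimal sketch of how the new extractor can be exercised through the public YoutubeDL API once this branch is installed. The URL is the one from the _TESTS entry above; whether any formats come back depends on the site being reachable.

# Minimal sketch: run only the extractor, without downloading the media.
import yt_dlp

opts = {
    'quiet': True,
    'skip_download': True,
}
with yt_dlp.YoutubeDL(opts) as ydl:
    # URL taken from the _TESTS entry of PornDeadIE
    info = ydl.extract_info('https://porndead.org/video/65fefcb523810', download=False)
    print(info['id'], info['title'])
    for f in info.get('formats', []):
        print(f.get('format_id'), f.get('height'), f.get('tbr'))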
126  yt_dlp/extractor/sexdead.py  Normal file
@@ -0,0 +1,126 @@
import re
import urllib.parse

from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none


class SexDeadIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?sexdead\.org/video/(?P<id>[0-9a-f]+)'
    _TESTS = [{
        'url': 'https://sexdead.org/video/65fefcb523810',
        'info_dict': {
            'id': '65fefcb523810',
            'ext': 'mp4',
            'title': 'Hysterical Literature - Isabel Love',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        url = url.strip().lower()

        # If "www" is missing, add it, because the relative URLs on the page seem to depend on it
        parsed = urllib.parse.urlparse(url)
        if parsed.netloc == 'sexdead.org':
            parsed = parsed._replace(netloc='www.sexdead.org')
            url = urllib.parse.urlunparse(parsed)

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Prefer the on-page title; fall back to og:title, then a generic placeholder
        title = (
            self._html_search_regex(
                r'<div[^>]+class=["\']title_video["\'][^>]*>([^<]+)</div>',
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or f'Video {video_id}')

        # Extract the player_url variable from <script> player_url = "..." </script>
        player_rel = self._search_regex(
            r'(?is)player[_-]?url\s*=\s*(["\'])(?P<u>[^"\']+)\1',
            webpage, 'player url', default=None, group='u')
        if not player_rel:
            raise ExtractorError('Could not find player_url on page', expected=True)

        # Resolve the relative URL and append type=1, like the JS on the page does
        player_url = urllib.parse.urljoin(url, player_rel)
        player_endpoint = player_url + ('&type=1' if '?' in player_url else '?type=1')

        ajax_headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (compatible)',
            'Accept': '*/*',
        }

        # Fetch the HTML fragment that lists the quality options
        try:
            options_html = self._download_webpage(
                player_endpoint, video_id, headers=ajax_headers,
                data=b'')  # empty body to force a POST where supported
        except Exception as e:
            raise ExtractorError(
                f'Failed to download options from {player_endpoint}: {e}', expected=True)

        formats = []

        # Direct mp4 links are anchors with class href_mp4 in the returned HTML
        links = re.findall(
            r'<a[^>]+class=["\']href_mp4["\'][^>]*href=["\']([^"\']+)["\'][^>]*>([^<]+)</a>',
            options_html or '', flags=re.IGNORECASE)

        for href, label in links:
            full_url = urllib.parse.urljoin(url, href)

            # Infer the height from the label (e.g. '240p', '720p') or the filename (720P_)
            m_h = re.search(r'(\d{3,4})[pP]', label) or re.search(r'(\d{3,4})P_', href)
            height = int_or_none(m_h.group(1)) if m_h else None

            # Infer the bitrate from the URL (e.g. '4000K', or rate=500k in the query)
            m_k = re.search(r'([0-9]+)[kK]', href) or re.search(r'rate=([0-9]+)k', href)
            tbr = int_or_none(m_k.group(1)) if m_k else None

            fmt = {
                'format_id': f'{height}p' if height else label.strip(),
                'url': full_url,
                'ext': 'mp4',
                'http_headers': {'Referer': url, 'User-Agent': 'Mozilla/5.0'},
            }
            if height:
                fmt['height'] = height
            if tbr:
                fmt['tbr'] = tbr
            formats.append(fmt)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'age_limit': 18,
        }
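One design note: the two new extractors are identical apart from the domain. A possible follow-up, not part of this commit, would be to move the shared flow into a base class. A hypothetical sketch, with DeadVideoBaseIE as an invented name:

import urllib.parse

from .common import InfoExtractor


class DeadVideoBaseIE(InfoExtractor):  # hypothetical name, not in this commit
    _DOMAIN = None  # subclasses set e.g. 'porndead.org'

    def _real_extract(self, url):
        # Normalize the host exactly as both extractors above do ...
        parsed = urllib.parse.urlparse(url.strip().lower())
        if parsed.netloc == self._DOMAIN:
            parsed = parsed._replace(netloc=f'www.{self._DOMAIN}')
            url = urllib.parse.urlunparse(parsed)
        # ... then continue with the shared title / player_url / format logic
        # shown in porndead.py above.
        raise NotImplementedError('sketch only; see porndead.py for the full flow')


class PornDeadIE(DeadVideoBaseIE):
    _DOMAIN = 'porndead.org'
    _VALID_URL = r'https?://(?:www\.)?porndead\.org/video/(?P<id>[0-9a-f]+)'


class SexDeadIE(DeadVideoBaseIE):
    _DOMAIN = 'sexdead.org'
    _VALID_URL = r'https?://(?:www\.)?sexdead\.org/video/(?P<id>[0-9a-f]+)'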