[networking] Ensure underlying file object is closed when fully read (#14935)

Fixes https://github.com/yt-dlp/yt-dlp/issues/14891

Authored by: coletdjnz
2025-12-06 06:45:00 +01:00 · 2025-11-08 18:30:43 +13:00 · 2025-11-08 18:30:43 +13:00 · 5767fb4ab1
commit 5767fb4ab1
parent 73fd850d17
5 changed files with 124 additions and 16 deletions
--- a/yt_dlp/networking/_curlcffi.py
+++ b/yt_dlp/networking/_curlcffi.py
@@ -96,7 +96,10 @@ class CurlCFFIResponseAdapter(Response):

    def read(self, amt=None):
        try:
-            return self.fp.read(amt)
+            res = self.fp.read(amt)
+            if self.fp.closed:
+                self.close()
+            return res
        except curl_cffi.requests.errors.RequestsError as e:
            if e.code == CurlECode.PARTIAL_FILE:
                content_length = e.response and int_or_none(e.response.headers.get('Content-Length'))
--- a/yt_dlp/networking/_requests.py
+++ b/yt_dlp/networking/_requests.py
@@ -119,17 +119,22 @@ class RequestsResponseAdapter(Response):

        self._requests_response = res

+    def _real_read(self, amt: int | None = None) -> bytes:
+        # Work around issue with `.read(amt)` then `.read()`
+        # See: https://github.com/urllib3/urllib3/issues/3636
+        if amt is None:
+            # compat: py3.9: Python 3.9 preallocates the whole read buffer, read in chunks
+            read_chunk = functools.partial(self.fp.read, 1 << 20, decode_content=True)
+            return b''.join(iter(read_chunk, b''))
+        # Interact with urllib3 response directly.
+        return self.fp.read(amt, decode_content=True)
+
    def read(self, amt: int | None = None):
        try:
-            # Work around issue with `.read(amt)` then `.read()`
-            # See: https://github.com/urllib3/urllib3/issues/3636
-            if amt is None:
-                # compat: py3.9: Python 3.9 preallocates the whole read buffer, read in chunks
-                read_chunk = functools.partial(self.fp.read, 1 << 20, decode_content=True)
-                return b''.join(iter(read_chunk, b''))
-            # Interact with urllib3 response directly.
-            return self.fp.read(amt, decode_content=True)
-
+            data = self._real_read(amt)
+            if self.fp.closed:
+                self.close()
+            return data
        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -306,7 +306,25 @@ class UrllibResponseAdapter(Response):

    def read(self, amt=None):
        try:
-            return self.fp.read(amt)
+            data = self.fp.read(amt)
+            underlying = getattr(self.fp, 'fp', None)
+            if isinstance(self.fp, http.client.HTTPResponse) and underlying is None:
+                # http.client.HTTPResponse automatically closes itself when fully read
+                self.close()
+            elif isinstance(self.fp, urllib.response.addinfourl) and underlying is not None:
+                # urllib's addinfourl does not close the underlying fp automatically when fully read
+                if isinstance(underlying, io.BytesIO):
+                    # data URLs or in-memory responses (e.g. gzip/deflate/brotli decoded)
+                    if underlying.tell() >= len(underlying.getbuffer()):
+                        self.close()
+                elif isinstance(underlying, io.BufferedReader) and amt is None:
+                    # file URLs.
+                    # XXX: this will not mark the response as closed if it was fully read with amt.
+                    self.close()
+            elif underlying is not None and underlying.closed:
+                # Catch-all for any cases where underlying file is closed
+                self.close()
+            return data
        except Exception as e:
            handle_response_read_exceptions(e)
            raise e
--- a/yt_dlp/networking/common.py
+++ b/yt_dlp/networking/common.py
@@ -554,12 +554,16 @@ class Response(io.IOBase):
        # Expected errors raised here should be of type RequestError or subclasses.
        # Subclasses should redefine this method with more precise error handling.
        try:
-            return self.fp.read(amt)
+            res = self.fp.read(amt)
+            if self.fp.closed:
+                self.close()
+            return res
        except Exception as e:
            raise TransportError(cause=e) from e

    def close(self):
-        self.fp.close()
+        if not self.fp.closed:
+            self.fp.close()
        return super().close()

    def get_header(self, name, default=None):