[networking] Ensure underlying file object is closed when fully read (#14935)

Fixes https://github.com/yt-dlp/yt-dlp/issues/14891

Authored by: coletdjnz
This commit is contained in:
coletdjnz 2025-11-08 18:30:43 +13:00 committed by GitHub
parent 73fd850d17
commit 5767fb4ab1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 124 additions and 16 deletions

View file

@ -96,7 +96,10 @@ class CurlCFFIResponseAdapter(Response):
def read(self, amt=None):
try:
return self.fp.read(amt)
res = self.fp.read(amt)
if self.fp.closed:
self.close()
return res
except curl_cffi.requests.errors.RequestsError as e:
if e.code == CurlECode.PARTIAL_FILE:
content_length = e.response and int_or_none(e.response.headers.get('Content-Length'))

View file

@ -119,17 +119,22 @@ class RequestsResponseAdapter(Response):
self._requests_response = res
def _real_read(self, amt: int | None = None) -> bytes:
# Read bytes from the wrapped urllib3 response (`self.fp`), always passing
# `decode_content=True` to its `read()`.
# - amt is None: read repeatedly until `self.fp.read` returns b'' and return the joined data.
# - otherwise: issue a single `self.fp.read(amt, ...)` call and return its result.
# No exceptions are caught here; anything raised by `self.fp.read` propagates to the caller.
# Work around issue with `.read(amt)` then `.read()`
# See: https://github.com/urllib3/urllib3/issues/3636
if amt is None:
# compat: py3.9: Python 3.9 preallocates the whole read buffer, read in chunks
# Each chunk request is 1 << 20 bytes (1 MiB); `iter(callable, sentinel)` stops at the first b''.
read_chunk = functools.partial(self.fp.read, 1 << 20, decode_content=True)
return b''.join(iter(read_chunk, b''))
# Interact with urllib3 response directly.
return self.fp.read(amt, decode_content=True)
def read(self, amt: int | None = None):
try:
# Work around issue with `.read(amt)` then `.read()`
# See: https://github.com/urllib3/urllib3/issues/3636
if amt is None:
# compat: py3.9: Python 3.9 preallocates the whole read buffer, read in chunks
read_chunk = functools.partial(self.fp.read, 1 << 20, decode_content=True)
return b''.join(iter(read_chunk, b''))
# Interact with urllib3 response directly.
return self.fp.read(amt, decode_content=True)
data = self._real_read(amt)
if self.fp.closed:
self.close()
return data
# See urllib3.response.HTTPResponse.read() for exceptions raised on read
except urllib3.exceptions.SSLError as e:
raise SSLError(cause=e) from e

View file

@ -306,7 +306,25 @@ class UrllibResponseAdapter(Response):
def read(self, amt=None):
try:
return self.fp.read(amt)
data = self.fp.read(amt)
underlying = getattr(self.fp, 'fp', None)
if isinstance(self.fp, http.client.HTTPResponse) and underlying is None:
# http.client.HTTPResponse automatically closes itself when fully read
self.close()
elif isinstance(self.fp, urllib.response.addinfourl) and underlying is not None:
# urllib's addinfourl does not close the underlying fp automatically when fully read
if isinstance(underlying, io.BytesIO):
# data URLs or in-memory responses (e.g. gzip/deflate/brotli decoded)
if underlying.tell() >= len(underlying.getbuffer()):
self.close()
elif isinstance(underlying, io.BufferedReader) and amt is None:
# file URLs.
# XXX: this will not mark the response as closed if it was fully read with amt.
self.close()
elif underlying is not None and underlying.closed:
# Catch-all for any cases where underlying file is closed
self.close()
return data
except Exception as e:
handle_response_read_exceptions(e)
raise e

View file

@ -554,12 +554,16 @@ class Response(io.IOBase):
# Expected errors raised here should be of type RequestError or subclasses.
# Subclasses should redefine this method with more precise error handling.
try:
return self.fp.read(amt)
res = self.fp.read(amt)
if self.fp.closed:
self.close()
return res
except Exception as e:
raise TransportError(cause=e) from e
def close(self):
self.fp.close()
if not self.fp.closed:
self.fp.close()
return super().close()
def get_header(self, name, default=None):