Fixhancement: improve text thumbnail generation for large files (#10483)

This commit is contained in:
shamoon 2025-08-01 13:26:35 -04:00 committed by GitHub
parent f09965464a
commit 23501b9060
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 35 additions and 2 deletions

View file

@ -1,3 +1,4 @@
import tempfile
from pathlib import Path
from paperless_text.parsers import TextDocumentParser
@ -35,3 +36,26 @@ class TestTextParser:
assert text_parser.get_text() == "Pantothens<EFBFBD>ure\n"
assert text_parser.get_archive_path() is None
def test_thumbnail_large_file(self, text_parser: TextDocumentParser):
"""
GIVEN:
- A very large text file (>50MB)
WHEN:
- A thumbnail is requested
THEN:
- A thumbnail is created without reading the entire file into memory
"""
with tempfile.NamedTemporaryFile(
delete=False,
mode="w",
encoding="utf-8",
suffix=".txt",
) as tmp:
tmp.write("A" * (51 * 1024 * 1024)) # 51 MB of 'A'
large_file = Path(tmp.name)
thumb = text_parser.get_thumbnail(large_file, "text/plain")
assert thumb.exists()
assert thumb.is_file()
large_file.unlink()