paperless-ngx/src/paperless_text/tests/test_parser.py

38 lines
1.1 KiB
Python
Raw Normal View History

from pathlib import Path
2020-12-15 13:26:01 +01:00
from paperless_text.parsers import TextDocumentParser
class TestTextParser:
    """Tests exercising ``TextDocumentParser`` on plain-text input."""

    def test_thumbnail(self, text_parser: TextDocumentParser, sample_txt_file: Path):
        """
        GIVEN:
            - The sample text file
        WHEN:
            - A thumbnail is requested for it
        THEN:
            - The returned path exists and is a regular file
        """
        # just make sure that it does not crash
        f = text_parser.get_thumbnail(sample_txt_file, "text/plain")

        assert f.exists()
        assert f.is_file()

    def test_parse(self, text_parser: TextDocumentParser, sample_txt_file: Path):
        """
        GIVEN:
            - The sample text file
        WHEN:
            - The file is parsed
        THEN:
            - The extracted text matches the expected content
            - No archive file is produced
        """
        text_parser.parse(sample_txt_file, "text/plain")

        assert text_parser.get_text() == "This is a test file.\n"
        assert text_parser.get_archive_path() is None

    def test_parse_invalid_bytes(
        self,
        text_parser: TextDocumentParser,
        malformed_txt_file: Path,
    ):
        """
        GIVEN:
            - Text file which contains invalid UTF bytes
        WHEN:
            - The file is parsed
        THEN:
            - Parsing continues
            - Invalid bytes are replaced with U+FFFD (the Unicode
              replacement character)
            - No archive file is produced
        """
        text_parser.parse(malformed_txt_file, "text/plain")

        # Written as an escape so the expected replacement character survives
        # any re-encoding of this source file.
        assert text_parser.get_text() == "Pantothens\ufffdure\n"
        assert text_parser.get_archive_path() is None