mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-16 11:36:39 +01:00
Adds test coverage for multi-page TIFFs: without alpha, with alpha, and with alpha/sRGB
This commit is contained in:
parent
59e0c1fe4e
commit
0fd51e35e1
4 changed files with 63 additions and 0 deletions
Binary file not shown.
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/multi-page-images.tiff
Normal file
BIN
src/paperless_tesseract/tests/samples/multi-page-images.tiff
Normal file
Binary file not shown.
|
|
@ -542,6 +542,69 @@ class TestParser(DirectoriesMixin, TestCase):
|
|||
],
|
||||
)
|
||||
|
||||
def test_multi_page_tiff(self):
    """
    GIVEN:
        - A TIFF image made up of several pages
    WHEN:
        - The parser processes the image
    THEN:
        - An archive file exists at the parser's archive path
        - The extracted text contains the marker of every page
    """
    parser = RasterisedDocumentParser(None)

    sample_path = os.path.join(self.SAMPLE_FILES, "multi-page-images.tiff")
    parser.parse(sample_path, "image/tiff")

    archive_exists = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_exists)

    # Compare case-insensitively: the extracted text is lowered before matching.
    extracted_text = parser.get_text().lower()
    expected_markers = ["page 1", "page 2", "page 3"]
    self.assertContainsStrings(extracted_text, expected_markers)
|
||||
|
||||
def test_multi_page_tiff_alpha(self):
    """
    GIVEN:
        - A TIFF image made up of several pages
        - The image includes an alpha channel
    WHEN:
        - The parser processes the image
    THEN:
        - An archive file exists at the parser's archive path
        - The extracted text contains the marker of every page
    """
    parser = RasterisedDocumentParser(None)

    sample_path = os.path.join(self.SAMPLE_FILES, "multi-page-images-alpha.tiff")
    parser.parse(sample_path, "image/tiff")

    archive_exists = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_exists)

    # Compare case-insensitively: the extracted text is lowered before matching.
    extracted_text = parser.get_text().lower()
    expected_markers = ["page 1", "page 2", "page 3"]
    self.assertContainsStrings(extracted_text, expected_markers)
|
||||
|
||||
def test_multi_page_tiff_alpha_srgb(self):
    """
    GIVEN:
        - A TIFF image made up of several pages
        - The image includes an alpha channel
        - The image uses the sRGB colorspace
    WHEN:
        - The parser processes the image
    THEN:
        - An archive file exists at the parser's archive path
        - The extracted text contains the marker of every page
    """
    parser = RasterisedDocumentParser(None)

    sample_path = os.path.join(
        self.SAMPLE_FILES,
        "multi-page-images-alpha-rgb.tiff",
    )
    parser.parse(sample_path, "image/tiff")

    archive_exists = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_exists)

    # Compare case-insensitively: the extracted text is lowered before matching.
    extracted_text = parser.get_text().lower()
    expected_markers = ["page 1", "page 2", "page 3"]
    self.assertContainsStrings(extracted_text, expected_markers)
|
||||
|
||||
def test_ocrmypdf_parameters(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue