mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-16 03:26:50 +01:00
Pass the user-configured maximum image pixel size to ocrmypdf
This commit is contained in:
parent
feaf2da834
commit
fc26fe0ac0
3 changed files with 23 additions and 9 deletions
|
|
@ -8,8 +8,6 @@ from documents.parsers import make_thumbnail_from_pdf
|
|||
from documents.parsers import ParseError
|
||||
from PIL import Image
|
||||
|
||||
Image.MAX_IMAGE_PIXELS = settings.OCR_MAX_IMAGE_PIXELS
|
||||
|
||||
|
||||
class NoTextFoundException(Exception):
|
||||
pass
|
||||
|
|
@ -225,6 +223,24 @@ class RasterisedDocumentParser(DocumentParser):
|
|||
f"they will not be used. Error: {e}",
|
||||
)
|
||||
|
||||
if settings.OCR_MAX_IMAGE_PIXELS is not None:
|
||||
# Convert pixels to mega-pixels and provide to ocrmypdf
|
||||
max_pixels_mpixels = settings.OCR_MAX_IMAGE_PIXELS / 1_000_000.0
|
||||
if max_pixels_mpixels > 0:
|
||||
|
||||
self.log(
|
||||
"debug",
|
||||
f"Calculated {max_pixels_mpixels} megapixels for OCR",
|
||||
)
|
||||
|
||||
ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
|
||||
else:
|
||||
self.log(
|
||||
"warning",
|
||||
"There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, "
|
||||
"this value must be at least 1 megapixel if set",
|
||||
)
|
||||
|
||||
return ocrmypdf_args
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue