mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-19 21:16:56 +01:00
Add PAPERLESS_OCR_SKIP_ARCHIVE_FILE config setting
This commit is contained in:
parent
8a89f5ae27
commit
ca412e0184
8 changed files with 185 additions and 14 deletions
|
|
@@ -294,7 +294,11 @@ class RasterisedDocumentParser(DocumentParser):
|
|||
|
||||
# If the original has text, and the user doesn't want an archive,
|
||||
# we're done here
|
||||
if settings.OCR_MODE == "skip_noarchive" and original_has_text:
|
||||
skip_archive_for_text = (
|
||||
settings.OCR_MODE == "skip_noarchive"
|
||||
or settings.OCR_SKIP_ARCHIVE_FILE in ["with_text", "always"]
|
||||
)
|
||||
if skip_archive_for_text and original_has_text:
|
||||
self.log("debug", "Document has text, skipping OCRmyPDF entirely.")
|
||||
self.text = text_original
|
||||
return
|
||||
|
|
@@ -320,7 +324,8 @@ class RasterisedDocumentParser(DocumentParser):
|
|||
self.log("debug", f"Calling OCRmyPDF with args: {args}")
|
||||
ocrmypdf.ocr(**args)
|
||||
|
||||
self.archive_path = archive_path
|
||||
if settings.OCR_SKIP_ARCHIVE_FILE != "always":
|
||||
self.archive_path = archive_path
|
||||
|
||||
self.text = self.extract_text(sidecar_file, archive_path)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue