mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-10 08:37:19 +01:00
reworked PDF parser that uses OCRmyPDF and produces archive files.
This commit is contained in:
parent
95ec520f13
commit
2d559d330d
7 changed files with 374 additions and 186 deletions
|
|
@ -107,23 +107,6 @@ def run_convert(input_file,
|
|||
raise ParseError("Convert failed at {}".format(args))
|
||||
|
||||
|
||||
def run_unpaper(pnm, logging_group=None):
|
||||
pnm_out = pnm.replace(".pnm", ".unpaper.pnm")
|
||||
|
||||
command_args = (settings.UNPAPER_BINARY, "--overwrite", "--quiet", pnm,
|
||||
pnm_out)
|
||||
|
||||
logger.debug(f"Execute: {' '.join(command_args)}",
|
||||
extra={'group': logging_group})
|
||||
|
||||
if not subprocess.Popen(command_args,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL).wait() == 0:
|
||||
raise ParseError(f"Unpaper failed at {command_args}")
|
||||
|
||||
return pnm_out
|
||||
|
||||
|
||||
class ParseError(Exception):
|
||||
pass
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue