paperless-ngx/src/documents/management/commands/document_thumbnails.py

85 lines
2.2 KiB
Python
Raw Normal View History

import logging
import multiprocessing
import shutil
import tqdm
from django import db
from django.core.management.base import BaseCommand
from documents.models import Document
from ...parsers import get_parser_class_for_mime_type
def _process_document(doc_in):
    """Regenerate the thumbnail for the document whose ID is ``doc_in``.

    Looks the document up by ID, picks the parser class registered for its
    MIME type, asks that parser for a thumbnail and moves the result to the
    document's thumbnail path. When no parser class is returned for the
    MIME type, a message is printed and the document is skipped.
    """
    document: Document = Document.objects.get(id=doc_in)

    parser_class = get_parser_class_for_mime_type(document.mime_type)
    if not parser_class:
        # Nothing can render this document; report it and move on.
        print(f"{document} No parser for mime type {document.mime_type}")
        return

    parser = parser_class(logging_group=None)
    try:
        generated_thumbnail = parser.get_thumbnail(
            document.source_path,
            document.mime_type,
            document.get_public_filename(),
        )
        shutil.move(generated_thumbnail, document.thumbnail_path)
    finally:
        # Always run the parser's cleanup, even if thumbnail generation
        # or the move failed.
        parser.cleanup()
2020-12-30 17:20:03 +01:00
2021-02-04 23:40:53 +01:00
class Command(BaseCommand):
    """Management command that regenerates document thumbnails.

    Thumbnails are rebuilt by ``_process_document`` in a pool of worker
    processes, either for every document or for a single document selected
    with ``--document``.
    """

    # A plain literal keeps the help text independent of how this source
    # file happens to be indented.
    help = "This will regenerate the thumbnails for all documents."

    def add_arguments(self, parser):
        """Register the ``--document`` and ``--no-progress-bar`` options."""
        parser.add_argument(
            "-d",
            "--document",
            default=None,
            type=int,
            required=False,
            help="Specify the ID of a document, and this command will only "
            "run on this specific document.",
        )
        parser.add_argument(
            "--no-progress-bar",
            default=False,
            action="store_true",
            help="If set, the progress bar will not be shown",
        )

    def handle(self, *args, **options):
        """Regenerate thumbnails for the selected documents.

        Reads ``options["document"]`` (an optional document ID) and
        ``options["no_progress_bar"]`` (disables the tqdm progress bar).
        """
        # Raise the level of the first root handler so only errors are
        # emitted while the command runs. Guard against a root logger that
        # has no handlers configured instead of failing with IndexError.
        root_handlers = logging.getLogger().handlers
        if root_handlers:
            root_handlers[0].setLevel(logging.ERROR)

        # Compare against None explicitly: a falsy ID such as 0 must not
        # silently fall through to "regenerate everything".
        document_id = options["document"]
        if document_id is not None:
            documents = Document.objects.filter(pk=document_id)
        else:
            documents = Document.objects.all()

        # Only the IDs are handed to the workers, so fetch just that column
        # rather than building full model instances.
        ids = list(documents.values_list("id", flat=True))

        # Note to future self: this prevents django from reusing database
        # connections between processes, which is bad and does not work
        # with postgres.
        db.connections.close_all()

        with multiprocessing.Pool() as pool:
            # list() drains the iterator so that every task is consumed and
            # the progress bar advances once per finished document.
            list(
                tqdm.tqdm(
                    pool.imap_unordered(_process_document, ids),
                    total=len(ids),
                    disable=options["no_progress_bar"],
                ),
            )