paperless-ngx/src/documents/signals/handlers.py

420 lines
14 KiB
Python
Raw Normal View History

import logging
import os
2016-03-28 19:47:11 +01:00
2021-03-04 22:16:56 +01:00
from django.utils import termcolors
2016-03-28 19:47:11 +01:00
from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
2020-12-12 01:19:22 +01:00
from django.db.models import Q
from django.dispatch import receiver
2021-03-04 22:16:56 +01:00
from django.utils import termcolors, timezone
2020-12-08 13:54:35 +01:00
from filelock import FileLock
2016-03-28 19:47:11 +01:00
2021-02-15 13:26:36 +01:00
from .. import matching
2020-12-08 13:54:35 +01:00
from ..file_handling import delete_empty_directories, \
create_source_path_directory, \
2020-12-08 13:54:35 +01:00
generate_unique_filename
2021-03-04 22:16:56 +01:00
from ..models import Document, Tag, MatchingModel
2021-02-05 01:10:29 +01:00
# Module-level logger used by every handler in this file.
logger = logging.getLogger("paperless.handlers")
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
    """
    Attach every tag flagged with ``is_inbox_tag`` to ``document``.

    ``sender``, ``logging_group`` and any additional keyword arguments are
    accepted but not used.
    """
    document.tags.add(*Tag.objects.filter(is_inbox_tag=True))
2016-03-28 19:47:11 +01:00
2020-11-21 14:03:45 +01:00
def set_correspondent(sender,
                      document=None,
                      logging_group=None,
                      classifier=None,
                      replace=False,
                      use_first=True,
                      suggest=False,
                      base_url=None,
                      color=False,
                      **kwargs):
    """
    Assign (or merely suggest) a correspondent for ``document``.

    :param document: the document to work on.
    :param logging_group: passed to the logger as ``extra['group']``.
    :param classifier: forwarded to ``matching.match_correspondents``.
    :param replace: when true, a correspondent that is already set is
        re-evaluated; if nothing matches, the correspondent is set to
        ``None``.
    :param use_first: when several correspondents match, pick the first one
        (true) or assign nothing at all (false).
    :param suggest: when true, only print the suggestion instead of saving.
    :param base_url: if given, the suggestion output contains a link of the
        form ``{base_url}/documents/{pk}``.
    :param color: colorize the document title in the suggestion output.
    """
    if document.correspondent and not replace:
        return

    candidates = matching.match_correspondents(document, classifier)
    candidate_count = len(candidates)
    selected = candidates[0] if candidates else None

    if candidate_count > 1:
        if not use_first:
            logger.debug(
                f"Detected {candidate_count} potential correspondents, "
                f"not assigning any correspondent",
                extra={'group': logging_group}
            )
            return
        logger.debug(
            f"Detected {candidate_count} potential correspondents, "
            f"so we've opted for {selected}",
            extra={'group': logging_group}
        )

    if not (selected or replace):
        return

    if not suggest:
        logger.info(
            f"Assigning correspondent {selected} to {document}",
            extra={'group': logging_group}
        )
        document.correspondent = selected
        document.save(update_fields=("correspondent",))
        return

    # Suggestion mode: report on stdout, leave the document untouched.
    title = str(document)
    if color:
        title = termcolors.colorize(title, fg='green')

    if base_url:
        print(title)
        print(f"{base_url}/documents/{document.pk}")
    else:
        print(title + f" [{document.pk}]")
    print(f"Suggest correspondent {selected}")
2020-11-21 14:03:45 +01:00
def set_document_type(sender,
                      document=None,
                      logging_group=None,
                      classifier=None,
                      replace=False,
                      use_first=True,
                      suggest=False,
                      base_url=None,
                      color=False,
                      **kwargs):
    """
    Assign (or merely suggest) a document type for ``document``.

    :param document: the document to work on.
    :param logging_group: passed to the logger as ``extra['group']``.
    :param classifier: forwarded to ``matching.match_document_types``.
    :param replace: when true, a document type that is already set is
        re-evaluated; if nothing matches, the document type is set to
        ``None``.
    :param use_first: when several document types match, pick the first one
        (true) or assign nothing at all (false).
    :param suggest: when true, only print the suggestion instead of saving.
    :param base_url: if given, the suggestion output contains a link of the
        form ``{base_url}/documents/{pk}``.
    :param color: colorize the document title in the suggestion output.
    """
    if document.document_type and not replace:
        return

    candidates = matching.match_document_types(document, classifier)
    candidate_count = len(candidates)
    selected = candidates[0] if candidates else None

    if candidate_count > 1:
        # NOTE(review): set_correspondent logs the equivalent messages at
        # debug level - confirm whether info is intended here.
        if not use_first:
            logger.info(
                f"Detected {candidate_count} potential document types, "
                f"not assigning any document type",
                extra={'group': logging_group}
            )
            return
        logger.info(
            f"Detected {candidate_count} potential document types, "
            f"so we've opted for {selected}",
            extra={'group': logging_group}
        )

    if not (selected or replace):
        return

    if not suggest:
        logger.info(
            f"Assigning document type {selected} to {document}",
            extra={'group': logging_group}
        )
        document.document_type = selected
        document.save(update_fields=("document_type",))
        return

    # Suggestion mode: report on stdout, leave the document untouched.
    title = str(document)
    if color:
        title = termcolors.colorize(title, fg='green')

    if base_url:
        print(title)
        print(f"{base_url}/documents/{document.pk}")
    else:
        print(title + f" [{document.pk}]")
    print(f"Suggest document type {selected}")
2020-11-21 14:03:45 +01:00
def set_tags(sender,
             document=None,
             logging_group=None,
             classifier=None,
             replace=False,
             suggest=False,
             base_url=None,
             color=False,
             **kwargs):
    """
    Add (or merely suggest) the tags that match ``document``.

    :param document: the document to work on.
    :param logging_group: passed to the logger as ``extra['group']``.
    :param classifier: forwarded to ``matching.match_tags``.
    :param replace: when true, existing tag assignments are deleted first,
        except for inbox tags and for tags that have an empty match string
        and do not use the auto matching algorithm.
    :param suggest: when true, only print the tags that would be added and
        the auto-matched tags that are assigned but no longer match.
    :param base_url: if given, the suggestion output contains a link of the
        form ``{base_url}/documents/{pk}``.
    :param color: colorize the document title in the suggestion output.
    """
    if replace:
        assignments = Document.tags.through.objects.filter(document=document)
        assignments = assignments.exclude(Q(tag__is_inbox_tag=True))
        assignments = assignments.exclude(
            Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
        )
        assignments.delete()

    current_tags = set(document.tags.all())
    matched_tags = set(matching.match_tags(document, classifier))
    new_tags = matched_tags - current_tags

    if not suggest:
        if not new_tags:
            return

        tag_names = ", ".join(t.name for t in new_tags)
        logger.info(
            'Tagging "{}" with "{}"'.format(document, tag_names),
            extra={'group': logging_group}
        )
        document.tags.add(*new_tags)
        return

    # Suggestion mode: report on stdout, leave the document untouched.
    stale_tags = [
        t for t in current_tags - matched_tags
        if t.matching_algorithm == MatchingModel.MATCH_AUTO
    ]
    if not new_tags and not stale_tags:
        return

    title = str(document)
    if color:
        title = termcolors.colorize(title, fg='green')

    if base_url:
        print(title)
        print(f"{base_url}/documents/{document.pk}")
    else:
        print(title + f" [{document.pk}]")

    if new_tags:
        print("Suggest tags: " + ", ".join(t.name for t in new_tags))
    if stale_tags:
        print("Extra tags: " + ", ".join(t.name for t in stale_tags))
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
    """
    Clean up the files of a document after it has been deleted.

    If ``settings.TRASH_DIR`` is set, the original file is moved there under
    a non-conflicting name; if that move fails, the error is logged and no
    further cleanup is done. Afterwards the original, archive and thumbnail
    files that still exist are deleted and directories left empty are
    removed. The whole operation runs under the media file lock.
    """
    # Local import: shutil is only needed for the trash move below.
    import shutil

    with FileLock(settings.MEDIA_LOCK):
        if settings.TRASH_DIR:
            # Find a non-conflicting filename in case a document with the same
            # name was moved to trash earlier
            old_filename = os.path.split(instance.source_path)[1]
            (old_filebase, old_fileext) = os.path.splitext(old_filename)

            counter = 0
            while True:
                new_file_path = os.path.join(
                    settings.TRASH_DIR,
                    old_filebase +
                    (f"_{counter:02}" if counter else "") +
                    old_fileext
                )
                if not os.path.exists(new_file_path):
                    break
                counter += 1

            logger.debug(
                f"Moving {instance.source_path} to trash at {new_file_path}")
            try:
                # os.rename() cannot cross file system boundaries;
                # shutil.move() falls back to copy-and-delete in that case,
                # so a trash directory on another device works as well.
                shutil.move(instance.source_path, new_file_path)
            except OSError as e:
                logger.error(
                    f"Failed to move {instance.source_path} to trash at "
                    f"{new_file_path}: {e}. Skipping cleanup!"
                )
                return

        for filename in (instance.source_path,
                         instance.archive_path,
                         instance.thumbnail_path):
            if filename and os.path.isfile(filename):
                try:
                    os.unlink(filename)
                    logger.debug(
                        f"Deleted file {filename}.")
                except OSError as e:
                    # Best effort: keep going with the remaining files.
                    logger.warning(
                        f"While deleting document {str(instance)}, the file "
                        f"{filename} could not be deleted: {e}"
                    )

        delete_empty_directories(
            os.path.dirname(instance.source_path),
            root=settings.ORIGINALS_DIR
        )

        if instance.has_archive_version:
            delete_empty_directories(
                os.path.dirname(instance.archive_path),
                root=settings.ARCHIVE_DIR
            )
2020-11-30 21:38:21 +01:00
2021-02-12 01:31:50 +01:00
class CannotMoveFilesException(Exception):
    """Raised when a document's file cannot be moved to its new location."""
2020-11-30 21:38:21 +01:00
def validate_move(instance, old_path, new_path):
    """
    Check that the file at ``old_path`` can be moved to ``new_path``.

    :param instance: the document the file belongs to; only used in the
        log and exception messages.
    :param old_path: current location of the file; must be an existing file.
    :param new_path: target location; must not be an existing file.
    :raises CannotMoveFilesException: if the source file is missing or the
        target file already exists. The exception carries the same message
        that is written to the log.
    """
    if not os.path.isfile(old_path):
        # Can't do anything if the old file does not exist anymore.
        message = f"Document {str(instance)}: File {old_path} has gone."
        # critical() is the documented name; fatal() is a discouraged alias.
        logger.critical(message)
        raise CannotMoveFilesException(message)

    if os.path.isfile(new_path):
        # Can't do anything if the new file already exists. Skip updating file.
        message = (
            f"Document {str(instance)}: Cannot rename file "
            f"since target path {new_path} already exists."
        )
        logger.warning(message)
        raise CannotMoveFilesException(message)
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename_and_move_files(sender, instance, **kwargs):
    """
    Regenerate the file names of a document and move its files accordingly.

    Runs after a document is saved or its tags change. New names for the
    original and (if present) archive file are generated; if either differs
    from the current one, the file is moved and the new names are written to
    the database. If moving or the database update fails, the files moved by
    this call are moved back and the old names are restored on ``instance``.
    Finally, directories left empty at the old locations are removed.
    """
    if not instance.filename:
        # Can't update the filename if there is no filename to begin with
        # This happens when the consumer creates a new document.
        # The document is modified and saved multiple times, and only after
        # everything is done (i.e., the generated filename is final),
        # filename will be set to the location where the consumer has put
        # the file.
        #
        # This will in turn cause this logic to move the file where it belongs.
        return

    with FileLock(settings.MEDIA_LOCK):
        # Capture the current state before anything can fail, so that the
        # rollback and cleanup code below never touches an unbound name.
        old_filename = instance.filename
        old_source_path = instance.source_path
        old_archive_filename = instance.archive_filename
        old_archive_path = instance.archive_path

        # Track which files this call has actually moved. The rollback must
        # only touch those: a file that already existed at the target path
        # (the reason validate_move() refuses a move) must never be renamed
        # over the document's own file.
        original_moved = False
        archive_moved = False

        try:
            instance.filename = generate_unique_filename(instance)
            move_original = old_filename != instance.filename

            if instance.has_archive_version:
                instance.archive_filename = generate_unique_filename(
                    instance, archive_filename=True
                )
                move_archive = old_archive_filename != instance.archive_filename  # NOQA: E501
            else:
                move_archive = False

            if not move_original and not move_archive:
                # Don't do anything if filenames did not change.
                return

            if move_original:
                validate_move(instance, old_source_path, instance.source_path)
                create_source_path_directory(instance.source_path)
                os.rename(old_source_path, instance.source_path)
                original_moved = True

            if move_archive:
                validate_move(
                    instance, old_archive_path, instance.archive_path)
                create_source_path_directory(instance.archive_path)
                os.rename(old_archive_path, instance.archive_path)
                archive_moved = True

            # Don't save() here to prevent infinite recursion.
            Document.objects.filter(pk=instance.pk).update(
                filename=instance.filename,
                archive_filename=instance.archive_filename,
            )

        except (OSError, DatabaseError, CannotMoveFilesException):
            # This happens when either:
            #  - moving the files failed due to file system errors
            #  - saving to the database failed due to database errors
            # In both cases, we need to revert to the original state.

            # Try to move files to their original location.
            try:
                if original_moved and os.path.isfile(instance.source_path):
                    os.rename(instance.source_path, old_source_path)

                if archive_moved and os.path.isfile(instance.archive_path):
                    os.rename(instance.archive_path, old_archive_path)

            except Exception as e:
                # This is fine, since:
                # A: if we managed to move source from A to B, we will also
                #  manage to move it from B to A. If not, we have a serious
                #  issue that's going to get caught by the sanity checker.
                #  All files remain in place and will never be overwritten,
                #  so this is not the end of the world.
                # B: if moving the original file failed, nothing has changed
                #  anyway.
                # Still leave a trace so the situation can be diagnosed.
                logger.warning(
                    f"Document {str(instance)}: Could not move files back "
                    f"to their original location: {e}"
                )

            # restore old values on the instance
            instance.filename = old_filename
            instance.archive_filename = old_archive_filename

        # finally, remove any empty sub folders. This will do nothing if
        # something has failed above.
        if not os.path.isfile(old_source_path):
            delete_empty_directories(os.path.dirname(old_source_path),
                                     root=settings.ORIGINALS_DIR)

        if instance.has_archive_version and not os.path.isfile(old_archive_path):  # NOQA: E501
            delete_empty_directories(os.path.dirname(old_archive_path),
                                     root=settings.ARCHIVE_DIR)
def set_log_entry(sender, document=None, logging_group=None, **kwargs):
    """
    Create an admin ``LogEntry`` recording the addition of ``document``.

    The entry is attributed to the user named "consumer" and uses the content
    type whose model is "document". ``sender``, ``logging_group`` and extra
    keyword arguments are accepted but not used.
    """
    document_content_type = ContentType.objects.get(model="document")
    consumer_user = User.objects.get(username="consumer")

    LogEntry.objects.create(
        action_flag=ADDITION,
        action_time=timezone.now(),
        content_type=document_content_type,
        object_id=document.pk,
        user=consumer_user,
        object_repr=str(document),
    )
def add_to_index(sender, document, **kwargs):
    """
    Add ``document`` to the document index, or update its existing entry.

    ``sender`` and extra keyword arguments are accepted but not used.
    """
    # NOTE(review): imported at call time rather than at module level -
    # presumably to avoid an import cycle; confirm before moving it.
    from documents import index as document_index

    document_index.add_or_update_document(document)