# paperless-ngx/src/documents/signals/handlers.py
import logging
import os
import shutil
from typing import Optional
2016-03-28 19:47:11 +01:00
from celery import states
from celery.signals import before_task_publish
from celery.signals import task_failure
from celery.signals import task_postrun
from celery.signals import task_prerun
2016-03-28 19:47:11 +01:00
from django.conf import settings
from django.contrib.admin.models import ADDITION
from django.contrib.admin.models import LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import DatabaseError
from django.db import close_old_connections
from django.db import models
2020-12-12 01:19:22 +01:00
from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
2020-12-08 13:54:35 +01:00
from filelock import FileLock
from guardian.shortcuts import remove_perm
2016-03-28 19:47:11 +01:00
from documents import matching
from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier
2024-01-03 00:19:19 -08:00
from documents.consumer import parse_doc_title_w_placeholders
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename
2024-01-03 00:19:19 -08:00
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import MatchingModel
from documents.models import PaperlessTask
from documents.models import Tag
2024-01-03 00:19:19 -08:00
from documents.models import Workflow
from documents.models import WorkflowAction
2024-01-03 00:19:19 -08:00
from documents.models import WorkflowTrigger
2023-04-27 01:24:22 -07:00
from documents.permissions import get_objects_for_user_owner_aware
2024-01-03 00:19:19 -08:00
from documents.permissions import set_permissions_for_object
2021-02-05 01:10:29 +01:00
logger = logging.getLogger("paperless.handlers")
def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs):
    """Attach every inbox tag visible to the document's owner to *document*."""
    if document.owner is None:
        # Unowned documents may receive any inbox tag.
        visible_tags = Tag.objects.all()
    else:
        # Owned documents: respect object-level view permissions.
        visible_tags = get_objects_for_user_owner_aware(
            document.owner,
            "documents.view_tag",
            Tag,
        )
    document.tags.add(*visible_tags.filter(is_inbox_tag=True))
def _suggestion_printer(
stdout,
style_func,
suggestion_type: str,
document: Document,
selected: MatchingModel,
base_url: Optional[str] = None,
):
"""
Smaller helper to reduce duplication when just outputting suggestions to the console
"""
doc_str = str(document)
if base_url is not None:
stdout.write(style_func.SUCCESS(doc_str))
stdout.write(style_func.SUCCESS(f"{base_url}/documents/{document.pk}"))
else:
stdout.write(style_func.SUCCESS(f"{doc_str} [{document.pk}]"))
stdout.write(f"Suggest {suggestion_type}: {selected}")
2022-02-27 15:26:41 +01:00
def set_correspondent(
    sender,
    document: Document,
    logging_group=None,
    classifier: Optional[DocumentClassifier] = None,
    replace=False,
    use_first=True,
    suggest=False,
    base_url=None,
    stdout=None,
    style_func=None,
    **kwargs,
):
    """
    Match *document* against all correspondents and assign the best candidate.

    An existing correspondent is kept unless *replace* is set.  When several
    correspondents match, *use_first* selects the first one, otherwise none is
    assigned.  With *suggest*, the selection is only printed to *stdout*
    instead of being saved.
    """
    if document.correspondent and not replace:
        return

    potential_correspondents = matching.match_correspondents(document, classifier)

    potential_count = len(potential_correspondents)
    selected = potential_correspondents[0] if potential_correspondents else None
    if potential_count > 1:
        if use_first:
            # Consistency fix: the sibling handlers (set_document_type,
            # set_storage_path) log this decision at INFO level; DEBUG here
            # hid the auto-selection from the default log output.
            logger.info(
                f"Detected {potential_count} potential correspondents, "
                f"so we've opted for {selected}",
                extra={"group": logging_group},
            )
        else:
            logger.info(
                f"Detected {potential_count} potential correspondents, "
                f"not assigning any correspondent",
                extra={"group": logging_group},
            )
            return

    if selected or replace:
        if suggest:
            _suggestion_printer(
                stdout,
                style_func,
                "correspondent",
                document,
                selected,
                base_url,
            )
        else:
            logger.info(
                f"Assigning correspondent {selected} to {document}",
                extra={"group": logging_group},
            )

            document.correspondent = selected
            document.save(update_fields=("correspondent",))
def set_document_type(
    sender,
    document: Document,
    logging_group=None,
    classifier: Optional[DocumentClassifier] = None,
    replace=False,
    use_first=True,
    suggest=False,
    base_url=None,
    stdout=None,
    style_func=None,
    **kwargs,
):
    """
    Match *document* against all document types and assign the best candidate.

    Keeps an existing document type unless *replace* is set.  With several
    candidates, *use_first* picks the first match; otherwise nothing is
    assigned.  With *suggest*, the choice is only printed to *stdout*.
    """
    if document.document_type and not replace:
        return

    candidates = matching.match_document_types(document, classifier)
    candidate_count = len(candidates)
    selected = candidates[0] if candidates else None

    if candidate_count > 1:
        if not use_first:
            logger.info(
                f"Detected {candidate_count} potential document types, "
                f"not assigning any document type",
                extra={"group": logging_group},
            )
            return
        logger.info(
            f"Detected {candidate_count} potential document types, "
            f"so we've opted for {selected}",
            extra={"group": logging_group},
        )

    if not (selected or replace):
        return
    if suggest:
        _suggestion_printer(
            stdout,
            style_func,
            "document type",
            document,
            selected,
            base_url,
        )
        return
    logger.info(
        f"Assigning document type {selected} to {document}",
        extra={"group": logging_group},
    )
    document.document_type = selected
    document.save(update_fields=("document_type",))
def set_tags(
    sender,
    document: Document,
    logging_group=None,
    classifier: Optional[DocumentClassifier] = None,
    replace=False,
    suggest=False,
    base_url=None,
    stdout=None,
    style_func=None,
    **kwargs,
):
    """
    Match *document* against all tags and attach the matching ones.

    With *replace*, previously attached tags are removed first, except inbox
    tags and tags that matching could never restore on its own (no match
    pattern and not auto-matching).  With *suggest*, nothing is changed; the
    suggested additions (and stale auto-matched tags) are printed to *stdout*.
    """
    if replace:
        # Detach existing tag links, keeping inbox tags and manually assigned
        # tags that cannot be re-matched automatically.
        Document.tags.through.objects.filter(document=document).exclude(
            Q(tag__is_inbox_tag=True),
        ).exclude(
            Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO),
        ).delete()

    current_tags = set(document.tags.all())
    matched_tags = matching.match_tags(document, classifier)
    relevant_tags = set(matched_tags) - current_tags

    if not suggest:
        if not relevant_tags:
            return
        tag_names = ", ".join([t.name for t in relevant_tags])
        logger.info(
            f'Tagging "{document}" with "{tag_names}"',
            extra={"group": logging_group},
        )
        document.tags.add(*relevant_tags)
        return

    # Suggestion mode: also report auto-matched tags that no longer match.
    extra_tags = [
        t
        for t in current_tags - set(matched_tags)
        if t.matching_algorithm == MatchingModel.MATCH_AUTO
    ]
    if not relevant_tags and not extra_tags:
        return
    doc_str = style_func.SUCCESS(str(document))
    if base_url:
        stdout.write(doc_str)
        stdout.write(f"{base_url}/documents/{document.pk}")
    else:
        stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
    if relevant_tags:
        stdout.write("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
    if extra_tags:
        stdout.write("Extra tags: " + ", ".join([t.name for t in extra_tags]))
def set_storage_path(
    sender,
    document: Document,
    logging_group=None,
    classifier: Optional[DocumentClassifier] = None,
    replace=False,
    use_first=True,
    suggest=False,
    base_url=None,
    stdout=None,
    style_func=None,
    **kwargs,
):
    """
    Match *document* against all storage paths and assign the best candidate.

    Keeps an existing storage path unless *replace* is set.  With several
    candidates, *use_first* picks the first match; otherwise nothing is
    assigned.  With *suggest*, the choice is only printed to *stdout*.
    """
    if document.storage_path and not replace:
        return

    candidates = matching.match_storage_paths(
        document,
        classifier,
    )
    candidate_count = len(candidates)
    selected = candidates[0] if candidates else None

    if candidate_count > 1:
        if not use_first:
            logger.info(
                f"Detected {candidate_count} potential storage paths, "
                f"not assigning any storage directory",
                extra={"group": logging_group},
            )
            return
        logger.info(
            f"Detected {candidate_count} potential storage paths, "
            f"so we've opted for {selected}",
            extra={"group": logging_group},
        )

    if not (selected or replace):
        return
    if suggest:
        _suggestion_printer(
            stdout,
            style_func,
            "storage directory",
            document,
            selected,
            base_url,
        )
        return
    logger.info(
        f"Assigning storage path {selected} to {document}",
        extra={"group": logging_group},
    )
    document.storage_path = selected
    document.save(update_fields=("storage_path",))
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
    """
    Remove a deleted document's files from disk.

    With TRASH_DIR configured, the original file is moved there (under a
    non-conflicting name) instead; a failed trash move skips all cleanup so
    nothing is lost.  Otherwise source, archive and thumbnail files are
    deleted and any now-empty directories pruned.
    """
    with FileLock(settings.MEDIA_LOCK):
        if settings.TRASH_DIR:
            # Find a non-conflicting filename in case a document with the same
            # name was moved to trash earlier
            counter = 0
            old_filename = os.path.split(instance.source_path)[1]
            (old_filebase, old_fileext) = os.path.splitext(old_filename)

            while True:
                new_file_path = os.path.join(
                    settings.TRASH_DIR,
                    old_filebase + (f"_{counter:02}" if counter else "") + old_fileext,
                )

                if os.path.exists(new_file_path):
                    counter += 1
                else:
                    break

            logger.debug(f"Moving {instance.source_path} to trash at {new_file_path}")
            try:
                shutil.move(instance.source_path, new_file_path)
            except OSError as e:
                logger.error(
                    f"Failed to move {instance.source_path} to trash at "
                    f"{new_file_path}: {e}. Skipping cleanup!",
                )
                # Do not delete anything if the trash move failed.
                return

        for filename in (
            instance.source_path,
            instance.archive_path,
            instance.thumbnail_path,
        ):
            if filename and os.path.isfile(filename):
                try:
                    os.unlink(filename)
                    # Fix: the message previously contained no placeholder and
                    # never said which file was deleted.
                    logger.debug(f"Deleted file {filename}.")
                except OSError as e:
                    logger.warning(
                        f"While deleting document {instance!s}, the file "
                        f"{filename} could not be deleted: {e}",
                    )

        delete_empty_directories(
            os.path.dirname(instance.source_path),
            root=settings.ORIGINALS_DIR,
        )

        if instance.has_archive_version:
            delete_empty_directories(
                os.path.dirname(instance.archive_path),
                root=settings.ARCHIVE_DIR,
            )
class CannotMoveFilesException(Exception):
    """Raised when a file move must be aborted (source gone or target taken)."""
def validate_move(instance, old_path, new_path):
    """
    Verify that *old_path* can be renamed to *new_path* for *instance*.

    Raises CannotMoveFilesException (after logging the reason) when the
    source file no longer exists or the target path is already occupied.
    """
    if not os.path.isfile(old_path):
        # Source vanished from disk — nothing we can move.
        logger.fatal(f"Document {instance!s}: File {old_path} has gone.")
        raise CannotMoveFilesException

    if os.path.isfile(new_path):
        # Never overwrite an existing file; skip the rename instead.
        logger.warning(
            f"Document {instance!s}: Cannot rename file "
            f"since target path {new_path} already exists.",
        )
        raise CannotMoveFilesException
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename_and_move_files(sender, instance: Document, **kwargs):
    """
    Regenerate the document's filename(s) from its current metadata and move
    the source/archive files on disk accordingly.

    Runs on every Document save and tag change, under the global media lock.
    On any failure the files are moved back and the in-memory instance is
    reverted, keeping disk and database consistent.
    """
    if not instance.filename:
        # Can't update the filename if there is no filename to begin with
        # This happens when the consumer creates a new document.
        # The document is modified and saved multiple times, and only after
        # everything is done (i.e., the generated filename is final),
        # filename will be set to the location where the consumer has put
        # the file.
        #
        # This will in turn cause this logic to move the file where it belongs.
        return

    with FileLock(settings.MEDIA_LOCK):
        try:
            # If this was waiting for the lock, the filename or archive_filename
            # of this document may have been updated. This happens if multiple updates
            # get queued from the UI for the same document
            # So freshen up the data before doing anything
            instance.refresh_from_db()

            old_filename = instance.filename
            old_source_path = instance.source_path

            instance.filename = generate_unique_filename(instance)
            move_original = old_filename != instance.filename

            old_archive_filename = instance.archive_filename
            old_archive_path = instance.archive_path

            if instance.has_archive_version:
                instance.archive_filename = generate_unique_filename(
                    instance,
                    archive_filename=True,
                )
                move_archive = old_archive_filename != instance.archive_filename
            else:
                move_archive = False

            if not move_original and not move_archive:
                # Don't do anything if filenames did not change.
                return

            # Validate before touching the filesystem: each move raises
            # CannotMoveFilesException if it cannot proceed.
            if move_original:
                validate_move(instance, old_source_path, instance.source_path)
                create_source_path_directory(instance.source_path)
                shutil.move(old_source_path, instance.source_path)

            if move_archive:
                validate_move(instance, old_archive_path, instance.archive_path)
                create_source_path_directory(instance.archive_path)
                shutil.move(old_archive_path, instance.archive_path)

            # Don't save() here to prevent infinite recursion.
            Document.objects.filter(pk=instance.pk).update(
                filename=instance.filename,
                archive_filename=instance.archive_filename,
                modified=timezone.now(),
            )

            # Clear any caching for this document. Slightly overkill, but not terrible
            clear_document_caches(instance.pk)

        except (OSError, DatabaseError, CannotMoveFilesException) as e:
            logger.warning(f"Exception during file handling: {e}")
            # This happens when either:
            #  - moving the files failed due to file system errors
            #  - saving to the database failed due to database errors
            # In both cases, we need to revert to the original state.

            # Try to move files to their original location.
            try:
                if move_original and os.path.isfile(instance.source_path):
                    logger.info("Restoring previous original path")
                    shutil.move(instance.source_path, old_source_path)

                if move_archive and os.path.isfile(instance.archive_path):
                    logger.info("Restoring previous archive path")
                    shutil.move(instance.archive_path, old_archive_path)

            except Exception:
                # This is fine, since:
                # A: if we managed to move source from A to B, we will also
                #  manage to move it from B to A. If not, we have a serious
                #  issue that's going to get caught by the sanity checker.
                #  All files remain in place and will never be overwritten,
                #  so this is not the end of the world.
                # B: if moving the original file failed, nothing has changed
                #  anyway.
                pass

            # restore old values on the instance
            instance.filename = old_filename
            instance.archive_filename = old_archive_filename

        # finally, remove any empty sub folders. This will do nothing if
        # something has failed above.
        if not os.path.isfile(old_source_path):
            delete_empty_directories(
                os.path.dirname(old_source_path),
                root=settings.ORIGINALS_DIR,
            )

        if instance.has_archive_version and not os.path.isfile(
            old_archive_path,
        ):
            delete_empty_directories(
                os.path.dirname(old_archive_path),
                root=settings.ARCHIVE_DIR,
            )
def set_log_entry(sender, document: Document, logging_group=None, **kwargs):
    """Record an admin LogEntry noting that the consumer added *document*."""
    content_type = ContentType.objects.get(model="document")
    consumer_user = User.objects.get(username="consumer")

    LogEntry.objects.create(
        action_flag=ADDITION,
        action_time=timezone.now(),
        content_type=content_type,
        object_id=document.pk,
        user=consumer_user,
        object_repr=str(document),
    )
def add_to_index(sender, document, **kwargs):
    """Add *document* to the search index, or refresh its existing entry."""
    # NOTE(review): import is kept function-local as in the original —
    # presumably to avoid an import cycle with documents.index; confirm
    # before hoisting it to module level.
    from documents import index

    index.add_or_update_document(document)
def run_workflow_added(sender, document: Document, logging_group=None, **kwargs):
    """Apply all enabled DOCUMENT_ADDED workflows to a newly consumed document."""
    run_workflow(
        WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        document,
        logging_group,
    )
def run_workflow_updated(sender, document: Document, logging_group=None, **kwargs):
    """Apply all enabled DOCUMENT_UPDATED workflows to an edited document."""
    run_workflow(
        WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
        document,
        logging_group,
    )
def run_workflow(
    trigger_type: WorkflowTrigger.WorkflowTriggerType,
    document: Document,
    logging_group=None,
):
    """
    Apply every enabled workflow with a trigger of *trigger_type* that matches
    *document*, in workflow order.

    ASSIGNMENT actions add tags, metadata, permissions and custom fields;
    REMOVAL actions strip them.  The document is saved once per matching
    workflow.
    """
    for workflow in (
        Workflow.objects.filter(
            enabled=True,
            triggers__type=trigger_type,
        )
        # Prefetch every related set an action may touch below.
        .prefetch_related("actions")
        .prefetch_related("actions__assign_view_users")
        .prefetch_related("actions__assign_view_groups")
        .prefetch_related("actions__assign_change_users")
        .prefetch_related("actions__assign_change_groups")
        .prefetch_related("actions__assign_custom_fields")
        .prefetch_related("actions__remove_tags")
        .prefetch_related("actions__remove_correspondents")
        .prefetch_related("actions__remove_document_types")
        .prefetch_related("actions__remove_storage_paths")
        .prefetch_related("actions__remove_custom_fields")
        .prefetch_related("actions__remove_owners")
        .prefetch_related("triggers")
        .order_by("order")
    ):
        if matching.document_matches_workflow(
            document,
            workflow,
            trigger_type,
        ):
            action: WorkflowAction
            for action in workflow.actions.all():
                logger.info(
                    f"Applying {action} from {workflow}",
                    extra={"group": logging_group},
                )

                if action.type == WorkflowAction.WorkflowActionType.ASSIGNMENT:
                    if action.assign_tags.all().count() > 0:
                        document.tags.add(*action.assign_tags.all())

                    if action.assign_correspondent is not None:
                        document.correspondent = action.assign_correspondent

                    if action.assign_document_type is not None:
                        document.document_type = action.assign_document_type

                    if action.assign_storage_path is not None:
                        document.storage_path = action.assign_storage_path

                    if action.assign_owner is not None:
                        document.owner = action.assign_owner

                    if action.assign_title is not None:
                        try:
                            # Fill title placeholders from current metadata;
                            # unset values become empty strings.
                            document.title = parse_doc_title_w_placeholders(
                                action.assign_title,
                                (
                                    document.correspondent.name
                                    if document.correspondent is not None
                                    else ""
                                ),
                                (
                                    document.document_type.name
                                    if document.document_type is not None
                                    else ""
                                ),
                                (
                                    document.owner.username
                                    if document.owner is not None
                                    else ""
                                ),
                                timezone.localtime(document.added),
                                (
                                    document.original_filename
                                    if document.original_filename is not None
                                    else ""
                                ),
                                timezone.localtime(document.created),
                            )
                        except Exception:
                            # A bad template must not break consumption;
                            # the original title is kept.
                            logger.exception(
                                f"Error occurred parsing title assignment '{action.assign_title}', falling back to original",
                                extra={"group": logging_group},
                            )

                    if (
                        (
                            action.assign_view_users is not None
                            and action.assign_view_users.count() > 0
                        )
                        or (
                            action.assign_view_groups is not None
                            and action.assign_view_groups.count() > 0
                        )
                        or (
                            action.assign_change_users is not None
                            and action.assign_change_users.count() > 0
                        )
                        or (
                            action.assign_change_groups is not None
                            and action.assign_change_groups.count() > 0
                        )
                    ):
                        # Merge the action's permissions into the document's
                        # existing ones.
                        permissions = {
                            "view": {
                                "users": action.assign_view_users.all().values_list(
                                    "id",
                                )
                                or [],
                                "groups": action.assign_view_groups.all().values_list(
                                    "id",
                                )
                                or [],
                            },
                            "change": {
                                "users": action.assign_change_users.all().values_list(
                                    "id",
                                )
                                or [],
                                "groups": action.assign_change_groups.all().values_list(
                                    "id",
                                )
                                or [],
                            },
                        }
                        set_permissions_for_object(
                            permissions=permissions,
                            object=document,
                            merge=True,
                        )

                    if action.assign_custom_fields is not None:
                        for field in action.assign_custom_fields.all():
                            if (
                                CustomFieldInstance.objects.filter(
                                    field=field,
                                    document=document,
                                ).count()
                                == 0
                            ):
                                # can be triggered on existing docs, so only add the field if it doesn't already exist
                                CustomFieldInstance.objects.create(
                                    field=field,
                                    document=document,
                                )

                elif action.type == WorkflowAction.WorkflowActionType.REMOVAL:
                    if action.remove_all_tags:
                        document.tags.clear()
                    else:
                        # Remove only the listed tags the document actually has.
                        for tag in action.remove_tags.filter(
                            pk__in=list(document.tags.values_list("pk", flat=True)),
                        ).all():
                            document.tags.remove(tag.pk)

                    if action.remove_all_correspondents or (
                        document.correspondent
                        and (
                            action.remove_correspondents.filter(
                                pk=document.correspondent.pk,
                            ).exists()
                        )
                    ):
                        document.correspondent = None

                    if action.remove_all_document_types or (
                        document.document_type
                        and (
                            action.remove_document_types.filter(
                                pk=document.document_type.pk,
                            ).exists()
                        )
                    ):
                        document.document_type = None

                    if action.remove_all_storage_paths or (
                        document.storage_path
                        and (
                            action.remove_storage_paths.filter(
                                pk=document.storage_path.pk,
                            ).exists()
                        )
                    ):
                        document.storage_path = None

                    if action.remove_all_owners or (
                        document.owner
                        and (action.remove_owners.filter(pk=document.owner.pk).exists())
                    ):
                        document.owner = None

                    if action.remove_all_permissions:
                        # Replace (merge=False) with an empty permission set.
                        permissions = {
                            "view": {
                                "users": [],
                                "groups": [],
                            },
                            "change": {
                                "users": [],
                                "groups": [],
                            },
                        }
                        set_permissions_for_object(
                            permissions=permissions,
                            object=document,
                            merge=False,
                        )
                    elif (
                        (action.remove_view_users.all().count() > 0)
                        or (action.remove_view_groups.all().count() > 0)
                        or (action.remove_change_users.all().count() > 0)
                        or (action.remove_change_groups.all().count() > 0)
                    ):
                        for user in action.remove_view_users.all():
                            remove_perm("view_document", user, document)
                        for user in action.remove_change_users.all():
                            remove_perm("change_document", user, document)
                        for group in action.remove_view_groups.all():
                            remove_perm("view_document", group, document)
                        for group in action.remove_change_groups.all():
                            remove_perm("change_document", group, document)

                    if action.remove_all_custom_fields:
                        CustomFieldInstance.objects.filter(document=document).delete()
                    elif action.remove_custom_fields.all().count() > 0:
                        CustomFieldInstance.objects.filter(
                            field__in=action.remove_custom_fields.all(),
                            document=document,
                        ).delete()

            document.save()
@before_task_publish.connect
def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs):
    """
    Create a PaperlessTask in the PENDING state for a consume_file task.

    This signal fires when the task message is published to the broker,
    i.e. before any worker has picked it up.
    https://docs.celeryq.dev/en/stable/userguide/signals.html#before-task-publish
    https://docs.celeryq.dev/en/stable/internals/protocol.html#version-2
    """
    # Only document consumption tasks are tracked.
    # Assumption: this is only ever a v2 protocol message.
    if headers.get("task") != "documents.tasks.consume_file":
        return

    try:
        close_old_connections()
        # v2 protocol: body[0] holds the positional task arguments,
        # (input_doc, overrides); only the input document is needed here.
        input_doc, _ = body[0]
        PaperlessTask.objects.create(
            task_id=headers["id"],
            status=states.PENDING,
            task_file_name=input_doc.original_file.name,
            task_name=headers["task"],
            result=None,
            date_created=timezone.now(),
            date_started=None,
            date_done=None,
        )
    except Exception:  # pragma: no cover
        # Don't let an exception in the signal handlers prevent
        # a document from being consumed.
        logger.exception("Creating PaperlessTask failed")
@task_prerun.connect
def task_prerun_handler(sender=None, task_id=None, task=None, **kwargs):
    """
    Mark the matching PaperlessTask as STARTED and record its start time.

    This signal fires on the worker just before task execution begins.
    https://docs.celeryq.dev/en/stable/userguide/signals.html#task-prerun
    """
    try:
        close_old_connections()
        paperless_task = PaperlessTask.objects.filter(task_id=task_id).first()
        if paperless_task is None:
            # No tracking record exists for this task id; nothing to update.
            return
        paperless_task.status = states.STARTED
        paperless_task.date_started = timezone.now()
        paperless_task.save()
    except Exception:  # pragma: no cover
        # Don't let an exception in the signal handlers prevent
        # a document from being consumed.
        logger.exception("Setting PaperlessTask started failed")
@task_postrun.connect
def task_postrun_handler(
    sender=None,
    task_id=None,
    task=None,
    retval=None,
    state=None,
    **kwargs,
):
    """
    Store the final state, return value and completion time on the
    matching PaperlessTask once the worker finishes executing it.
    https://docs.celeryq.dev/en/stable/userguide/signals.html#task-postrun
    """
    try:
        close_old_connections()
        paperless_task = PaperlessTask.objects.filter(task_id=task_id).first()
        if paperless_task is None:
            # No tracking record exists for this task id; nothing to update.
            return
        paperless_task.status = state
        paperless_task.result = retval
        paperless_task.date_done = timezone.now()
        paperless_task.save()
    except Exception:  # pragma: no cover
        # Don't let an exception in the signal handlers prevent
        # a document from being consumed.
        logger.exception("Updating PaperlessTask failed")
@task_failure.connect
def task_failure_handler(
    sender=None,
    task_id=None,
    exception=None,
    args=None,
    traceback=None,
    **kwargs,
):
    """
    Record FAILURE state and the traceback on the matching PaperlessTask.

    Only fills in the result if nothing was stored yet, so an outcome
    already written (e.g. by the postrun handler) is not overwritten.
    https://docs.celeryq.dev/en/stable/userguide/signals.html#task-failure
    """
    try:
        close_old_connections()
        paperless_task = PaperlessTask.objects.filter(task_id=task_id).first()
        if paperless_task is not None and paperless_task.result is None:
            paperless_task.status = states.FAILURE
            paperless_task.result = traceback
            paperless_task.date_done = timezone.now()
            paperless_task.save()
    except Exception:  # pragma: no cover
        logger.exception("Updating PaperlessTask failed")