paperless-ngx/src/documents/models.py

1016 lines
27 KiB
Python
Raw Normal View History

import datetime
2016-02-27 20:18:50 +00:00
import logging
2016-01-01 16:13:59 +00:00
import os
2016-01-28 07:23:11 +00:00
import re
from collections import OrderedDict
from pathlib import Path
from typing import Final
from typing import Optional
2016-03-24 19:18:33 +00:00
2018-09-09 21:03:37 +01:00
import dateutil.parser
import pathvalidate
from celery import states
2016-01-01 16:13:59 +00:00
from django.conf import settings
from django.contrib.auth.models import Group
2020-12-12 15:46:56 +01:00
from django.contrib.auth.models import User
2023-01-23 20:28:12 -08:00
from django.core.validators import MaxValueValidator
from django.core.validators import MinValueValidator
2015-12-20 19:23:33 +00:00
from django.db import models
2015-12-26 13:20:52 +00:00
from django.utils import timezone
2020-12-30 21:48:34 +01:00
from django.utils.translation import gettext_lazy as _
from multiselectfield import MultiSelectField
if settings.AUDIT_LOG_ENABLED:
from auditlog.registry import auditlog
from documents.data_models import DocumentSource
from documents.parsers import get_default_file_extension
2018-09-09 21:03:37 +01:00
class ModelWithOwner(models.Model):
    """
    Abstract base model that gives a record an optional owning user.
    """

    # Optional link to the owning user. SET_NULL clears the reference
    # instead of deleting the row when the referenced user is removed.
    owner = models.ForeignKey(
        User,
        verbose_name=_("owner"),
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    class Meta:
        abstract = True
class MatchingModel(ModelWithOwner):
    """
    Abstract base for named, owned objects that carry a match pattern
    together with the algorithm selected to apply it.
    """

    # Identifiers stored in ``matching_algorithm``.
    MATCH_NONE = 0
    MATCH_ANY = 1
    MATCH_ALL = 2
    MATCH_LITERAL = 3
    MATCH_REGEX = 4
    MATCH_FUZZY = 5
    MATCH_AUTO = 6

    # (stored value, translated label) pairs offered as field choices.
    MATCHING_ALGORITHMS = (
        (MATCH_NONE, _("None")),
        (MATCH_ANY, _("Any word")),
        (MATCH_ALL, _("All words")),
        (MATCH_LITERAL, _("Exact match")),
        (MATCH_REGEX, _("Regular expression")),
        (MATCH_FUZZY, _("Fuzzy word")),
        (MATCH_AUTO, _("Automatic")),
    )

    name = models.CharField(_("name"), max_length=128)

    match = models.CharField(_("match"), blank=True, max_length=256)

    matching_algorithm = models.PositiveIntegerField(
        _("matching algorithm"),
        default=MATCH_ANY,
        choices=MATCHING_ALGORITHMS,
    )

    is_insensitive = models.BooleanField(_("is insensitive"), default=True)

    class Meta:
        abstract = True
        ordering = ("name",)
        constraints = [
            # A name may appear only once per owner ...
            models.UniqueConstraint(
                fields=["name", "owner"],
                name="%(app_label)s_%(class)s_unique_name_owner",
            ),
            # ... and only once among the rows that have no owner.
            models.UniqueConstraint(
                name="%(app_label)s_%(class)s_name_uniq",
                fields=["name"],
                condition=models.Q(owner__isnull=True),
            ),
        ]

    def __str__(self):
        """Return the object's name."""
        return self.name
class Correspondent(MatchingModel):
    """
    A correspondent that documents can be assigned to.
    """

    class Meta(MatchingModel.Meta):
        verbose_name = _("correspondent")
        verbose_name_plural = _("correspondents")
class Tag(MatchingModel):
    """
    A tag that can be attached to documents, with a display color and an
    optional "inbox" flag.
    """

    # Stored as a string of at most 7 characters; the default is "#a6cee3".
    color = models.CharField(_("color"), default="#a6cee3", max_length=7)

    is_inbox_tag = models.BooleanField(
        _("is inbox tag"),
        help_text=_(
            "Marks this tag as an inbox tag: All newly consumed "
            "documents will be tagged with inbox tags.",
        ),
        default=False,
    )

    class Meta(MatchingModel.Meta):
        verbose_name = _("tag")
        verbose_name_plural = _("tags")
2016-01-23 04:40:35 +00:00
class DocumentType(MatchingModel):
    """
    A document type that documents can be assigned to.
    """

    class Meta(MatchingModel.Meta):
        verbose_name = _("document type")
        verbose_name_plural = _("document types")
2018-08-24 13:45:15 +02:00
class StoragePath(MatchingModel):
    """
    A named storage path that documents can be assigned to.
    """

    # The path value itself, limited to 512 characters.
    path = models.CharField(_("path"), max_length=512)

    class Meta(MatchingModel.Meta):
        verbose_name = _("storage path")
        verbose_name_plural = _("storage paths")
class Document(ModelWithOwner):
    """
    A stored document: its metadata, text content and checksums, plus
    helpers that resolve the on-disk locations of the original file, the
    archive version and the thumbnail.
    """

    STORAGE_TYPE_UNENCRYPTED = "unencrypted"
    STORAGE_TYPE_GPG = "gpg"
    STORAGE_TYPES = (
        (STORAGE_TYPE_UNENCRYPTED, _("Unencrypted")),
        (STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard")),
    )

    correspondent = models.ForeignKey(
        Correspondent,
        verbose_name=_("correspondent"),
        related_name="documents",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    storage_path = models.ForeignKey(
        StoragePath,
        verbose_name=_("storage path"),
        related_name="documents",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    title = models.CharField(_("title"), max_length=128, blank=True, db_index=True)

    document_type = models.ForeignKey(
        DocumentType,
        verbose_name=_("document type"),
        related_name="documents",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    content = models.TextField(
        _("content"),
        help_text=_(
            "The raw, text-only data of the document. This field is "
            "primarily used for searching.",
        ),
        blank=True,
    )

    mime_type = models.CharField(_("mime type"), max_length=256, editable=False)

    tags = models.ManyToManyField(
        Tag,
        verbose_name=_("tags"),
        related_name="documents",
        blank=True,
    )

    checksum = models.CharField(
        _("checksum"),
        help_text=_("The checksum of the original document."),
        max_length=32,
        unique=True,
        editable=False,
    )

    archive_checksum = models.CharField(
        _("archive checksum"),
        help_text=_("The checksum of the archived document."),
        max_length=32,
        null=True,
        blank=True,
        editable=False,
    )

    created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)

    modified = models.DateTimeField(
        _("modified"),
        auto_now=True,
        db_index=True,
        editable=False,
    )

    storage_type = models.CharField(
        _("storage type"),
        choices=STORAGE_TYPES,
        default=STORAGE_TYPE_UNENCRYPTED,
        max_length=11,
        editable=False,
    )

    added = models.DateTimeField(
        _("added"),
        default=timezone.now,
        db_index=True,
        editable=False,
    )

    filename = models.FilePathField(
        _("filename"),
        help_text=_("Current filename in storage"),
        max_length=1024,
        default=None,
        null=True,
        unique=True,
        editable=False,
    )

    archive_filename = models.FilePathField(
        _("archive filename"),
        help_text=_("Current archive filename in storage"),
        max_length=1024,
        default=None,
        null=True,
        unique=True,
        editable=False,
    )

    original_filename = models.CharField(
        _("original filename"),
        help_text=_("The original name of the file when it was uploaded"),
        max_length=1024,
        default=None,
        null=True,
        unique=False,
        editable=False,
    )

    # Inclusive bounds enforced by the validators on archive_serial_number.
    ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
    ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF

    archive_serial_number = models.PositiveIntegerField(
        _("archive serial number"),
        help_text=_(
            "The position of this document in your physical document archive.",
        ),
        validators=[
            MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
            MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
        ],
        null=True,
        blank=True,
        unique=True,
        db_index=True,
    )

    class Meta:
        ordering = ("-created",)
        verbose_name = _("document")
        verbose_name_plural = _("documents")

    def __str__(self) -> str:
        """
        Return the local creation date in ISO format, followed by the
        correspondent and the title when they are set, separated by spaces.
        """
        # Convert UTC database time to local time
        pieces = [timezone.localdate(self.created).isoformat()]
        if self.correspondent:
            pieces.append(f"{self.correspondent}")
        if self.title:
            pieces.append(f"{self.title}")
        return " ".join(pieces)

    @property
    def source_path(self) -> Path:
        """
        Resolved path of the original file below ``settings.ORIGINALS_DIR``.

        The stored ``filename`` is used when present. Otherwise the name is
        built from the zero-padded primary key and the file type, with
        ".gpg" appended for GPG storage.
        """
        if self.filename:
            relative_name = str(self.filename)
        else:
            relative_name = f"{self.pk:07}{self.file_type}"
            if self.storage_type == self.STORAGE_TYPE_GPG:
                relative_name += ".gpg"  # pragma: no cover

        return (settings.ORIGINALS_DIR / Path(relative_name)).resolve()

    @property
    def source_file(self):
        """
        Open the original file for binary reading and return the handle.

        The handle is not closed here; closing it is left to the caller.
        """
        return open(self.source_path, "rb")

    @property
    def has_archive_version(self) -> bool:
        """Whether an archive filename is recorded for this document."""
        return self.archive_filename is not None

    @property
    def archive_path(self) -> Optional[Path]:
        """
        Resolved path of the archive version below ``settings.ARCHIVE_DIR``,
        or None when the document has no archive version.
        """
        if not self.has_archive_version:
            return None
        return (settings.ARCHIVE_DIR / Path(str(self.archive_filename))).resolve()

    @property
    def archive_file(self):
        """
        Open the archive version for binary reading and return the handle.

        The handle is not closed here; closing it is left to the caller.
        NOTE(review): ``archive_path`` is None when there is no archive
        version and is passed to ``open`` unchanged in that case.
        """
        return open(self.archive_path, "rb")

    def get_public_filename(self, archive=False, counter=0, suffix=None) -> str:
        """
        Returns a sanitized filename for the document, not including any paths.

        The name starts with ``str(self)``. A truthy ``counter`` adds
        "_NN", a truthy ``suffix`` is appended as given, and the extension
        is ".pdf" for the archive version or the document's file type
        otherwise.
        """
        pieces = [str(self)]

        if counter:
            pieces.append(f"_{counter:02}")

        if suffix:
            pieces.append(suffix)

        pieces.append(".pdf" if archive else self.file_type)

        return pathvalidate.sanitize_filename("".join(pieces), replacement_text="-")

    @property
    def file_type(self):
        """Default file extension looked up from the document's mime type."""
        return get_default_file_extension(self.mime_type)

    @property
    def thumbnail_path(self) -> Path:
        """
        Resolved path of the WebP thumbnail below ``settings.THUMBNAIL_DIR``,
        with ".gpg" appended for GPG storage.
        """
        thumbnail_name = f"{self.pk:07}.webp"
        if self.storage_type == self.STORAGE_TYPE_GPG:
            thumbnail_name = f"{thumbnail_name}.gpg"

        return (settings.THUMBNAIL_DIR / Path(thumbnail_name)).resolve()

    @property
    def thumbnail_file(self):
        """
        Open the thumbnail for binary reading and return the handle.

        The handle is not closed here; closing it is left to the caller.
        """
        return open(self.thumbnail_path, "rb")

    @property
    def created_date(self):
        """The creation timestamp converted to a local date."""
        return timezone.localdate(self.created)
2016-02-27 20:18:50 +00:00
class Log(models.Model):
    """
    A single log message stored in the database, with its level and an
    optional group identifier.
    """

    # Choices reuse the numeric level constants of the logging module.
    LEVELS = (
        (logging.DEBUG, _("debug")),
        (logging.INFO, _("information")),
        (logging.WARNING, _("warning")),
        (logging.ERROR, _("error")),
        (logging.CRITICAL, _("critical")),
    )

    group = models.UUIDField(_("group"), null=True, blank=True)

    message = models.TextField(_("message"))

    level = models.PositiveIntegerField(
        _("level"),
        default=logging.INFO,
        choices=LEVELS,
    )

    created = models.DateTimeField(_("created"), auto_now_add=True)

    class Meta:
        ordering = ("-created",)
        verbose_name = _("log")
        verbose_name_plural = _("logs")

    def __str__(self):
        """Return the log message text."""
        return self.message
2020-11-21 12:12:19 +01:00
class SavedView(ModelWithOwner):
    """
    A named, owned view with display flags and an optional sort order.
    """

    name = models.CharField(_("name"), max_length=128)

    show_on_dashboard = models.BooleanField(_("show on dashboard"))
    show_in_sidebar = models.BooleanField(_("show in sidebar"))

    # Name of the field to sort by; may be left unset.
    sort_field = models.CharField(
        _("sort field"),
        max_length=128,
        blank=True,
        null=True,
    )
    sort_reverse = models.BooleanField(_("sort reverse"), default=False)

    class Meta:
        ordering = ("name",)
        verbose_name = _("saved view")
        verbose_name_plural = _("saved views")
2020-12-12 15:46:56 +01:00
class SavedViewFilterRule(models.Model):
    """
    One filter rule of a saved view: a numeric rule type plus an optional
    string value.
    """

    # (stored value, translated label) pairs; the numbers are what is
    # persisted in ``rule_type``.
    RULE_TYPES = [
        (0, _("title contains")),
        (1, _("content contains")),
        (2, _("ASN is")),
        (3, _("correspondent is")),
        (4, _("document type is")),
        (5, _("is in inbox")),
        (6, _("has tag")),
        (7, _("has any tag")),
        (8, _("created before")),
        (9, _("created after")),
        (10, _("created year is")),
        (11, _("created month is")),
        (12, _("created day is")),
        (13, _("added before")),
        (14, _("added after")),
        (15, _("modified before")),
        (16, _("modified after")),
        (17, _("does not have tag")),
        (18, _("does not have ASN")),
        (19, _("title or content contains")),
        (20, _("fulltext query")),
        (21, _("more like this")),
        (22, _("has tags in")),
        (23, _("ASN greater than")),
        (24, _("ASN less than")),
        (25, _("storage path is")),
        (26, _("has correspondent in")),
        (27, _("does not have correspondent in")),
        (28, _("has document type in")),
        (29, _("does not have document type in")),
        (30, _("has storage path in")),
        (31, _("does not have storage path in")),
        (32, _("owner is")),
        (33, _("has owner in")),
        (34, _("does not have owner")),
        (35, _("does not have owner in")),
    ]

    # Rules are removed together with the saved view they belong to.
    saved_view = models.ForeignKey(
        SavedView,
        verbose_name=_("saved view"),
        related_name="filter_rules",
        on_delete=models.CASCADE,
    )

    rule_type = models.PositiveIntegerField(_("rule type"), choices=RULE_TYPES)

    value = models.CharField(_("value"), max_length=255, null=True, blank=True)

    class Meta:
        verbose_name = _("filter rule")
        verbose_name_plural = _("filter rules")

    def __str__(self) -> str:
        """Return a label showing the rule type and value."""
        return f"SavedViewFilterRule: {self.rule_type} : {self.value}"
2020-12-12 15:46:56 +01:00
# TODO: why is this in the models file?
# TODO: how about, what is this and where is it documented?
# It appears to parse JSON from an environment variable to get a title and date from
# the filename, if possible, as a higher priority than either document filename or
# content parsing
2018-04-22 16:28:03 +01:00
class FileInfo:
    """
    Metadata parsed from a document's filename.

    ``REGEXES`` is tried in order: a "<timestamp>Z - <title>" form that
    yields both a creation date and a title, then a catch-all that yields
    only a title.
    """

    REGEXES = OrderedDict(
        [
            (
                "created-title",
                re.compile(
                    r"^(?P<created>\d{8}(\d{6})?Z) - (?P<title>.*)$",
                    flags=re.IGNORECASE,
                ),
            ),
            ("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)),
        ],
    )

    def __init__(
        self,
        created=None,
        correspondent=None,
        title=None,
        tags=(),
        extension=None,
    ):
        """Store the given values as attributes of the same names."""
        self.created = created
        self.title = title
        self.extension = extension
        self.correspondent = correspondent
        self.tags = tags

    @classmethod
    def _get_created(cls, created):
        """
        Convert the matched ``created`` text into a datetime.

        The final character (the "Z") is dropped, the remainder is
        right-padded with zeros to 14 characters so a date-only value gets
        a midnight time, and "Z" is appended again before parsing. Returns
        None when the parser raises ValueError.
        """
        try:
            return dateutil.parser.parse(f"{created[:-1]:0<14}Z")
        except ValueError:
            return None

    @classmethod
    def _get_title(cls, title):
        """Return the matched title unchanged."""
        return title

    @classmethod
    def _mangle_property(cls, properties, name):
        """
        Replace ``properties[name]`` in place with the result of the
        matching ``_get_<name>`` converter; do nothing if the key is absent.
        """
        if name in properties:
            properties[name] = getattr(cls, f"_get_{name}")(properties[name])

    @classmethod
    def _from_stem(cls, stem) -> "FileInfo":
        """
        Build a FileInfo from a filename whose extension was already removed.

        The first pattern in ``REGEXES`` that matches supplies the
        properties. If none matches (the catch-all fails when the name
        contains an embedded newline, because "." does not match one), the
        whole stem is used as the title so that callers always receive a
        FileInfo instead of None.
        """
        for regex in cls.REGEXES.values():
            m = regex.match(stem)
            if m:
                properties = m.groupdict()
                cls._mangle_property(properties, "created")
                cls._mangle_property(properties, "title")
                return cls(**properties)
        return cls(title=stem)

    @classmethod
    def from_filename(cls, filename) -> "FileInfo":
        """
        Parse ``filename`` into a FileInfo.

        Each (pattern, replacement) pair in
        ``settings.FILENAME_PARSE_TRANSFORMS`` is tried in order and the
        loop stops after the first one that substitutes anything. The
        extension is then stripped and the remainder is matched against
        ``REGEXES``.
        """
        # Rewrite the filename before parsing its components by applying
        # at most one of the configured transformations.
        for pattern, repl in settings.FILENAME_PARSE_TRANSFORMS:
            (filename, count) = pattern.subn(repl, filename)
            if count:
                break

        # do this after the transforms so that the transforms can do whatever
        # with the file extension.
        filename_no_ext = os.path.splitext(filename)[0]

        if filename_no_ext == filename and filename.startswith("."):
            # This is a very special case where there is no text before the
            # file type.
            # TODO: this should be handled better. The ext is not removed
            #  because usually, files like '.pdf' are just hidden files
            #  with the name pdf, but in our case, its more likely that
            #  there's just no name to begin with.
            filename = ""
            # This isn't too bad either, since we'll just not match anything
            # and return an empty title. TODO: actually, this is kinda bad.
        else:
            filename = filename_no_ext

        # Parse filename components.
        return cls._from_stem(filename)
2022-05-06 22:10:35 -07:00
# Extending User Model Using a One-To-One Link
2022-05-07 08:11:10 -07:00
class UiSettings(models.Model):
    """
    Per-user settings stored as JSON, linked one-to-one to the user.
    """

    # Deleted together with the user it belongs to.
    user = models.OneToOneField(
        User,
        related_name="ui_settings",
        on_delete=models.CASCADE,
    )
    settings = models.JSONField(null=True)

    def __str__(self):
        """Return the username of the linked user."""
        return self.user.username
class PaperlessTask(models.Model):
    """
    A record of a Celery task: its id, name, related filename, state,
    timestamps and result.
    """

    # Celery's state names, sorted, and the matching (value, label) pairs.
    ALL_STATES = sorted(states.ALL_STATES)
    TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))

    task_id = models.CharField(
        verbose_name=_("Task ID"),
        help_text=_("Celery ID for the Task that was run"),
        max_length=255,
        unique=True,
    )

    acknowledged = models.BooleanField(
        verbose_name=_("Acknowledged"),
        help_text=_("If the task is acknowledged via the frontend or API"),
        default=False,
    )

    task_file_name = models.CharField(
        verbose_name=_("Task Filename"),
        help_text=_("Name of the file which the Task was run for"),
        max_length=255,
        null=True,
    )

    task_name = models.CharField(
        verbose_name=_("Task Name"),
        help_text=_("Name of the Task which was run"),
        max_length=255,
        null=True,
    )

    status = models.CharField(
        verbose_name=_("Task State"),
        help_text=_("Current state of the task being run"),
        choices=TASK_STATE_CHOICES,
        default=states.PENDING,
        max_length=30,
    )

    date_created = models.DateTimeField(
        verbose_name=_("Created DateTime"),
        help_text=_("Datetime field when the task result was created in UTC"),
        default=timezone.now,
        null=True,
    )

    date_started = models.DateTimeField(
        verbose_name=_("Started DateTime"),
        help_text=_("Datetime field when the task was started in UTC"),
        default=None,
        null=True,
    )

    date_done = models.DateTimeField(
        verbose_name=_("Completed DateTime"),
        help_text=_("Datetime field when the task was completed in UTC"),
        default=None,
        null=True,
    )

    result = models.TextField(
        verbose_name=_("Result Data"),
        help_text=_(
            "The data returned by the task",
        ),
        default=None,
        null=True,
    )

    def __str__(self) -> str:
        """Return a label containing the task id."""
        return f"Task {self.task_id}"
2022-08-07 17:45:49 -07:00
2023-03-17 16:36:08 -07:00
class Note(models.Model):
    """
    A text note attached to a document and optionally attributed to a user.
    """

    note = models.TextField(
        _("content"),
        help_text=_("Note for the document"),
        blank=True,
    )

    created = models.DateTimeField(
        _("created"),
        db_index=True,
        default=timezone.now,
    )

    # Notes are removed together with their document.
    document = models.ForeignKey(
        Document,
        verbose_name=_("document"),
        related_name="notes",
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )

    # The user reference is cleared, not cascaded, when the user is removed.
    user = models.ForeignKey(
        User,
        verbose_name=_("user"),
        related_name="notes",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    class Meta:
        ordering = ("created",)
        verbose_name = _("note")
        verbose_name_plural = _("notes")

    def __str__(self):
        """Return the note text."""
        return self.note
class ShareLink(models.Model):
    """
    A link to one document, identified by a unique slug, with an optional
    expiration and a choice of which file version it refers to.
    """

    class FileVersion(models.TextChoices):
        ARCHIVE = ("archive", _("Archive"))
        ORIGINAL = ("original", _("Original"))

    created = models.DateTimeField(
        _("created"),
        default=timezone.now,
        blank=True,
        db_index=True,
        editable=False,
    )

    expiration = models.DateTimeField(
        _("expiration"),
        null=True,
        blank=True,
        db_index=True,
    )

    slug = models.SlugField(
        _("slug"),
        unique=True,
        blank=True,
        db_index=True,
        editable=False,
    )

    # Share links are removed together with their document.
    document = models.ForeignKey(
        Document,
        verbose_name=_("document"),
        related_name="share_links",
        on_delete=models.CASCADE,
        blank=True,
    )

    file_version = models.CharField(
        choices=FileVersion.choices,
        default=FileVersion.ARCHIVE,
        max_length=50,
    )

    owner = models.ForeignKey(
        User,
        verbose_name=_("owner"),
        related_name="share_links",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    class Meta:
        ordering = ("created",)
        verbose_name = _("share link")
        verbose_name_plural = _("share links")

    def __str__(self):
        """Return a label containing the linked document's title."""
        return f"Share Link for {self.document.title}"
class ConsumptionTemplate(models.Model):
    """
    A named, ordered template made of filters (sources, path, filename,
    mail rule) and of values to assign (title, tags, type, correspondent,
    storage path, owner and permissions).
    """

    class DocumentSourceChoices(models.IntegerChoices):
        CONSUME_FOLDER = DocumentSource.ConsumeFolder.value, _("Consume Folder")
        API_UPLOAD = DocumentSource.ApiUpload.value, _("Api Upload")
        MAIL_FETCH = DocumentSource.MailFetch.value, _("Mail Fetch")

    name = models.CharField(_("name"), max_length=256, unique=True)

    order = models.IntegerField(_("order"), default=0)

    # The default selects all three sources.
    sources = MultiSelectField(
        max_length=5,
        choices=DocumentSourceChoices.choices,
        default=f"{DocumentSource.ConsumeFolder},{DocumentSource.ApiUpload},{DocumentSource.MailFetch}",
    )

    # --- filters -----------------------------------------------------------

    filter_path = models.CharField(
        _("filter path"),
        help_text=_(
            "Only consume documents with a path that matches "
            "this if specified. Wildcards specified as * are "
            "allowed. Case insensitive.",
        ),
        max_length=256,
        null=True,
        blank=True,
    )

    filter_filename = models.CharField(
        _("filter filename"),
        help_text=_(
            "Only consume documents which entirely match this "
            "filename if specified. Wildcards such as *.pdf or "
            "*invoice* are allowed. Case insensitive.",
        ),
        max_length=256,
        null=True,
        blank=True,
    )

    filter_mailrule = models.ForeignKey(
        "paperless_mail.MailRule",
        verbose_name=_("filter documents from this mail rule"),
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    # --- assignments -------------------------------------------------------

    assign_title = models.CharField(
        _("assign title"),
        help_text=_(
            "Assign a document title, can include some placeholders, "
            "see documentation.",
        ),
        max_length=256,
        null=True,
        blank=True,
    )

    assign_tags = models.ManyToManyField(
        Tag,
        verbose_name=_("assign this tag"),
        blank=True,
    )

    assign_document_type = models.ForeignKey(
        DocumentType,
        verbose_name=_("assign this document type"),
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    assign_correspondent = models.ForeignKey(
        Correspondent,
        verbose_name=_("assign this correspondent"),
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    assign_storage_path = models.ForeignKey(
        StoragePath,
        verbose_name=_("assign this storage path"),
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    # related_name="+" on the fields below tells Django not to create a
    # reverse accessor on User / Group.
    assign_owner = models.ForeignKey(
        User,
        verbose_name=_("assign this owner"),
        related_name="+",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    assign_view_users = models.ManyToManyField(
        User,
        verbose_name=_("grant view permissions to these users"),
        related_name="+",
        blank=True,
    )

    assign_view_groups = models.ManyToManyField(
        Group,
        verbose_name=_("grant view permissions to these groups"),
        related_name="+",
        blank=True,
    )

    assign_change_users = models.ManyToManyField(
        User,
        verbose_name=_("grant change permissions to these users"),
        related_name="+",
        blank=True,
    )

    assign_change_groups = models.ManyToManyField(
        Group,
        verbose_name=_("grant change permissions to these groups"),
        related_name="+",
        blank=True,
    )

    class Meta:
        verbose_name = _("consumption template")
        verbose_name_plural = _("consumption templates")

    def __str__(self):
        """Return the template's name."""
        return f"{self.name}"
class CustomField(models.Model):
    """
    Defines the name and type of a custom field
    """

    class FieldDataType(models.TextChoices):
        STRING = ("string", _("String"))
        URL = ("url", _("URL"))
        DATE = ("date", _("Date"))
        BOOL = ("boolean", _("Boolean"))
        INT = ("integer", _("Integer"))
        FLOAT = ("float", _("Float"))
        MONETARY = ("monetary", _("Monetary"))

    created = models.DateTimeField(
        _("created"),
        default=timezone.now,
        editable=False,
        db_index=True,
    )

    name = models.CharField(max_length=128)

    # One of the FieldDataType values; not editable.
    data_type = models.CharField(
        _("data type"),
        choices=FieldDataType.choices,
        max_length=50,
        editable=False,
    )

    class Meta:
        ordering = ("created",)
        verbose_name = _("custom field")
        verbose_name_plural = _("custom fields")
        constraints = [
            # Field names are unique.
            models.UniqueConstraint(
                fields=["name"],
                name="%(app_label)s_%(class)s_unique_name",
            ),
        ]

    def __str__(self) -> str:
        """Return the field's name and data type."""
        return f"{self.name} : {self.data_type}"
class CustomFieldInstance(models.Model):
    """
    A single instance of a field, attached to a CustomField for the name and type
    and attached to a single Document to be metadata for it
    """

    created = models.DateTimeField(
        _("created"),
        default=timezone.now,
        editable=False,
        db_index=True,
    )

    document = models.ForeignKey(
        Document,
        related_name="custom_fields",
        on_delete=models.CASCADE,
        null=False,
        blank=False,
        editable=False,
    )

    field = models.ForeignKey(
        CustomField,
        related_name="fields",
        on_delete=models.CASCADE,
        null=False,
        blank=False,
        editable=False,
    )

    # Actual data storage: one nullable column per supported data type.
    value_text = models.CharField(max_length=128, null=True)

    value_bool = models.BooleanField(null=True)

    value_url = models.URLField(null=True)

    value_date = models.DateField(null=True)

    value_int = models.IntegerField(null=True)

    value_float = models.FloatField(null=True)

    value_monetary = models.DecimalField(null=True, decimal_places=2, max_digits=12)

    class Meta:
        ordering = ("created",)
        verbose_name = _("custom field instance")
        verbose_name_plural = _("custom field instances")
        constraints = [
            # A document holds at most one instance of a given field.
            models.UniqueConstraint(
                fields=["document", "field"],
                name="%(app_label)s_%(class)s_unique_document_field",
            ),
        ]

    def __str__(self) -> str:
        """Return the field name followed by the stored value."""
        field_name = str(self.field.name)
        return field_name + f" : {self.value}"

    @property
    def value(self):
        """
        Based on the data type, access the actual value the instance stores
        A little shorthand/quick way to get what is actually here

        Raises NotImplementedError when the data type matches none of the
        known FieldDataType members.
        """
        kinds = CustomField.FieldDataType
        # Checked in order with ``==``; the first matching type wins.
        column_for_type = (
            (kinds.STRING, "value_text"),
            (kinds.URL, "value_url"),
            (kinds.DATE, "value_date"),
            (kinds.BOOL, "value_bool"),
            (kinds.INT, "value_int"),
            (kinds.FLOAT, "value_float"),
            (kinds.MONETARY, "value_monetary"),
        )
        for kind, column in column_for_type:
            if self.field.data_type == kind:
                return getattr(self, column)
        raise NotImplementedError(self.field.data_type)
# Register the audited models only when audit logging is enabled; the
# auditlog import near the top of the file is guarded by the same setting.
if settings.AUDIT_LOG_ENABLED:
    # Document additionally tracks changes to its "tags" relation.
    auditlog.register(Document, m2m_fields={"tags"})
    for audited_model in (
        Correspondent,
        Tag,
        DocumentType,
        Note,
        CustomField,
        CustomFieldInstance,
    ):
        auditlog.register(audited_model)