paperless-ngx/src/documents/models.py

# coding=utf-8

import logging
import os
import re
import uuid
from collections import OrderedDict

import dateutil.parser
from django.conf import settings
from django.db import models
from django.template.defaultfilters import slugify
from django.utils import timezone
from fuzzywuzzy import fuzz

from .managers import LogManager

try:
    from django.core.urlresolvers import reverse
except ImportError:
    from django.urls import reverse


class MatchingModel(models.Model):
    """
    Abstract base model for named objects.

    Provides a unique ``name``, a ``slug`` that is derived from the name when
    it is left empty, and the ``automatic_classification`` flag.  Instances
    are ordered by name by default.
    """

    name = models.CharField(max_length=128, unique=True)
    slug = models.SlugField(blank=True)

    automatic_classification = models.BooleanField(
        default=False,
        help_text="Automatically assign to newly added documents based on "
                  "current usage in your document collection."
    )

    class Meta:
        abstract = True
        ordering = ("name",)

    def __str__(self):
        return self.name

    def save(self, *args, **kwargs):
        """
        Save the object, deriving the slug from the name if it is empty.

        All positional and keyword arguments are passed through unchanged to
        the parent class' ``save()``.
        """

        if not self.slug:
            self.slug = slugify(self.name)

        # Cooperative call rather than models.Model.save(self, ...), so any
        # class placed between this one and Model in the MRO is not skipped.
        super().save(*args, **kwargs)


class Correspondent(MatchingModel):
    """
    A correspondent that documents can be assigned to (see the
    ``correspondent`` foreign key on ``Document``).
    """

    # This regex is probably more restrictive than it needs to be, but it's
    # better safe than sorry.
    # It matches one or more word characters, hyphens, spaces, commas,
    # periods or apostrophes, and nothing else.
    SAFE_REGEX = re.compile(r"^[\w\- ,.']+$")

    class Meta:
        # Default ordering: alphabetically by name.
        ordering = ("name",)


class Tag(MatchingModel):
    """
    A tag that can be attached to any number of documents (see the ``tags``
    many-to-many field on ``Document``).
    """

    # Choices for ``colour``: (stored integer, hex colour code).
    COLOURS = (
        (1, "#a6cee3"),
        (2, "#1f78b4"),
        (3, "#b2df8a"),
        (4, "#33a02c"),
        (5, "#fb9a99"),
        (6, "#e31a1c"),
        (7, "#fdbf6f"),
        (8, "#ff7f00"),
        (9, "#cab2d6"),
        (10, "#6a3d9a"),
        (11, "#b15928"),
        (12, "#000000"),
        (13, "#cccccc")
    )

    # Stored as the integer key from COLOURS; defaults to the first entry.
    colour = models.PositiveIntegerField(choices=COLOURS, default=1)

    # Flag only; the tagging of newly consumed documents described in the
    # help text is not implemented in this class.
    is_inbox_tag = models.BooleanField(
        default=False,
        help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.")

    # Flag only; the protection from automatic modification described in the
    # help text is not implemented in this class.
    is_archived_tag = models.BooleanField(
        default=False,
        help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)")


class DocumentType(MatchingModel):
    """
    A document type that documents can be assigned to (see the
    ``document_type`` foreign key on ``Document``).  Adds nothing on top of
    what it inherits from ``MatchingModel``.
    """

    pass


class Document(models.Model):
    """
    A single document together with its metadata.

    The original file and its thumbnail live on disk below
    ``settings.MEDIA_ROOT``; their paths are derived from the primary key,
    the file type and the storage type.
    """

    # Supported file types; these values are stored in ``file_type``.
    TYPE_PDF = "pdf"
    TYPE_PNG = "png"
    TYPE_JPG = "jpg"
    TYPE_GIF = "gif"
    TYPE_TIF = "tiff"
    TYPE_TXT = "txt"
    TYPE_CSV = "csv"
    TYPE_MD = "md"
    TYPES = (
        TYPE_PDF,
        TYPE_PNG,
        TYPE_JPG,
        TYPE_GIF,
        TYPE_TIF,
        TYPE_TXT,
        TYPE_CSV,
        TYPE_MD,
    )

    # How the files are stored on disk.
    STORAGE_TYPE_UNENCRYPTED = "unencrypted"
    STORAGE_TYPE_GPG = "gpg"
    STORAGE_TYPES = (
        (STORAGE_TYPE_UNENCRYPTED, "Unencrypted"),
        (STORAGE_TYPE_GPG, "Encrypted with GNU Privacy Guard"),
    )

    correspondent = models.ForeignKey(
        Correspondent,
        blank=True,
        null=True,
        related_name="documents",
        on_delete=models.SET_NULL
    )

    title = models.CharField(max_length=128, blank=True, db_index=True)

    document_type = models.ForeignKey(
        DocumentType,
        blank=True,
        null=True,
        related_name="documents",
        on_delete=models.SET_NULL
    )

    content = models.TextField(
        db_index=True,
        blank=True,
        help_text="The raw, text-only data of the document.  This field is "
                  "primarily used for searching."
    )

    file_type = models.CharField(
        max_length=4,
        editable=False,
        choices=tuple((t, t.upper()) for t in TYPES)
    )

    tags = models.ManyToManyField(Tag, related_name="documents", blank=True)

    checksum = models.CharField(
        max_length=32,
        editable=False,
        unique=True,
        help_text="The checksum of the original document (before it was "
                  "encrypted).  We use this to prevent duplicate document "
                  "imports."
    )

    created = models.DateTimeField(default=timezone.now, db_index=True)
    modified = models.DateTimeField(
        auto_now=True,
        editable=False,
        db_index=True
    )

    storage_type = models.CharField(
        max_length=11,
        choices=STORAGE_TYPES,
        default=STORAGE_TYPE_UNENCRYPTED,
        editable=False
    )

    added = models.DateTimeField(
        default=timezone.now,
        editable=False,
        db_index=True
    )

    archive_serial_number = models.IntegerField(
        blank=True,
        null=True,
        unique=True,
        db_index=True,
        help_text="The position of this document in your physical document "
                  "archive."
    )

    class Meta:
        ordering = ("correspondent", "title")

    def __str__(self):
        """
        Return "<created>: <correspondent> - <title>", leaving out whichever
        of correspondent and title is not set.
        """
        stamp = self.created.strftime("%Y%m%d%H%M%S")
        correspondent = self.correspondent
        title = self.title

        if correspondent and title:
            return "{}: {} - {}".format(stamp, correspondent, title)

        label = correspondent or title
        if label:
            return "{}: {}".format(stamp, label)

        return stamp

    @property
    def source_path(self):
        """Absolute path of the stored original file."""
        name = "{:07}.{}".format(self.pk, self.file_type)
        if self.storage_type == self.STORAGE_TYPE_GPG:
            # Encrypted files carry an additional suffix.
            name = name + ".gpg"

        directory = os.path.join(settings.MEDIA_ROOT, "documents", "originals")
        return os.path.join(directory, name)

    @property
    def source_file(self):
        """The original file opened for binary reading; the caller closes it."""
        path = self.source_path
        return open(path, "rb")

    @property
    def file_name(self):
        """A file name built from the slugified string form and file type."""
        stem = slugify(str(self))
        return stem + "." + self.file_type

    @property
    def download_url(self):
        """URL of the "fetch" view for the document itself."""
        return reverse("fetch", kwargs=dict(kind="doc", pk=self.pk))

    @property
    def thumbnail_path(self):
        """Absolute path of the stored thumbnail image."""
        name = "{:07}.png".format(self.pk)
        if self.storage_type == self.STORAGE_TYPE_GPG:
            # Encrypted files carry an additional suffix.
            name = name + ".gpg"

        directory = os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails")
        return os.path.join(directory, name)

    @property
    def thumbnail_file(self):
        """The thumbnail opened for binary reading; the caller closes it."""
        path = self.thumbnail_path
        return open(path, "rb")

    @property
    def thumbnail_url(self):
        """URL of the "fetch" view for the thumbnail."""
        return reverse("fetch", kwargs=dict(kind="thumb", pk=self.pk))


class Log(models.Model):
    """
    A log message stored in the database.

    Messages carry a ``group`` UUID; a message saved without one is given a
    fresh random UUID.  Entries are ordered most recently modified first.
    """

    # Choices for ``level``, keyed by the standard ``logging`` level numbers.
    LEVELS = (
        (logging.DEBUG, "Debugging"),
        (logging.INFO, "Informational"),
        (logging.WARNING, "Warning"),
        (logging.ERROR, "Error"),
        (logging.CRITICAL, "Critical"),
    )

    group = models.UUIDField(blank=True)
    message = models.TextField()
    level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO)
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    objects = LogManager()

    class Meta:
        ordering = ("-modified",)

    def __str__(self):
        return self.message

    def save(self, *args, **kwargs):
        """
        To allow for the case where we don't want to group the message, we
        shouldn't force the caller to specify a one-time group value.  However,
        allowing group=None means that the manager can't differentiate the
        different un-grouped messages, so instead we set a random one here.

        All positional and keyword arguments are passed through unchanged to
        the parent class' ``save()``.
        """

        if not self.group:
            self.group = uuid.uuid4()

        # Cooperative call rather than models.Model.save(self, ...), so any
        # class placed between this one and Model in the MRO is not skipped.
        super().save(*args, **kwargs)


class FileInfo:
    """
    Metadata extracted from a document's file name.

    Use ``FileInfo.from_path()`` to build an instance.  The file name is
    matched against the patterns in ``REGEXES`` in order; the first pattern
    that matches decides which of created / correspondent / title / tags are
    populated.
    """

    # This epic regex *almost* worked for our needs, so I'm keeping it here for
    # posterity, in the hopes that we might find a way to make it work one day.
    ALMOST_REGEX = re.compile(
        r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?"
        r"((?P<correspondent>{non_separated_word}+){separator})??"
        r"(?P<title>{non_separated_word}+)"
        r"({separator}(?P<tags>[a-z,0-9-]+))?"
        r"\.(?P<extension>[a-zA-Z.-]+)$".format(
            separator=r"\s+-\s+",
            non_separated_word=r"([\w,. ]|([^\s]-))"
        )
    )

    # Alternation of the accepted file extensions (matched case-insensitively).
    formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"

    # Ordered from most to least specific: from_path() uses the first match.
    REGEXES = OrderedDict([
        ("created-correspondent-title-tags", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-title-tags", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-correspondent-title", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*)"
            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-title", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<title>.*)"
            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("correspondent-title-tags", re.compile(
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("correspondent-title", re.compile(
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*)?"
            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("title", re.compile(
            r"(?P<title>.*)"
            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        ))
    ])

    def __init__(self, created=None, correspondent=None, title=None, tags=(),
                 extension=None):

        self.created = created
        self.title = title
        self.extension = extension
        self.correspondent = correspondent
        self.tags = tags

    @classmethod
    def _get_created(cls, created):
        """
        Parse the matched date string (8 or 14 digits followed by "Z").

        A date-only value is right-padded with zeros to 14 digits before
        parsing.  Returns None if the digits do not form a valid date.
        """
        try:
            return dateutil.parser.parse("{:0<14}Z".format(created[:-1]))
        except ValueError:
            return None

    @classmethod
    def _get_correspondent(cls, name):
        """
        Return the Correspondent with this name, creating it if necessary.

        Returns None for an empty name.
        """
        if not name:
            return None
        return Correspondent.objects.get_or_create(name=name, defaults={
            "slug": slugify(name)
        })[0]

    @classmethod
    def _get_title(cls, title):
        """Return the title unchanged."""
        return title

    @classmethod
    def _get_tags(cls, tags):
        """
        Turn a comma-separated tag string into a tuple of Tag objects.

        Tags are looked up by lower-cased slug and created (with the original
        spelling as name) when missing.  Empty entries -- from an empty tag
        string or stray commas -- are skipped, so no tag with an empty slug
        and name is ever created.
        """
        return tuple(
            Tag.objects.get_or_create(
                slug=t.lower(),
                defaults={"name": t}
            )[0]
            for t in tags.split(",")
            if t
        )

    @classmethod
    def _get_extension(cls, extension):
        """Lower-case the extension and normalise jpeg -> jpg, tif -> tiff."""
        r = extension.lower()
        if r == "jpeg":
            return "jpg"
        if r == "tif":
            return "tiff"
        return r

    @classmethod
    def _mangle_property(cls, properties, name):
        """
        Replace ``properties[name]`` in place with the result of the matching
        ``_get_<name>`` converter.  Does nothing if the key is absent.
        """
        if name in properties:
            properties[name] = getattr(cls, "_get_{}".format(name))(
                properties[name]
            )

    @classmethod
    def from_path(cls, path):
        """
        We use a crude naming convention to make handling the correspondent,
        title, and tags easier:
          "<date> - <correspondent> - <title> - <tags>.<suffix>"
          "<date> - <title> - <tags>.<suffix>"
          "<date> - <correspondent> - <title>.<suffix>"
          "<date> - <title>.<suffix>"
          "<correspondent> - <title> - <tags>.<suffix>"
          "<correspondent> - <title>.<suffix>"
          "<title>.<suffix>"

        Only the base name of ``path`` is considered.  Returns a FileInfo
        built from the first matching pattern, or None if the file name
        matches none of them (e.g. an unsupported suffix).
        """

        # The file name does not change between patterns; compute it once.
        file_name = os.path.basename(path)

        for regex in cls.REGEXES.values():
            m = regex.match(file_name)
            if m:
                properties = m.groupdict()
                cls._mangle_property(properties, "created")
                cls._mangle_property(properties, "correspondent")
                cls._mangle_property(properties, "title")
                cls._mangle_property(properties, "tags")
                cls._mangle_property(properties, "extension")
                return cls(**properties)

        return None
Add encoding declaration 2018-01-06 17:23:07 +00:00			`# coding=utf-8`

New logging appears to work 2016-02-27 20:18:50 +00:00			`import logging`
Added GPG encryption for the PDFs 2016-01-01 16:13:59 +00:00			`import os`
#11: automatic tagging support 2016-01-28 07:23:11 +00:00			`import re`
Added an informational log message for consumer start 2016-03-06 17:26:07 +00:00			`import uuid`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`from collections import OrderedDict`

Reorder imports 2018-09-09 21:03:37 +01:00			`import dateutil.parser`
Added GPG encryption for the PDFs 2016-01-01 16:13:59 +00:00			`from django.conf import settings`
It works! 2015-12-20 19:23:33 +00:00			`from django.db import models`
Created a Sender model 2016-01-11 12:52:19 +00:00			`from django.template.defaultfilters import slugify`
Better created & modified + __str__() 2015-12-26 13:20:52 +00:00			`from django.utils import timezone`
Reorder imports 2018-09-09 21:03:37 +01:00			`from fuzzywuzzy import fuzz`
It works! 2015-12-20 19:23:33 +00:00
Added a custom manager for groupped logs 2016-02-28 00:41:03 +00:00			`from .managers import LogManager`

Reorder imports 2018-09-09 21:03:37 +01:00			`try:`
			`from django.core.urlresolvers import reverse`
			`except ImportError:`
			`from django.urls import reverse`

It works! 2015-12-20 19:23:33 +00:00
Removed log components and introduced signals for tags & correspondents 2016-03-28 11:11:15 +01:00			`class MatchingModel(models.Model):`
#11: automatic tagging support 2016-01-28 07:23:11 +00:00
Removed log components and introduced signals for tags & correspondents 2016-03-28 11:11:15 +01:00			`name = models.CharField(max_length=128, unique=True)`
			`slug = models.SlugField(blank=True)`

removed matching model fields, automatic classifier reloading, added autmatic_classification field to matching model 2018-09-04 18:40:26 +02:00			`automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.')`
Allow to create case sensitive matches 2016-10-05 23:43:55 +02:00
Remove old Python2.7-style code 2018-05-27 23:21:36 +01:00			`class Meta:`
Removed log components and introduced signals for tags & correspondents 2016-03-28 11:11:15 +01:00			`abstract = True`
Default sort order for tags to use 'name' 2018-09-02 20:56:45 +01:00			`ordering = ("name",)`
Removed log components and introduced signals for tags & correspondents 2016-03-28 11:11:15 +01:00
			`def __str__(self):`
			`return self.name`

#11: automatic tagging support 2016-01-28 07:23:11 +00:00			`def save(self, args, *kwargs):`
Removed log components and introduced signals for tags & correspondents 2016-03-28 11:11:15 +01:00
			`if not self.slug:`
			`self.slug = slugify(self.name)`

			`models.Model.save(self, args, *kwargs)`


			`class Correspondent(MatchingModel):`

			`# This regex is probably more restrictive than it needs to be, but it's`
			`# better safe than sorry.`
			`SAFE_REGEX = re.compile(r"^[\w\- ,.']+$")`

Remove old Python2.7-style code 2018-05-27 23:21:36 +01:00			`class Meta:`
Removed log components and introduced signals for tags & correspondents 2016-03-28 11:11:15 +01:00			`ordering = ("name",)`


			`class Tag(MatchingModel):`

			`COLOURS = (`
			`(1, "#a6cee3"),`
			`(2, "#1f78b4"),`
			`(3, "#b2df8a"),`
			`(4, "#33a02c"),`
			`(5, "#fb9a99"),`
			`(6, "#e31a1c"),`
			`(7, "#fdbf6f"),`
			`(8, "#ff7f00"),`
			`(9, "#cab2d6"),`
			`(10, "#6a3d9a"),`
			`(11, "#b15928"),`
			`(12, "#000000"),`
			`(13, "#cccccc")`
			`)`

			`colour = models.PositiveIntegerField(choices=COLOURS, default=1)`
Add labels (#9) 2016-01-23 04:40:35 +00:00
inbox tags, archive tags, archive serial number for documents 2018-07-06 13:25:02 +02:00			`is_inbox_tag = models.BooleanField(`
			`default=False,`
			`help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.")`

			`is_archived_tag = models.BooleanField(`
			`default=False,`
			`help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)")`

Add labels (#9) 2016-01-23 04:40:35 +00:00
Added document type 2018-08-24 13:45:15 +02:00			`class DocumentType(MatchingModel):`

			`pass`


It works! 2015-12-20 19:23:33 +00:00			`class Document(models.Model):`

#12: Support image documents 2016-01-29 23:18:03 +00:00			`TYPE_PDF = "pdf"`
			`TYPE_PNG = "png"`
			`TYPE_JPG = "jpg"`
			`TYPE_GIF = "gif"`
			`TYPE_TIF = "tiff"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`TYPE_TXT = "txt"`
			`TYPE_CSV = "csv"`
Fix pycodestyle complaints 2018-09-09 20:55:37 +01:00			`TYPE_MD = "md"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,`
			`TYPE_TXT, TYPE_CSV, TYPE_MD)`
#12: Support image documents 2016-01-29 23:18:03 +00:00
Attach storage_type to Documents 2018-02-04 13:13:24 +00:00			`STORAGE_TYPE_UNENCRYPTED = "unencrypted"`
			`STORAGE_TYPE_GPG = "gpg"`
			`STORAGE_TYPES = (`
			`(STORAGE_TYPE_UNENCRYPTED, "Unencrypted"),`
			`(STORAGE_TYPE_GPG, "Encrypted with GNU Privacy Guard")`
			`)`

Accounted for .sender in a few places 2016-03-04 09:14:50 +00:00			`correspondent = models.ForeignKey(`
Allow correspondents to be deleted without deleting their documents Fixes #235 2017-07-15 19:06:52 +01:00			`Correspondent,`
			`blank=True,`
			`null=True,`
			`related_name="documents",`
			`on_delete=models.SET_NULL`
			`)`
feat: make the content field optional 2017-03-11 16:37:30 +00:00
It works! 2015-12-20 19:23:33 +00:00			`title = models.CharField(max_length=128, blank=True, db_index=True)`
feat: make the content field optional 2017-03-11 16:37:30 +00:00
Added document type 2018-08-24 13:45:15 +02:00			`document_type = models.ForeignKey(`
			`DocumentType,`
			`blank=True,`
			`null=True,`
			`related_name="documents",`
			`on_delete=models.SET_NULL`
			`)`

feat: make the content field optional 2017-03-11 16:37:30 +00:00			`content = models.TextField(`
			`db_index=True,`
			`blank=True,`
			`help_text="The raw, text-only data of the document. This field is "`
			`"primarily used for searching."`
			`)`

#12: Support image documents 2016-01-29 23:18:03 +00:00			`file_type = models.CharField(`
			`max_length=4,`
			`editable=False,`
			`choices=tuple([(t, t.upper()) for t in TYPES])`
			`)`
feat: make the content field optional 2017-03-11 16:37:30 +00:00
The 'API' is written but untested 2016-02-08 23:46:16 +00:00			`tags = models.ManyToManyField(`
			`Tag, related_name="documents", blank=True)`
Added a checksum property and a couple indexes 2016-04-03 16:34:09 +01:00
			`checksum = models.CharField(`
			`max_length=32,`
			`editable=False,`
			`unique=True,`
			`help_text="The checksum of the original document (before it was "`
			`"encrypted). We use this to prevent duplicate document "`
			`"imports."`
			`)`

			`created = models.DateTimeField(`
			`default=timezone.now, db_index=True)`
			`modified = models.DateTimeField(`
			`auto_now=True, editable=False, db_index=True)`
Merge branch 'master' into mcronce-disable_encryption 2018-06-17 16:32:51 +01:00
Attach storage_type to Documents 2018-02-04 13:13:24 +00:00			`storage_type = models.CharField(`
			`max_length=11,`
			`choices=STORAGE_TYPES,`
Change default storage_type to unencrypted 2018-05-27 23:17:21 +01:00			`default=STORAGE_TYPE_UNENCRYPTED,`
Attach storage_type to Documents 2018-02-04 13:13:24 +00:00			`editable=False`
			`)`
Better created & modified + __str__() 2015-12-26 13:20:52 +00:00
Add field 'added' to documents This field indicates when the document was added to the database 2018-04-26 11:58:05 +02:00			`added = models.DateTimeField(`
			`default=timezone.now, editable=False, db_index=True)`
Better created & modified + __str__() 2015-12-26 13:20:52 +00:00
inbox tags, archive tags, archive serial number for documents 2018-07-06 13:25:02 +02:00			`archive_serial_number = models.IntegerField(`
			`blank=True,`
			`null=True,`
			`unique=True,`
			`db_index=True,`
			`help_text="The position of this document in your physical document archive.")`

Remove old Python2.7-style code 2018-05-27 23:21:36 +01:00			`class Meta:`
Accounted for .sender in a few places 2016-03-04 09:14:50 +00:00			`ordering = ("correspondent", "title")`
Better created & modified + __str__() 2015-12-26 13:20:52 +00:00
			`def __str__(self):`
s/Sender/Correspondent & reworked the (im\|ex)porter 2016-03-03 20:52:42 +00:00			`created = self.created.strftime("%Y%m%d%H%M%S")`
Accounted for .sender in a few places 2016-03-04 09:14:50 +00:00			`if self.correspondent and self.title:`
			`return "{}: {} - {}".format(`
			`created, self.correspondent, self.title)`
			`if self.correspondent or self.title:`
			`return "{}: {}".format(created, self.correspondent or self.title)`
Better created & modified + __str__() 2015-12-26 13:20:52 +00:00			`return str(created)`
Added GPG encryption for the PDFs 2016-01-01 16:13:59 +00:00
			`@property`
#12: Support image documents 2016-01-29 23:18:03 +00:00			`def source_path(self):`
Attach storage_type to Documents 2018-02-04 13:13:24 +00:00
			`file_name = "{:07}.{}".format(self.pk, self.file_type)`
			`if self.storage_type == self.STORAGE_TYPE_GPG:`
			`file_name += ".gpg"`

Added GPG encryption for the PDFs 2016-01-01 16:13:59 +00:00			`return os.path.join(`
			`settings.MEDIA_ROOT,`
			`"documents",`
The first stages of getting thumbnails back 2016-03-05 01:57:49 +00:00			`"originals",`
Attach storage_type to Documents 2018-02-04 13:13:24 +00:00			`file_name`
Added GPG encryption for the PDFs 2016-01-01 16:13:59 +00:00			`)`

			`@property`
#12: Support image documents 2016-01-29 23:18:03 +00:00			`def source_file(self):`
			`return open(self.source_path, "rb")`
Fixed a few consumer bugs and added an exporter Rename exporter to export and fixt some debugging Account for files not matching the sender/title pattern Added a safety note Wrong regex on the name parser Renamed the command to something slightly less ambiguous 2016-01-14 19:47:57 +00:00
			`@property`
Added download_url to the Document model 2016-02-15 22:38:18 +00:00			`def file_name(self):`
s/Sender/Correspondent & reworked the (im\|ex)porter 2016-03-03 20:52:42 +00:00			`return slugify(str(self)) + "." + self.file_type`
Added download_url to the Document model 2016-02-15 22:38:18 +00:00
			`@property`
			`def download_url(self):`
The first stages of getting thumbnails back 2016-03-05 01:57:49 +00:00			`return reverse("fetch", kwargs={"kind": "doc", "pk": self.pk})`

			`@property`
			`def thumbnail_path(self):`
Attach storage_type to Documents 2018-02-04 13:13:24 +00:00
			`file_name = "{:07}.png".format(self.pk)`
			`if self.storage_type == self.STORAGE_TYPE_GPG:`
			`file_name += ".gpg"`

The first stages of getting thumbnails back 2016-03-05 01:57:49 +00:00			`return os.path.join(`
			`settings.MEDIA_ROOT,`
			`"documents",`
			`"thumbnails",`
Attach storage_type to Documents 2018-02-04 13:13:24 +00:00			`file_name`
The first stages of getting thumbnails back 2016-03-05 01:57:49 +00:00			`)`

			`@property`
			`def thumbnail_file(self):`
			`return open(self.thumbnail_path, "rb")`

			`@property`
			`def thumbnail_url(self):`
			`return reverse("fetch", kwargs={"kind": "thumb", "pk": self.pk})`
New logging appears to work 2016-02-27 20:18:50 +00:00

			`class Log(models.Model):`

			`LEVELS = (`
			`(logging.DEBUG, "Debugging"),`
			`(logging.INFO, "Informational"),`
			`(logging.WARNING, "Warning"),`
			`(logging.ERROR, "Error"),`
			`(logging.CRITICAL, "Critical"),`
			`)`

			`group = models.UUIDField(blank=True)`
			`message = models.TextField()`
			`level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO)`
			`created = models.DateTimeField(auto_now_add=True)`
			`modified = models.DateTimeField(auto_now=True)`

Added a custom manager for groupped logs 2016-02-28 00:41:03 +00:00			`objects = LogManager()`

Remove old Python2.7-style code 2018-05-27 23:21:36 +01:00			`class Meta:`
New logging appears to work 2016-02-27 20:18:50 +00:00			`ordering = ("-modified",)`

			`def __str__(self):`
			`return self.message`
Added an informational log message for consumer start 2016-03-06 17:26:07 +00:00
			`def save(self, args, *kwargs):`
			`"""`
			`To allow for the case where we don't want to group the message, we`
			`shouldn't force the caller to specify a one-time group value. However,`
			`allowing group=None means that the manager can't differentiate the`
			`different un-grouped messages, so instead we set a random one here.`
			`"""`

			`if not self.group:`
			`self.group = uuid.uuid4()`

			`models.Model.save(self, args, *kwargs)`
Modifications for support for dates 2016-03-24 19:18:33 +00:00

Remove old Python style 2018-04-22 16:28:03 +01:00			`class FileInfo:`
Modifications for support for dates 2016-03-24 19:18:33 +00:00
			`# This epic regex almost worked for our needs, so I'm keeping it here for`
			`# posterity, in the hopes that we might find a way to make it work one day.`
			`ALMOST_REGEX = re.compile(`
			`r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?"`
			`r"((?P<correspondent>{non_separated_word}+){separator})??"`
			`r"(?P<title>{non_separated_word}+)"`
			`r"({separator}(?P<tags>[a-z,0-9-]+))?"`
			`r"\.(?P<extension>[a-zA-Z.-]+)$".format(`
			`separator=r"\s+-\s+",`
			`non_separated_word=r"([\w,. ]\|([^\s]-))"`
			`)`
			`)`

explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`formats = "pdf\|jpe?g\|png\|gif\|tiff?\|te?xt\|md\|csv"`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`REGEXES = OrderedDict([`
			`("created-correspondent-title-tags", re.compile(`
			`r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "`
			`r"(?P<correspondent>.*) - "`
			`r"(?P<title>.*) - "`
			`r"(?P<tags>[a-z0-9\-,]*)"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`r"\.(?P<extension>{})$".format(formats),`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`flags=re.IGNORECASE`
			`)),`
			`("created-title-tags", re.compile(`
			`r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "`
			`r"(?P<title>.*) - "`
			`r"(?P<tags>[a-z0-9\-,]*)"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`r"\.(?P<extension>{})$".format(formats),`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`flags=re.IGNORECASE`
			`)),`
			`("created-correspondent-title", re.compile(`
			`r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "`
			`r"(?P<correspondent>.*) - "`
			`r"(?P<title>.*)"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`r"\.(?P<extension>{})$".format(formats),`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`flags=re.IGNORECASE`
			`)),`
			`("created-title", re.compile(`
			`r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "`
			`r"(?P<title>.*)"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`r"\.(?P<extension>{})$".format(formats),`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`flags=re.IGNORECASE`
			`)),`
			`("correspondent-title-tags", re.compile(`
			`r"(?P<correspondent>.*) - "`
			`r"(?P<title>.*) - "`
			`r"(?P<tags>[a-z0-9\-,]*)"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`r"\.(?P<extension>{})$".format(formats),`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`flags=re.IGNORECASE`
			`)),`
			`("correspondent-title", re.compile(`
			`r"(?P<correspondent>.*) - "`
			`r"(?P<title>.*)?"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`r"\.(?P<extension>{})$".format(formats),`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`flags=re.IGNORECASE`
			`)),`
			`("title", re.compile(`
			`r"(?P<title>.*)"`
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation 2018-09-03 23:46:13 -04:00			`r"\.(?P<extension>{})$".format(formats),`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`flags=re.IGNORECASE`
			`))`
			`])`

			`def __init__(self, created=None, correspondent=None, title=None, tags=(),`
			`extension=None):`

			`self.created = created`
			`self.title = title`
			`self.extension = extension`
			`self.correspondent = correspondent`
			`self.tags = tags`

			`@classmethod`
			`def _get_created(cls, created):`
Don't explode on invalid dates 2018-04-22 16:27:43 +01:00			`try:`
			`return dateutil.parser.parse("{:0<14}Z".format(created[:-1]))`
			`except ValueError:`
			`return None`
Modifications for support for dates 2016-03-24 19:18:33 +00:00
			`@classmethod`
			`def _get_correspondent(cls, name):`
			`if not name:`
			`return None`
			`return Correspondent.objects.get_or_create(name=name, defaults={`
			`"slug": slugify(name)`
			`})[0]`

			`@classmethod`
			`def _get_title(cls, title):`
			`return title`

			`@classmethod`
			`def _get_tags(cls, tags):`
			`r = []`
			`for t in tags.split(","):`
Fix #384: duplicate tags due to case insensitivity 2018-09-02 20:48:51 +01:00			`r.append(Tag.objects.get_or_create(`
			`slug=t.lower(),`
			`defaults={"name": t}`
			`)[0])`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`return tuple(r)`

			`@classmethod`
			`def _get_extension(cls, extension):`
			`r = extension.lower()`
			`if r == "jpeg":`
			`return "jpg"`
Detect .tif files properly Fixes #232 2017-07-15 17:47:17 +01:00			`if r == "tif":`
			`return "tiff"`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`return r`

			`@classmethod`
			`def _mangle_property(cls, properties, name):`
			`if name in properties:`
			`properties[name] = getattr(cls, "_get_{}".format(name))(`
			`properties[name]`
			`)`

			`@classmethod`
			`def from_path(cls, path):`
			`"""`
			`We use a crude naming convention to make handling the correspondent,`
			`title, and tags easier:`
Actually write the date found in the file name 2016-08-20 18:11:51 +01:00			`"<date> - <correspondent> - <title> - <tags>.<suffix>"`
Modifications for support for dates 2016-03-24 19:18:33 +00:00			`"<correspondent> - <title> - <tags>.<suffix>"`
			`"<correspondent> - <title>.<suffix>"`
			`"<title>.<suffix>"`
			`"""`

			`for regex in cls.REGEXES.values():`
			`m = regex.match(os.path.basename(path))`
			`if m:`
			`properties = m.groupdict()`
			`cls._mangle_property(properties, "created")`
			`cls._mangle_property(properties, "correspondent")`
			`cls._mangle_property(properties, "title")`
			`cls._mangle_property(properties, "tags")`
			`cls._mangle_property(properties, "extension")`
			`return cls(**properties)`