paperless-ngx/src/documents/forms.py

103 lines
2.9 KiB
Python
Raw Normal View History

2016-02-08 23:46:16 +00:00
import magic
import os
from datetime import datetime
from time import mktime
from django import forms
from django.conf import settings
from .models import Document, Correspondent
2016-02-08 23:46:16 +00:00
from .consumer import Consumer
class UploadForm(forms.Form):
    """
    Accept a document upload together with an optional correspondent and
    title, validate all three, and drop the file into the consumption
    directory so that the consumer can pick it up.
    """

    SECRET = settings.SHARED_SECRET

    # Maps the MIME type detected from the uploaded bytes to the Document
    # type constant that is used as the file extension in save().
    TYPE_LOOKUP = {
        "application/pdf": Document.TYPE_PDF,
        "image/png": Document.TYPE_PNG,
        "image/jpeg": Document.TYPE_JPG,
        "image/gif": Document.TYPE_GIF,
        "image/tiff": Document.TYPE_TIF,
    }

    correspondent = forms.CharField(
        max_length=Correspondent._meta.get_field("name").max_length,
        required=False
    )
    title = forms.CharField(
        max_length=Document._meta.get_field("title").max_length,
        required=False
    )
    document = forms.FileField()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Filled in by clean_document() once the upload's type is known.
        self._file_type = None

    @staticmethod
    def _require_safe(value, message):
        """
        Raise ``forms.ValidationError(message)`` unless ``value`` matches
        ``Correspondent.SAFE_REGEX`` and does not contain " - ".

        " - " is rejected because save() uses it as the separator between
        the correspondent and the title in the generated file name.
        """
        if not Correspondent.SAFE_REGEX.match(value) or " - " in value:
            raise forms.ValidationError(message)

    def clean_correspondent(self):
        """
        Return the submitted correspondent name, or None if it was left blank.

        I suppose it might look cleaner to use .get_or_create() here, but that
        would also allow someone to fill up the db with bogus correspondents
        before all validation was met.

        Raises forms.ValidationError if the name is not considered safe.
        """
        corresp = self.cleaned_data.get("correspondent")
        if not corresp:
            return None
        self._require_safe(corresp, "That correspondent name is suspicious.")
        return corresp

    def clean_title(self):
        """
        Return the submitted title, or None if it was left blank.

        Raises forms.ValidationError if the title is not considered safe.
        """
        title = self.cleaned_data.get("title")
        if not title:
            return None
        # Titles are checked against the same pattern as correspondent names.
        self._require_safe(title, "That title is suspicious.")
        return title

    def clean_document(self):
        """
        Read the uploaded file, detect its MIME type from the content, and
        return the raw bytes.

        As a side effect, self._file_type is set to the matching
        TYPE_LOOKUP value.

        Raises forms.ValidationError if the detected type is not one of the
        keys of TYPE_LOOKUP.
        """
        document = self.cleaned_data.get("document").read()

        with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
            file_type = m.id_buffer(document)

        if file_type not in self.TYPE_LOOKUP:
            raise forms.ValidationError("The file type is invalid.")

        self._file_type = self.TYPE_LOOKUP[file_type]
        return document

    def save(self):
        """
        Since the consumer already does a lot of work, it's easier just to save
        to-be-consumed files to the consumption directory rather than have the
        form do that as well.  Think of it as a poor-man's queue server.

        The file is written to Consumer.CONSUME as
        "<correspondent> - <title>.<type>" and its access and modification
        times are set to the current time, truncated to whole seconds.
        """
        correspondent = self.cleaned_data.get("correspondent")
        title = self.cleaned_data.get("title")
        document = self.cleaned_data.get("document")

        t = int(mktime(datetime.now().timetuple()))

        # NOTE(review): both fields are optional and their clean_* methods
        # return None when blank, so format() renders the literal text "None"
        # into the file name. The format is left untouched because the
        # consumer that parses it is not visible here -- confirm before
        # changing it.
        file_name = os.path.join(
            Consumer.CONSUME,
            "{} - {}.{}".format(correspondent, title, self._file_type)
        )

        with open(file_name, "wb") as f:
            f.write(document)

        # Stamp the file only after it has been closed: closing flushes any
        # buffered bytes, which would otherwise bump the modification time
        # again and undo this call.
        os.utime(file_name, times=(t, t))