paperless-ngx/src/documents/forms.py

94 lines
3 KiB
Python
Raw Normal View History

2016-02-08 23:46:16 +00:00
import magic
import os
from datetime import datetime
from hashlib import sha256
from time import mktime
from django import forms
from django.conf import settings
from .models import Document, Correspondent
2016-02-08 23:46:16 +00:00
from .consumer import Consumer
class UploadForm(forms.Form):
    """
    Validate a signed document upload and queue it for consumption.

    The client submits an optional correspondent and title, the document
    itself, and a signature: the SHA-256 hex digest of
    correspondent + title + the shared secret.  A valid form is "saved" by
    writing the upload into the consumer's directory.
    """

    SECRET = settings.SHARED_SECRET

    # Maps the MIME type detected from the uploaded bytes to the document type
    # that save() uses as the file extension.
    TYPE_LOOKUP = {
        "application/pdf": Document.TYPE_PDF,
        "image/png": Document.TYPE_PNG,
        "image/jpeg": Document.TYPE_JPG,
        "image/gif": Document.TYPE_GIF,
        "image/tiff": Document.TYPE_TIF,
    }

    correspondent = forms.CharField(
        max_length=Correspondent._meta.get_field("name").max_length,
        required=False
    )
    title = forms.CharField(
        max_length=Document._meta.get_field("title").max_length,
        required=False
    )
    document = forms.FileField()
    signature = forms.CharField(max_length=256)

    def clean_correspondent(self):
        """
        I suppose it might look cleaner to use .get_or_create() here, but that
        would also allow someone to fill up the db with bogus correspondents
        before all validation was met.
        """
        corresp = self.cleaned_data.get("correspondent")
        if not corresp:
            return None
        # " - " is the separator save() puts between correspondent and title
        # in the file name, so it must not appear inside either part.
        if not Correspondent.SAFE_REGEX.match(corresp) or " - " in corresp:
            raise forms.ValidationError(
                "That correspondent name is suspicious.")
        return corresp

    def clean_title(self):
        """
        Return the validated title, or None when no title was supplied.

        Raises ValidationError if the title fails the safe-name check or
        contains the " - " separator used in the file name.
        """
        title = self.cleaned_data.get("title")
        if not title:
            return None
        if not Correspondent.SAFE_REGEX.match(title) or " - " in title:
            raise forms.ValidationError("That title is suspicious.")
        # Django stores whatever this method returns as the cleaned value, so
        # the title has to be handed back explicitly.
        return title

    def clean_document(self):
        """
        Read the upload and identify its type from the content.

        Returns a (raw bytes, document type) tuple.  Raises ValidationError
        if the detected MIME type is not a key of TYPE_LOOKUP.
        """
        document = self.cleaned_data.get("document").read()
        with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
            file_type = m.id_buffer(document)
        if file_type not in self.TYPE_LOOKUP:
            raise forms.ValidationError("The file type is invalid.")
        return document, self.TYPE_LOOKUP[file_type]

    def clean(self):
        """
        Verify the signature over correspondent + title + shared secret.

        Returns cleaned_data, which is what Django expects from clean().
        Raises ValidationError when the signature does not match.
        """
        # Function-scope import: nothing else in this class needs it.
        from hmac import compare_digest

        # The optional fields are cleaned to None when empty, and a field
        # that failed its own validation is missing from cleaned_data, so
        # fall back to "" to keep the concatenation below well-defined.
        corresp = self.cleaned_data.get("correspondent") or ""
        title = self.cleaned_data.get("title") or ""
        signature = self.cleaned_data.get("signature") or ""

        # sha256() only accepts bytes.
        payload = (corresp + title + self.SECRET).encode("utf-8")
        expected = sha256(payload).hexdigest()

        # Constant-time comparison; both sides are encoded because
        # compare_digest() rejects non-ASCII str arguments.
        if not compare_digest(
                expected.encode("utf-8"), signature.encode("utf-8")):
            raise forms.ValidationError(
                "The signature provided did not validate.")
        return self.cleaned_data

    def save(self):
        """
        Since the consumer already does a lot of work, it's easier just to save
        to-be-consumed files to the consumption directory rather than have the
        form do that as well.  Think of it as a poor-man's queue server.
        """
        correspondent = self.cleaned_data.get("correspondent")
        title = self.cleaned_data.get("title")
        document, file_type = self.cleaned_data.get("document")

        # mktime() needs a time tuple, not a datetime instance.
        t = int(mktime(datetime.now().timetuple()))

        # NOTE(review): an omitted correspondent or title is None here and is
        # rendered as the text "None" in the file name -- confirm that is what
        # the consumer expects.
        file_name = os.path.join(
            Consumer.CONSUME,
            "{} - {}.{}".format(correspondent, title, file_type)
        )
        with open(file_name, "wb") as f:
            f.write(document)
        # Set the timestamps only after the file is closed, so the buffered
        # write flushed on close cannot update them afterwards.
        os.utime(file_name, times=(t, t))