mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-07 07:15:07 +01:00
added checksums for archived documents.
This commit is contained in:
parent
fdaf419a7e
commit
24767f62c7
5 changed files with 57 additions and 15 deletions
|
|
@ -6,6 +6,7 @@ import os
|
|||
import magic
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
|
||||
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
||||
|
|
@ -42,7 +43,7 @@ class Consumer(LoggingMixin):
|
|||
def pre_check_duplicate(self):
|
||||
with open(self.path, "rb") as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
if Document.objects.filter(checksum=checksum).exists():
|
||||
if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501
|
||||
if settings.CONSUMER_DELETE_DUPLICATES:
|
||||
os.unlink(self.path)
|
||||
raise ConsumerError(
|
||||
|
|
@ -184,6 +185,11 @@ class Consumer(LoggingMixin):
|
|||
self._write(document.storage_type,
|
||||
archive_path, document.archive_path)
|
||||
|
||||
with open(archive_path, 'rb') as f:
|
||||
document.archive_checksum = hashlib.md5(
|
||||
f.read()).hexdigest()
|
||||
document.save()
|
||||
|
||||
# Delete the file only if it was successfully consumed
|
||||
self.log("debug", "Deleting file {}".format(self.path))
|
||||
os.unlink(self.path)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue