2016-04-03 16:34:09 +01:00
# Generated by Django 1.9.4 on 2016-03-28 19:09
import hashlib
import os
import django . utils . timezone
2023-04-20 08:10:17 -07:00
import gnupg
2016-04-03 16:34:09 +01:00
from django . conf import settings
2023-04-20 08:10:17 -07:00
from django . db import migrations
from django . db import models
2016-04-03 16:34:09 +01:00
from django . template . defaultfilters import slugify
from django . utils . termcolors import colorize as colourise # Spelling hurts me
2023-03-28 09:39:30 -07:00
class GnuPG:
    """
    A handy singleton to use when handling encrypted files.

    One ``gnupg.GPG`` instance, rooted at ``settings.GNUPG_HOME``, is created
    when the class body is evaluated and shared by both helpers below.
    """

    # Shared GPG wrapper; built once at class-definition time.
    gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)

    @classmethod
    def decrypted(cls, file_handle):
        """
        Decrypt ``file_handle`` using ``settings.PASSPHRASE`` and return the
        ``data`` attribute of the decryption result.
        """
        return cls.gpg.decrypt_file(
            file_handle,
            passphrase=settings.PASSPHRASE,
        ).data

    @classmethod
    def encrypted(cls, file_handle):
        """
        Symmetrically encrypt ``file_handle`` using ``settings.PASSPHRASE``
        (no recipients) and return the ``data`` attribute of the result.
        """
        return cls.gpg.encrypt_file(
            file_handle,
            recipients=None,
            passphrase=settings.PASSPHRASE,
            symmetric=True,
        ).data
2023-03-28 09:39:30 -07:00
class Document:
    """
    Django's migrations restrict access to model methods, so this is a
    snapshot of the methods that existed at the time this migration was
    written, since we need to make use of a lot of these shortcuts here.

    The constructor copies the fields it needs from the object it is given.
    """

    def __init__(self, doc):
        """Copy the attributes used by this migration from ``doc``."""
        self.pk = doc.pk
        self.correspondent = doc.correspondent
        self.title = doc.title
        self.file_type = doc.file_type
        self.tags = doc.tags
        self.created = doc.created

    def __str__(self):
        """
        Return ``"<created>: <correspondent> - <title>"``, dropping whichever
        of correspondent/title is falsy, or just the timestamp when both are.
        """
        created = self.created.strftime("%Y%m%d%H%M%S")
        if self.correspondent and self.title:
            return f"{created}: {self.correspondent} - {self.title}"
        if self.correspondent or self.title:
            return f"{created}: {self.correspondent or self.title}"
        return created

    @property
    def source_path(self):
        """Path of the encrypted original below ``settings.MEDIA_ROOT``."""
        return os.path.join(
            settings.MEDIA_ROOT,
            "documents",
            "originals",
            # pk is zero-padded to seven digits, e.g. 0000042.pdf.gpg
            f"{self.pk:07}.{self.file_type}.gpg",
        )

    @property
    def source_file(self):
        """
        Open the encrypted original for binary reading.

        The caller owns the returned handle and is responsible for closing
        it (e.g. by using it in a ``with`` statement).
        """
        return open(self.source_path, "rb")

    @property
    def file_name(self):
        """Slugified ``str(self)`` followed by ``.<file_type>``."""
        return slugify(str(self)) + "." + self.file_type
def set_checksums(apps, schema_editor):
    """
    Populate the ``checksum`` column for every existing document.

    Each document's encrypted original is opened, decrypted through
    ``GnuPG.decrypted`` and hashed with MD5; the hex digest is stored on the
    row with a queryset ``update()``.  Returns immediately when there are no
    documents at all.

    :param apps: app registry passed in by ``migrations.RunPython``; used to
        look up the ``documents.Document`` model.
    :param schema_editor: unused.
    :raises RuntimeError: when two documents yield the same checksum.  The
        message names both documents and suggests a command to delete one.
    """
    document_model = apps.get_model("documents", "Document")

    if not document_model.objects.all().exists():
        return

    print(
        colourise(
            "\n\n"
            "This is a one-time only migration to generate checksums for all\n"
            "of your existing documents. If you have a lot of documents\n"
            "though, this may take a while, so a coffee break may be in\n"
            "order."
            "\n",
            opts=("bold",),
        ),
    )

    # checksum -> (pk, file_name) of the first document seen with that digest
    sums = {}
    for d in document_model.objects.all():
        document = Document(d)

        print(
            "{} {} {}".format(
                colourise("*", fg="green"),
                colourise("Generating a checksum for", fg="white"),
                colourise(document.file_name, fg="cyan"),
            ),
        )

        # The handle returned by source_file is closed when the block exits.
        with document.source_file as encrypted:
            checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()

        if checksum in sums:
            first_pk, first_name = sums[checksum]
            error = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(
                p1=colourise(
                    "It appears that you have two identical documents in your collection and\nPaperless no longer supports this (see issue #97). The documents in question\nare:",
                    fg="yellow",
                ),
                p2=colourise(
                    "To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
                    fg="yellow",
                ),
                p3=colourise(
                    "When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
                    fg="yellow",
                ),
                doc1=colourise(
                    f"* {first_name} (id: {first_pk})",
                    fg="red",
                ),
                doc2=colourise(
                    f"* {document.file_name} (id: {document.pk})",
                    fg="red",
                ),
                code=colourise(
                    "$ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell".format(
                        pk=document.pk,
                    ),
                    fg="green",
                ),
                line=colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",)),
            )
            raise RuntimeError(error)

        sums[checksum] = (document.pk, document.file_name)

        # update() on a filtered queryset writes only the checksum column.
        document_model.objects.filter(pk=document.pk).update(checksum=checksum)
def do_nothing(apps, schema_editor):
    """
    No-op callable, passed as the reverse function to ``RunPython`` in this
    migration.  Both arguments are ignored.
    """
    pass
class Migration(migrations.Migration):
    """
    Add the ``checksum`` field to ``document``, fill it in for existing rows
    via ``set_checksums``, and alter ``created`` / ``modified`` to indexed
    ``DateTimeField``s.
    """

    dependencies = [
        ("documents", "0013_auto_20160325_2111"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="checksum",
            field=models.CharField(
                # Placeholder for existing rows; replaced by set_checksums.
                default="-",
                db_index=True,
                editable=False,
                # Length of an MD5 hex digest.
                max_length=32,
                help_text="The checksum of the original document (before it "
                "was encrypted). We use this to prevent duplicate "
                "document imports.",
            ),
            preserve_default=False,
        ),
        # Forward: compute checksums.  Reverse: nothing to undo.
        migrations.RunPython(set_checksums, do_nothing),
        migrations.AlterField(
            model_name="document",
            name="created",
            field=models.DateTimeField(
                db_index=True,
                default=django.utils.timezone.now,
            ),
        ),
        migrations.AlterField(
            model_name="document",
            name="modified",
            field=models.DateTimeField(auto_now=True, db_index=True),
        ),
    ]