paperless-ngx/src/documents/file_handling.py

131 lines
4.1 KiB
Python
Raw Normal View History

import datetime
import logging
import os
from collections import defaultdict
import pathvalidate
from django.conf import settings
from django.template.defaultfilters import slugify
def create_source_path_directory(source_path):
    """Ensure the directory that will contain ``source_path`` exists.

    Missing intermediate directories are created; directories that
    already exist are left alone.

    :param source_path: path of the file whose parent directory is needed.
    """
    parent = os.path.dirname(source_path)
    # A bare file name has no directory component. os.makedirs("") would
    # raise FileNotFoundError, and there is nothing to create anyway.
    if parent:
        os.makedirs(parent, exist_ok=True)
2020-11-30 21:38:21 +01:00
def delete_empty_directories(directory, root):
    """Remove ``directory`` and its empty ancestors, stopping at ``root``.

    Starting at ``directory``, each level is removed if it is empty and the
    walk continues with its parent. The walk stops at the first directory
    that cannot be removed (typically because it is not empty) or when
    ``root`` is reached. ``root`` itself is never removed.

    This is a best-effort cleanup: failures are silently ignored.

    :param directory: the deepest directory to try to remove.
    :param root: the directory at which to stop; nothing outside of it is
        ever touched.
    """
    if not os.path.isdir(directory):
        return

    directory = os.path.normpath(directory)
    root = os.path.normpath(root)

    if not directory.startswith(root + os.path.sep):
        # Don't do anything outside our originals folder.
        # os.path.sep is appended so that we avoid cases like:
        #   directory = /home/originals2/test
        #   root      = /home/originals
        # ("/" gets appended to root and startswith fails, as it should.)
        return

    # Go up in the directory hierarchy and try to delete all directories.
    while directory != root:
        try:
            # os.rmdir only succeeds on empty directories, so there is no
            # need to list the contents first. Doing so would also open a
            # window for the directory to vanish or be unreadable.
            os.rmdir(directory)
        except OSError:
            # Not empty, already gone, or not permitted. Whatever the
            # reason, leftover empty directories aren't that bad anyway.
            return

        # Go one level up.
        directory = os.path.normpath(os.path.dirname(directory))
def many_to_dictionary(field):
    """Build a lookup dictionary from the entries of a many-to-many field.

    Every entry returned by ``field.all()`` is stored under its positional
    index, with its slugified name as the value. If the name also contains
    a delimiter, the part before the delimiter becomes an extra (slugified)
    key and the part after it the (slugified) value. The first ``_`` is
    used as the delimiter; if there is none, the first ``-`` is used.

    :param field: object exposing ``all()``, whose items have a ``name``.
    :return: dict mapping indices and slugified key parts to slugified text.
    """
    result = {}

    for position, entry in enumerate(field.all()):
        name = entry.name

        # Always expose the entry by its position.
        result[position] = slugify(name)

        # Pick the delimiter: underscore wins over dash.
        if "_" in name:
            separator = "_"
        elif "-" in name:
            separator = "-"
        else:
            # No delimiter, so there is no key/value pair to record.
            continue

        # Split on the first occurrence of the delimiter only.
        prefix, _, remainder = name.partition(separator)
        result[slugify(prefix)] = slugify(remainder)

    return result
2020-11-21 15:34:00 +01:00
def generate_filename(doc):
    """Return the storage file name for ``doc``.

    If ``settings.PAPERLESS_FILENAME_FORMAT`` is set, it is rendered with
    ``str.format`` using these placeholders: ``title``, ``correspondent``,
    ``document_type``, ``created``, ``created_year``, ``created_month``,
    ``created_day``, ``added``, ``added_year``, ``added_month``,
    ``added_day`` and ``tags``. Values that are not set on the document are
    rendered as ``"none"``. If rendering fails with ``ValueError``,
    ``KeyError`` or ``IndexError``, a warning is logged and only the
    default name is used.

    The zero-padded primary key and ``doc.file_type`` are always appended,
    and ``.gpg`` is added for GPG-encrypted documents.

    :param doc: the document to generate a file name for.
    :return: the generated file name, possibly containing sub-directories
        if the configured format contains path separators.
    """
    path = ""

    try:
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            # Unknown tag keys resolve to slugify(None) instead of raising.
            tags = defaultdict(lambda: slugify(None),
                               many_to_dictionary(doc.tags))

            if doc.correspondent:
                correspondent = pathvalidate.sanitize_filename(
                    doc.correspondent.name, replacement_text="-"
                )
            else:
                correspondent = "none"

            if doc.document_type:
                document_type = pathvalidate.sanitize_filename(
                    doc.document_type.name, replacement_text="-"
                )
            else:
                document_type = "none"

            created = doc.created
            added = doc.added

            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                title=pathvalidate.sanitize_filename(
                    doc.title, replacement_text="-"),
                correspondent=correspondent,
                document_type=document_type,
                # Fall back to "none" for a missing date, like the
                # year/month/day placeholders below. Without the guard,
                # isoformat(None) raises a TypeError that is not caught.
                created=(datetime.date.isoformat(created)
                         if created else "none"),
                created_year=created.year if created else "none",
                created_month=created.month if created else "none",
                created_day=created.day if created else "none",
                added=(datetime.date.isoformat(added)
                       if added else "none"),
                added_year=added.year if added else "none",
                added_month=added.month if added else "none",
                added_day=added.day if added else "none",
                tags=tags,
            )
    except (ValueError, KeyError, IndexError):
        logging.getLogger(__name__).warning(
            f"Invalid PAPERLESS_FILENAME_FORMAT: "
            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

    # Always append the primary key to guarantee uniqueness of filename
    if path:
        filename = "%s-%07i%s" % (path, doc.pk, doc.file_type)
    else:
        filename = "%07i%s" % (doc.pk, doc.file_type)

    # Append .gpg for encrypted files
    if doc.storage_type == doc.STORAGE_TYPE_GPG:
        filename += ".gpg"

    return filename
2020-11-30 21:38:21 +01:00
def archive_name_from_filename(filename):
    """Return ``filename`` with its last extension replaced by ``.pdf``.

    Only the final extension, as determined by ``os.path.splitext``, is
    replaced; a name without an extension simply gets ``.pdf`` appended.

    :param filename: the original file name or path.
    :return: the same name with a ``.pdf`` extension.
    """
    stem, _extension = os.path.splitext(filename)
    return stem + ".pdf"