2022-04-12 19:52:56 -07:00
|
|
|
import datetime
|
2019-05-18 19:25:50 +02:00
|
|
|
import json
|
2020-11-16 18:26:54 +01:00
|
|
|
import math
|
2020-11-02 21:59:36 +01:00
|
|
|
import multiprocessing
|
2015-12-20 19:23:33 +00:00
|
|
|
import os
|
2019-05-18 19:25:50 +02:00
|
|
|
import re
|
2022-07-28 15:36:24 -07:00
|
|
|
import tempfile
|
2022-03-22 11:26:56 -07:00
|
|
|
from typing import Final
|
2022-05-22 16:56:08 -07:00
|
|
|
from typing import Optional
|
2022-04-12 19:52:56 -07:00
|
|
|
from typing import Set
|
2022-04-07 21:17:59 -07:00
|
|
|
from urllib.parse import urlparse
|
2015-12-20 19:23:33 +00:00
|
|
|
|
2021-02-06 17:05:07 +01:00
|
|
|
from concurrent_log_handler.queue import setup_logging_queues
|
2020-12-30 00:26:06 +01:00
|
|
|
from django.utils.translation import gettext_lazy as _
|
2022-03-11 10:55:51 -08:00
|
|
|
from dotenv import load_dotenv
|
2020-12-30 00:26:06 +01:00
|
|
|
|
2017-01-14 18:03:42 +00:00
|
|
|
# Tap paperless.conf if it's available. Only the first file that exists is
# loaded; the remaining candidates are not consulted.
for _conf_candidate in (
    "../paperless.conf",
    "/etc/paperless.conf",
    "/usr/local/etc/paperless.conf",
):
    if os.path.exists(_conf_candidate):
        load_dotenv(_conf_candidate)
        break

# There are multiple levels of concurrency in paperless:
#  - Multiple consumers may be run in parallel.
#  - Each consumer may process multiple pages in parallel.
#  - Each Tesseract OCR run may spawn multiple threads to process a single page
#    slightly faster.
# The performance gains from having tesseract use multiple threads are minimal.
# However, when multiple pages are processed in parallel, the total number of
# OCR threads may exceed the number of available cpu cores, which will
# dramatically slow down the consumption process. This setting limits each
# Tesseract process to one thread.
os.environ["OMP_THREAD_LIMIT"] = "1"
|
2017-01-14 18:03:42 +00:00
|
|
|
|
|
|
|
|
|
2022-03-22 11:26:56 -07:00
|
|
|
def __get_boolean(key: str, default: str = "NO") -> bool:
    """
    Interpret the environment variable `key` as a boolean flag.

    The value (or `default` when the variable is unset) is lower-cased and
    compared against the accepted truthy spellings; anything else is False.
    """
    truthy_values = ("yes", "y", "1", "t", "true")
    raw_value = os.getenv(key, default)
    return raw_value.lower() in truthy_values
|
2018-09-09 21:22:07 +01:00
|
|
|
|
2020-11-12 21:09:45 +01:00
|
|
|
|
2022-03-22 11:26:56 -07:00
|
|
|
def __get_int(key: str, default: int) -> int:
    """
    Read the environment variable `key` and convert it with int().

    `default` is used (and likewise passed through int()) when the variable
    is not set. A value that int() cannot parse raises ValueError.
    """
    raw_value = os.getenv(key)
    if raw_value is None:
        return int(default)
    return int(raw_value)
|
|
|
|
|
|
|
|
|
|
|
2022-05-15 11:48:12 -07:00
|
|
|
def __get_float(key: str, default: float) -> float:
    """
    Read the environment variable `key` and convert it with float().

    `default` is used (and likewise passed through float()) when the variable
    is not set. A value that float() cannot parse raises ValueError.
    """
    raw_value = os.getenv(key)
    if raw_value is None:
        return float(default)
    return float(raw_value)
|
|
|
|
|
|
|
|
|
|
|
2022-07-28 15:36:24 -07:00
|
|
|
def __get_path(key: str, default: str) -> str:
    """
    Resolve a filesystem path setting to a normalized, absolute path.

    The path is taken from the environment variable `key`, falling back to
    `default` when the variable is unset.
    """
    configured = os.environ.get(key, default)
    normalized = os.path.normpath(configured)
    return os.path.abspath(normalized)
|
|
|
|
|
|
|
|
|
|
|
2020-11-09 15:28:12 +01:00
|
|
|
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")


###############################################################################
# Directories                                                                 #
###############################################################################

# Two directory levels above this settings module.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Defaults for the top-level directories live next to BASE_DIR ("<BASE_DIR>/..").
_default_root = os.path.join(BASE_DIR, "..")

STATIC_ROOT = __get_path(
    "PAPERLESS_STATICDIR",
    os.path.join(_default_root, "static"),
)

MEDIA_ROOT = __get_path(
    "PAPERLESS_MEDIA_ROOT",
    os.path.join(_default_root, "media"),
)
_documents_dir = os.path.join(MEDIA_ROOT, "documents")
ORIGINALS_DIR = os.path.join(_documents_dir, "originals")
ARCHIVE_DIR = os.path.join(_documents_dir, "archive")
THUMBNAIL_DIR = os.path.join(_documents_dir, "thumbnails")

DATA_DIR = __get_path(
    "PAPERLESS_DATA_DIR",
    os.path.join(_default_root, "data"),
)

# Left as None when the variable is unset; not normalized like the others.
TRASH_DIR = os.environ.get("PAPERLESS_TRASH_DIR")

# Lock file for synchronizing changes to the MEDIA directory across multiple
# threads.
MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")

LOGGING_DIR = __get_path(
    "PAPERLESS_LOGGING_DIR",
    os.path.join(DATA_DIR, "log"),
)

CONSUMPTION_DIR = __get_path(
    "PAPERLESS_CONSUMPTION_DIR",
    os.path.join(_default_root, "consume"),
)

# This will be created if it doesn't exist
SCRATCH_DIR = __get_path(
    "PAPERLESS_SCRATCH_DIR",
    os.path.join(tempfile.gettempdir(), "paperless"),
)
|
2020-11-03 12:23:24 +01:00
|
|
|
|
2020-11-02 21:59:36 +01:00
|
|
|
###############################################################################
|
|
|
|
|
# Application Definition #
|
|
|
|
|
###############################################################################
|
2015-12-20 19:23:33 +00:00
|
|
|
|
2020-12-28 22:37:53 +01:00
|
|
|
# Extra Django apps supplied as a comma separated list in PAPERLESS_APPS.
_apps_from_env = os.getenv("PAPERLESS_APPS")
env_apps = _apps_from_env.split(",") if _apps_from_env else []

INSTALLED_APPS = [
    "whitenoise.runserver_nostatic",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
    "corsheaders",
    "django_extensions",
    "paperless",
    "documents.apps.DocumentsConfig",
    "paperless_tesseract.apps.PaperlessTesseractConfig",
    "paperless_text.apps.PaperlessTextConfig",
    "paperless_mail.apps.PaperlessMailConfig",
    "django.contrib.admin",
    "rest_framework",
    "rest_framework.authtoken",
    "django_filters",
    "django_q",
    # Environment-provided apps always come after the built-in ones.
    *env_apps,
]

# "channels" is only installed when DEBUG is on.
if DEBUG:
    INSTALLED_APPS += ["channels"]
|
|
|
|
|
|
2020-10-20 00:35:27 +02:00
|
|
|
# Authentication classes for the REST API; this same list object is stored in
# REST_FRAMEWORK below, so later in-place appends remain visible there.
_api_authentication_classes = [
    "rest_framework.authentication.BasicAuthentication",
    "rest_framework.authentication.SessionAuthentication",
    "rest_framework.authentication.TokenAuthentication",
]

# Extra authentication override that is only active in DEBUG mode.
if DEBUG:
    _api_authentication_classes.append(
        "paperless.auth.AngularApiAuthenticationOverride",
    )

REST_FRAMEWORK = {
    "DEFAULT_AUTHENTICATION_CLASSES": _api_authentication_classes,
    "DEFAULT_VERSIONING_CLASS": "rest_framework.versioning.AcceptHeaderVersioning",
    "DEFAULT_VERSION": "1",
    # Make sure these are ordered and that the most recent version appears
    # last
    "ALLOWED_VERSIONS": ["1", "2"],
}
|
|
|
|
|
|
2018-07-04 17:03:59 +02:00
|
|
|
# Middleware entries in the order Django receives them. Code further down this
# file looks up the AuthenticationMiddleware entry by name and also appends to
# this list, so it must stay a mutable list.
MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "whitenoise.middleware.WhiteNoiseMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "corsheaders.middleware.CorsMiddleware",
    "django.middleware.locale.LocaleMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "paperless.middleware.ApiVersionMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
]
|
|
|
|
|
|
2022-02-27 15:26:41 +01:00
|
|
|
ROOT_URLCONF = "paperless.urls"

# Optional URL prefix; BASE_URL is that prefix (or nothing) followed by "/".
FORCE_SCRIPT_NAME = os.environ.get("PAPERLESS_FORCE_SCRIPT_NAME")
BASE_URL = f"{FORCE_SCRIPT_NAME or ''}/"
LOGIN_URL = f"{BASE_URL}accounts/login/"
LOGOUT_REDIRECT_URL = os.environ.get("PAPERLESS_LOGOUT_REDIRECT_URL")

WSGI_APPLICATION = "paperless.wsgi.application"
ASGI_APPLICATION = "paperless.asgi.application"

# Static files are served below BASE_URL unless PAPERLESS_STATIC_URL is set.
STATIC_URL = os.environ.get("PAPERLESS_STATIC_URL", f"{BASE_URL}static/")
WHITENOISE_STATIC_PREFIX = "/static/"
|
2015-12-20 19:23:33 +00:00
|
|
|
|
2021-02-24 22:27:43 +01:00
|
|
|
# TODO: what is this used for?
_template_context_processors = [
    "django.template.context_processors.debug",
    "django.template.context_processors.request",
    "django.contrib.auth.context_processors.auth",
    "django.contrib.messages.context_processors.messages",
]

TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [],
        "APP_DIRS": True,
        "OPTIONS": {"context_processors": _template_context_processors},
    },
]

# Channel layer backed by the Redis instance named in PAPERLESS_REDIS.
_channel_layer_config = {
    "hosts": [os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")],
    "capacity": 2000,  # default 100
    "expiry": 15,  # default 60
}

CHANNEL_LAYERS = {
    "default": {
        "BACKEND": "channels_redis.core.RedisChannelLayer",
        "CONFIG": _channel_layer_config,
    },
}
|
|
|
|
|
|
2020-11-02 21:59:36 +01:00
|
|
|
###############################################################################
|
|
|
|
|
# Security #
|
|
|
|
|
###############################################################################
|
2015-12-20 19:23:33 +00:00
|
|
|
|
2020-11-23 22:50:02 +01:00
|
|
|
AUTO_LOGIN_USERNAME = os.environ.get("PAPERLESS_AUTO_LOGIN_USERNAME")

if AUTO_LOGIN_USERNAME:
    # This overrides everything the auth middleware is doing but still allows
    # regular login in case the provided user does not exist. The auto-login
    # middleware is placed directly after Django's AuthenticationMiddleware.
    _auth_middleware_position = MIDDLEWARE.index(
        "django.contrib.auth.middleware.AuthenticationMiddleware",
    )
    MIDDLEWARE.insert(
        _auth_middleware_position + 1,
        "paperless.auth.AutoLoginMiddleware",
    )
|
2020-11-23 22:50:02 +01:00
|
|
|
|
2021-01-06 16:53:58 +01:00
|
|
|
ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
HTTP_REMOTE_USER_HEADER_NAME = os.environ.get(
    "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME",
    "HTTP_REMOTE_USER",
)

# When enabled, remote-user support is wired into the middleware stack, the
# authentication backends and the REST framework authentication classes.
if ENABLE_HTTP_REMOTE_USER:
    MIDDLEWARE += ["paperless.auth.HttpRemoteUserMiddleware"]
    AUTHENTICATION_BACKENDS = [
        "django.contrib.auth.backends.RemoteUserBackend",
        "django.contrib.auth.backends.ModelBackend",
    ]
    REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"] += [
        "rest_framework.authentication.RemoteUserAuthentication",
    ]
|
2020-11-23 22:50:02 +01:00
|
|
|
|
2021-01-14 13:35:21 +01:00
|
|
|
# X-Frame options for embedded PDF display: "ANY" while DEBUG is on,
# "SAMEORIGIN" otherwise.
X_FRAME_OPTIONS = "ANY" if DEBUG else "SAMEORIGIN"
|
2020-11-23 22:50:02 +01:00
|
|
|
|
2022-04-07 21:17:59 -07:00
|
|
|
|
|
|
|
|
# The next 3 settings can also be set using just PAPERLESS_URL
_csrf_origins = os.getenv("PAPERLESS_CSRF_TRUSTED_ORIGINS")
CSRF_TRUSTED_ORIGINS = _csrf_origins.split(",") if _csrf_origins else []

# We allow CORS from localhost:8000
CORS_ALLOWED_ORIGINS = tuple(
    os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","),
)
if DEBUG:
    # Allow access from the angular development server during debugging
    CORS_ALLOWED_ORIGINS = (*CORS_ALLOWED_ORIGINS, "http://localhost:4200")

# Every host is accepted unless PAPERLESS_ALLOWED_HOSTS narrows the list.
_allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS")
ALLOWED_HOSTS = _allowed_hosts.split(",") if _allowed_hosts else ["*"]

_paperless_url = os.getenv("PAPERLESS_URL")
if _paperless_url:
    _paperless_uri = urlparse(_paperless_url)
    CSRF_TRUSTED_ORIGINS.append(_paperless_url)
    CORS_ALLOWED_ORIGINS = (*CORS_ALLOWED_ORIGINS, _paperless_url)
    if not _allowed_hosts:
        # No explicit host list: replace the wildcard with the URL's host.
        # always allow localhost. Necessary e.g. for healthcheck in docker.
        ALLOWED_HOSTS = [_paperless_uri.hostname, "localhost"]
    else:
        ALLOWED_HOSTS.append(_paperless_uri.hostname)
|
2022-04-07 21:17:59 -07:00
|
|
|
|
2020-11-02 21:59:36 +01:00
|
|
|
# The secret key has a default that should be fine so long as you're hosting
# Paperless on a closed network.  However, if you're putting this anywhere
# public, you should change the key to something unique and verbose.
_default_secret_key = "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee"
SECRET_KEY = os.environ.get("PAPERLESS_SECRET_KEY", _default_secret_key)

# One {"NAME": ...} entry per validator from django.contrib.auth.
AUTH_PASSWORD_VALIDATORS = [
    {"NAME": "django.contrib.auth.password_validation." + validator_class}
    for validator_class in (
        "UserAttributeSimilarityValidator",
        "MinimumLengthValidator",
        "CommonPasswordValidator",
        "NumericPasswordValidator",
    )
]

# Disable Django's artificial limit on the number of form fields to submit at
# once. This is a protection against overloading the server, but since this is
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
# of log entries outweigh the benefits of such a safeguard.
DATA_UPLOAD_MAX_NUMBER_FIELDS = None

# All cookie names share the (by default empty) PAPERLESS_COOKIE_PREFIX.
COOKIE_PREFIX = os.environ.get("PAPERLESS_COOKIE_PREFIX", "")
CSRF_COOKIE_NAME = COOKIE_PREFIX + "csrftoken"
SESSION_COOKIE_NAME = COOKIE_PREFIX + "sessionid"
LANGUAGE_COOKIE_NAME = COOKIE_PREFIX + "django_language"
|
|
|
|
|
|
2020-11-02 21:59:36 +01:00
|
|
|
###############################################################################
|
|
|
|
|
# Database #
|
|
|
|
|
###############################################################################
|
2015-12-20 19:23:33 +00:00
|
|
|
|
|
|
|
|
# SQLite in DATA_DIR is the default database.
DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.sqlite3",
        "NAME": os.path.join(DATA_DIR, "db.sqlite3"),
    },
}

_db_host = os.getenv("PAPERLESS_DBHOST")
if _db_host:
    # Have sqlite available as a second option for management commands
    # This is important when migrating to/from sqlite
    DATABASES["sqlite"] = DATABASES["default"].copy()

    DATABASES["default"] = {
        "ENGINE": "django.db.backends.postgresql_psycopg2",
        "HOST": _db_host,
        "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
        "USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
        "PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
        "OPTIONS": {"sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer")},
    }
    # The port is only set when explicitly configured.
    _db_port = os.getenv("PAPERLESS_DBPORT")
    if _db_port:
        DATABASES["default"]["PORT"] = _db_port

_db_timeout = os.getenv("PAPERLESS_DB_TIMEOUT")
if _db_timeout is not None:
    # Merge the timeout into the default database's OPTIONS, creating the
    # OPTIONS mapping when the active engine configuration has none.
    # NOTE(review): "timeout" is added regardless of the active engine --
    # confirm it is a valid option when the PostgreSQL engine is in use.
    DATABASES["default"].setdefault("OPTIONS", {}).update(
        {"timeout": float(_db_timeout)},
    )

DEFAULT_AUTO_FIELD = "django.db.models.AutoField"
|
2021-04-17 16:00:29 +02:00
|
|
|
|
2020-11-02 21:59:36 +01:00
|
|
|
###############################################################################
|
|
|
|
|
# Internationalization #
|
|
|
|
|
###############################################################################
|
2015-12-20 19:23:33 +00:00
|
|
|
|
2022-02-27 15:26:41 +01:00
|
|
|
LANGUAGE_CODE = "en-us"

# (language code, lazily translated display name) pairs.
LANGUAGES = [
    # needs to be first to act as fallback language
    ("en-us", _("English (US)")),
    ("be-by", _("Belarusian")),
    ("cs-cz", _("Czech")),
    ("da-dk", _("Danish")),
    ("de-de", _("German")),
    ("en-gb", _("English (GB)")),
    ("es-es", _("Spanish")),
    ("fr-fr", _("French")),
    ("it-it", _("Italian")),
    ("lb-lu", _("Luxembourgish")),
    ("nl-nl", _("Dutch")),
    ("pl-pl", _("Polish")),
    ("pt-br", _("Portuguese (Brazil)")),
    ("pt-pt", _("Portuguese")),
    ("ro-ro", _("Romanian")),
    ("ru-ru", _("Russian")),
    ("sl-si", _("Slovenian")),
    ("sr-cs", _("Serbian")),
    ("sv-se", _("Swedish")),
    ("tr-tr", _("Turkish")),
    ("zh-cn", _("Chinese Simplified")),
]

# Translation catalogs live in the "locale" directory below BASE_DIR.
LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")]

TIME_ZONE = os.environ.get("PAPERLESS_TIME_ZONE", "UTC")

USE_I18N = True
USE_L10N = True
USE_TZ = True
|
|
|
|
|
|
2020-11-02 21:59:36 +01:00
|
|
|
###############################################################################
|
|
|
|
|
# Logging #
|
|
|
|
|
###############################################################################
|
2016-02-27 20:18:50 +00:00
|
|
|
|
2021-02-06 17:05:07 +01:00
|
|
|
# Helper imported from concurrent_log_handler.queue; called once while the
# settings module is being loaded.
setup_logging_queues()

# The file handlers configured in LOGGING write into LOGGING_DIR, so make sure
# the directory exists (no error if it is already there).
os.makedirs(LOGGING_DIR, exist_ok=True)
|
|
|
|
|
|
2022-02-27 15:26:41 +01:00
|
|
|
# Rotation limits for the log files, used as "maxBytes" and "backupCount" of
# the rotating file handlers in LOGGING. os.getenv() returns a string whenever
# the variable is set, so the values are coerced to int here; otherwise the
# handlers would receive "1048576" rather than 1048576.
LOGROTATE_MAX_SIZE = int(os.getenv("PAPERLESS_LOGROTATE_MAX_SIZE", 1024 * 1024))
LOGROTATE_MAX_BACKUPS = int(os.getenv("PAPERLESS_LOGROTATE_MAX_BACKUPS", 20))
|
2020-11-27 13:13:11 +01:00
|
|
|
|
2016-02-27 20:18:50 +00:00
|
|
|
# dictConfig-style logging setup: one console handler plus one rotating file
# handler per log file ("paperless.log" and "mail.log") inside LOGGING_DIR.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "verbose": {
            "format": "[{asctime}] [{levelname}] [{name}] {message}",
            "style": "{",
        },
        "simple": {
            "format": "{levelname} {message}",
            "style": "{",
        },
    },
    "handlers": {
        "console": {
            "level": "DEBUG" if DEBUG else "INFO",
            "class": "logging.StreamHandler",
            "formatter": "verbose",
        },
        # Produces the "file_paperless" and "file_mail" handlers, in that order.
        **{
            f"file_{log_name}": {
                "class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
                "formatter": "verbose",
                "filename": os.path.join(LOGGING_DIR, f"{log_name}.log"),
                "maxBytes": LOGROTATE_MAX_SIZE,
                "backupCount": LOGROTATE_MAX_BACKUPS,
            }
            for log_name in ("paperless", "mail")
        },
    },
    "root": {"handlers": ["console"]},
    "loggers": {
        "paperless": {"handlers": ["file_paperless"], "level": "DEBUG"},
        "paperless_mail": {"handlers": ["file_mail"], "level": "DEBUG"},
    },
}
|
|
|
|
|
|
2020-11-09 20:29:02 +01:00
|
|
|
###############################################################################
|
|
|
|
|
# Task queue #
|
|
|
|
|
###############################################################################
|
|
|
|
|
|
2022-07-11 13:54:04 -07:00
|
|
|
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)

WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)

# Per django-q docs, timeout must be smaller than retry
# We default retry to 10s more than the timeout to silence the
# warning, as retry functionality isn't used.
_default_worker_retry = WORKER_TIMEOUT + 10
WORKER_RETRY: Final[int] = __get_int("PAPERLESS_WORKER_RETRY", _default_worker_retry)

# Settings for the django-q cluster, assembled from the values above.
Q_CLUSTER = {
    "name": "paperless",
    "guard_cycle": 5,
    "catch_up": False,
    "recycle": 1,
    "retry": WORKER_RETRY,
    "timeout": WORKER_TIMEOUT,
    "workers": TASK_WORKERS,
    "redis": os.environ.get("PAPERLESS_REDIS", "redis://localhost:6379"),
    "log_level": "DEBUG" if DEBUG else "INFO",
}
|
|
|
|
|
|
2020-11-16 18:52:13 +01:00
|
|
|
|
2022-03-22 11:26:56 -07:00
|
|
|
def default_threads_per_worker(task_workers) -> int:
    """
    Compute the default number of threads each task worker may use.

    The available CPU cores are divided evenly between the workers, rounding
    down, and the result is never less than 1.

    Args:
        task_workers: The number of task workers sharing the machine.

    Returns:
        int: Threads per worker. 1 is returned when `task_workers` is zero or
            negative, or when the CPU count cannot be determined.
    """
    if task_workers <= 0:
        # Nothing sensible to divide by; avoids a ZeroDivisionError for 0.
        return 1

    try:
        # cpu_count() is the call that raises NotImplementedError, so it has
        # to sit inside the try block for the fallback to take effect.
        available_cores = max(multiprocessing.cpu_count(), 1)
    except NotImplementedError:
        return 1

    return max(math.floor(available_cores / task_workers), 1)
|
|
|
|
|
|
|
|
|
|
|
2022-02-27 15:26:41 +01:00
|
|
|
# Threads each worker may use. os.getenv() returns a string whenever the
# variable is set while the computed default is an int, so the value is
# coerced to int to give consumers of this setting a single type.
THREADS_PER_WORKER = int(
    os.getenv(
        "PAPERLESS_THREADS_PER_WORKER",
        default_threads_per_worker(TASK_WORKERS),
    ),
)
|
2020-11-16 18:52:13 +01:00
|
|
|
|
2020-11-02 21:59:36 +01:00
|
|
|
###############################################################################
|
|
|
|
|
# Paperless Specific Settings #
|
|
|
|
|
###############################################################################
|
2016-02-27 20:18:50 +00:00
|
|
|
|
2020-11-16 18:52:13 +01:00
|
|
|
# Integer consumer settings, read from the environment with their defaults.
CONSUMER_POLLING = __get_int("PAPERLESS_CONSUMER_POLLING", 0)

CONSUMER_POLLING_DELAY = __get_int("PAPERLESS_CONSUMER_POLLING_DELAY", 5)

CONSUMER_POLLING_RETRY_COUNT = __get_int(
    "PAPERLESS_CONSUMER_POLLING_RETRY_COUNT",
    5,
)

CONSUMER_INOTIFY_DELAY: Final[float] = __get_float(
    "PAPERLESS_CONSUMER_INOTIFY_DELAY",
    0.5,
)

CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")

CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")

# Ignore glob patterns, relative to PAPERLESS_CONSUMPTION_DIR
# The environment value (and the default) is a JSON encoded list.
_default_ignore_patterns = '[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]'  # noqa: E501
CONSUMER_IGNORE_PATTERNS = list(
    json.loads(
        os.getenv("PAPERLESS_CONSUMER_IGNORE_PATTERNS", _default_ignore_patterns),
    ),
)

CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")

CONSUMER_ENABLE_BARCODES = __get_boolean("PAPERLESS_CONSUMER_ENABLE_BARCODES")

CONSUMER_BARCODE_TIFF_SUPPORT = __get_boolean(
    "PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
)

CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")

OCR_PAGES = __get_int("PAPERLESS_OCR_PAGES", 0)
|
2020-11-22 12:54:08 +01:00
|
|
|
|
2016-01-23 02:28:39 +00:00
|
|
|
# The default language that tesseract will attempt to use when parsing
# documents.  It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.environ.get("PAPERLESS_OCR_LANGUAGE", "eng")

# OCRmyPDF --output-type options are available.
# TODO: validate this setting.
OCR_OUTPUT_TYPE = os.environ.get("PAPERLESS_OCR_OUTPUT_TYPE", "pdfa")

# skip, redo, force
# TODO: validate this.
OCR_MODE = os.environ.get("PAPERLESS_OCR_MODE", "skip")

# Kept as the raw environment string; None when the variable is unset.
OCR_IMAGE_DPI = os.environ.get("PAPERLESS_OCR_IMAGE_DPI")

OCR_CLEAN = os.environ.get("PAPERLESS_OCR_CLEAN", "clean")

OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true")

OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")

OCR_ROTATE_PAGES_THRESHOLD = __get_float(
    "PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
    12.0,
)

# None unless PAPERLESS_OCR_MAX_IMAGE_PIXELS is set, in which case it is an int.
_max_image_pixels = os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS")
OCR_MAX_IMAGE_PIXELS: Optional[int] = (
    int(_max_image_pixels) if _max_image_pixels is not None else None
)

# Kept as a string; the default "{}" is an empty JSON object.
OCR_USER_ARGS = os.environ.get("PAPERLESS_OCR_USER_ARGS", "{}")
|
2018-01-30 20:13:35 +00:00
|
|
|
|
2016-01-23 02:28:39 +00:00
|
|
|
# GNUPG needs a home directory for some reason
GNUPG_HOME = os.environ.get("HOME", "/tmp")

# Convert is part of the ImageMagick package
CONVERT_BINARY = os.environ.get("PAPERLESS_CONVERT_BINARY", "convert")
CONVERT_TMPDIR = os.environ.get("PAPERLESS_CONVERT_TMPDIR")
CONVERT_MEMORY_LIMIT = os.environ.get("PAPERLESS_CONVERT_MEMORY_LIMIT")

GS_BINARY = os.environ.get("PAPERLESS_GS_BINARY", "gs")


# Pre-2.x versions of Paperless stored your documents locally with GPG
# encryption, but that is no longer the default.  This behaviour is still
# available, but it must be explicitly enabled by setting
# `PAPERLESS_PASSPHRASE` in your environment or config file.  The default is to
# store these files unencrypted.
#
# Translation:
# * If you're a new user, you can safely ignore this setting.
# * If you're upgrading from 1.x, this must be set, OR you can run
#   `./manage.py change_storage_type gpg unencrypted` to decrypt your files,
#   after which you can unset this value.
PASSPHRASE = os.environ.get("PAPERLESS_PASSPHRASE")

# Trigger a script after every successful document consumption?
PRE_CONSUME_SCRIPT = os.environ.get("PAPERLESS_PRE_CONSUME_SCRIPT")
POST_CONSUME_SCRIPT = os.environ.get("PAPERLESS_POST_CONSUME_SCRIPT")

# Specify the default date order (for autodetected dates)
DATE_ORDER = os.environ.get("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.environ.get("PAPERLESS_FILENAME_DATE_ORDER")

# Maximum number of dates taken from document start to end to show as suggestions for
# `created` date in the frontend. Duplicates are removed, which can result in
# fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
|
2022-08-06 13:02:08 +02:00
|
|
|
|
2019-05-18 19:25:50 +02:00
|
|
|
# Transformations applied before filename parsing
# PAPERLESS_FILENAME_PARSE_TRANSFORMS holds a JSON list of objects with
# "pattern" and "repl" keys; each becomes a (compiled regex, replacement) pair.
FILENAME_PARSE_TRANSFORMS = [
    (re.compile(transform["pattern"]), transform["repl"])
    for transform in json.loads(
        os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]"),
    )
]
|
2020-11-07 11:30:45 +01:00
|
|
|
|
2020-11-08 13:00:45 +01:00
|
|
|
# Specify the filename format for out files
FILENAME_FORMAT = os.environ.get("PAPERLESS_FILENAME_FORMAT")

# If this is enabled, variables in filename format will resolve to
# empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
    "PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
    "NO",
)

THUMBNAIL_FONT_NAME = os.environ.get(
    "PAPERLESS_THUMBNAIL_FONT_NAME",
    "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
)

# Tika settings
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENDPOINT = os.environ.get("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
TIKA_GOTENBERG_ENDPOINT = os.environ.get(
    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
    "http://localhost:3000",
)

# The Tika app is only installed when Tika support is switched on.
if TIKA_ENABLED:
    INSTALLED_APPS += ["paperless_tika.apps.PaperlessTikaConfig"]
|
|
|
|
|
|
2021-02-04 15:15:11 +01:00
|
|
|
|
2022-04-13 08:04:15 -07:00
|
|
|
def _parse_ignore_dates(
    env_ignore: str,
    date_order: str = DATE_ORDER,
) -> Set[datetime.date]:
    """
    If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
    user provided string(s) into dates

    Args:
        env_ignore (str): The value of the environment variable, comma separated dates
        date_order (str, optional): The format of the date strings.
                                    Defaults to DATE_ORDER.

    Returns:
        Set[datetime.date]: The set of parsed dates. Only the date part of
            each parsed value is kept, and entries that cannot be parsed are
            skipped.
    """
    # Imported here rather than at module level, as in the original.
    import dateparser

    ignored_dates = set()
    for s in env_ignore.split(","):
        d = dateparser.parse(
            s,
            settings={
                "DATE_ORDER": date_order,
            },
        )
        # dateparser.parse() gave a falsy result: nothing to record.
        if d:
            ignored_dates.add(d.date())
    return ignored_dates
|
|
|
|
|
|
|
|
|
|
|
2022-04-13 08:04:15 -07:00
|
|
|
# List dates that should be ignored when trying to parse date from document text
IGNORE_DATES: Set[datetime.date] = set()

_ignore_dates_env = os.getenv("PAPERLESS_IGNORE_DATES")
if _ignore_dates_env is not None:
    IGNORE_DATES = _parse_ignore_dates(_ignore_dates_env)

# Stays the string "default" when the variable is unset (or literally set to
# "default"); any other value is interpreted as a boolean.
if os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default") == "default":
    ENABLE_UPDATE_CHECK = "default"
else:
    ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
|