paperless-ngx/src/paperless/settings.py

435 lines
15 KiB
Python
Raw Normal View History

import json
2020-11-16 18:26:54 +01:00
import math
2020-11-02 21:59:36 +01:00
import multiprocessing
2015-12-20 19:23:33 +00:00
import os
import re
2015-12-20 19:23:33 +00:00
from dotenv import load_dotenv
from django.utils.translation import gettext_lazy as _
# Load environment settings from the first paperless.conf that exists. The
# candidates are checked in priority order and only the first match is read.
for _conf_path in (
    "../paperless.conf",
    "/etc/paperless.conf",
    "/usr/local/etc/paperless.conf",
):
    if os.path.exists(_conf_path):
        load_dotenv(_conf_path)
        break
2020-11-12 10:01:22 +01:00
# There are multiple levels of concurrency in paperless:
#  - Multiple consumers may be run in parallel.
#  - Each consumer may process multiple pages in parallel.
#  - Each Tesseract OCR run may spawn multiple threads to process a single page
#    slightly faster.
# The performance gains from having tesseract use multiple threads are minimal.
# However, when multiple pages are processed in parallel, the total number of
# OCR threads may exceed the number of available cpu cores, which will
# dramatically slow down the consumption process. This setting limits each
# Tesseract process to one thread.
# Note that the variable is set unconditionally, overriding any value that is
# already present in the environment.
os.environ['OMP_THREAD_LIMIT'] = "1"
2020-11-12 21:09:45 +01:00
2018-09-12 16:25:23 +02:00
def __get_boolean(key, default="NO"):
    """
    Read the environment variable ``key`` and interpret it as a flag.

    The value (or ``default`` when the variable is unset) is compared
    case-insensitively against the accepted truthy spellings "yes", "y", "1",
    "t" and "true"; anything else yields False.
    """
    truthy_spellings = ("yes", "y", "1", "t", "true")
    raw_value = os.getenv(key, default)
    return raw_value.lower() in truthy_spellings
2020-11-12 21:09:45 +01:00
# NEVER RUN WITH DEBUG IN PRODUCTION.
# Controlled by PAPERLESS_DEBUG and off by default. Further down in this file
# the flag also enables the Angular API authentication override and allows
# CORS requests from the Angular development server.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
2020-11-12 21:09:45 +01:00
2020-11-02 21:59:36 +01:00
###############################################################################
# Directories #
###############################################################################
2015-12-20 19:23:33 +00:00
# Directory that contains the package this settings module lives in.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Each top-level directory can be overridden through the environment and
# otherwise defaults to a sibling of BASE_DIR.
STATIC_ROOT = os.environ.get(
    "PAPERLESS_STATICDIR",
    os.path.join(BASE_DIR, "..", "static"),
)

MEDIA_ROOT = os.environ.get(
    "PAPERLESS_MEDIA_ROOT",
    os.path.join(BASE_DIR, "..", "media"),
)
ORIGINALS_DIR = os.path.join(MEDIA_ROOT, "documents", "originals")
ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")

DATA_DIR = os.environ.get(
    "PAPERLESS_DATA_DIR",
    os.path.join(BASE_DIR, "..", "data"),
)

# Lock file for synchronizing changes to the MEDIA directory across multiple
# threads.
MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")

INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")

CONSUMPTION_DIR = os.environ.get(
    "PAPERLESS_CONSUMPTION_DIR",
    os.path.join(BASE_DIR, "..", "consume"),
)

# This will be created if it doesn't exist
SCRATCH_DIR = os.environ.get("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")
2020-11-02 21:59:36 +01:00
###############################################################################
# Application Definition #
###############################################################################
2015-12-20 19:23:33 +00:00
2020-12-28 22:37:53 +01:00
# Additional Django apps can be supplied as a comma-separated list in
# PAPERLESS_APPS; they are appended after the built-in apps.
env_apps = []
if os.getenv("PAPERLESS_APPS"):
    env_apps = os.getenv("PAPERLESS_APPS").split(",")

INSTALLED_APPS = [
    "whitenoise.runserver_nostatic",

    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",

    "corsheaders",
    "django_extensions",

    "paperless",
    "documents.apps.DocumentsConfig",
    "paperless_tesseract.apps.PaperlessTesseractConfig",
    "paperless_text.apps.PaperlessTextConfig",
    "paperless_mail.apps.PaperlessMailConfig",

    "django.contrib.admin",

    "rest_framework",
    "rest_framework.authtoken",
    "django_filters",

    "django_q",

    *env_apps,
]
2015-12-20 19:23:33 +00:00
# Authentication schemes accepted by the REST API.
REST_FRAMEWORK = {
    "DEFAULT_AUTHENTICATION_CLASSES": [
        "rest_framework.authentication.BasicAuthentication",
        "rest_framework.authentication.SessionAuthentication",
        "rest_framework.authentication.TokenAuthentication",
    ],
}

# In debug mode an additional authentication class is appended to the list.
if DEBUG:
    REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"] += [
        "paperless.auth.AngularApiAuthenticationOverride",
    ]
2018-07-04 17:03:59 +02:00
# Middleware stack, in the order Django applies it. The order matters: the
# auto-login setup further down locates AuthenticationMiddleware in this list.
MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "whitenoise.middleware.WhiteNoiseMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "corsheaders.middleware.CorsMiddleware",
    "django.middleware.locale.LocaleMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
]
2020-11-02 21:59:36 +01:00
ROOT_URLCONF = "paperless.urls"

WSGI_APPLICATION = "paperless.wsgi.application"

# Optional URL prefix; None unless PAPERLESS_FORCE_SCRIPT_NAME is set.
FORCE_SCRIPT_NAME = os.environ.get("PAPERLESS_FORCE_SCRIPT_NAME")

STATIC_URL = os.environ.get("PAPERLESS_STATIC_URL", "/static/")
2015-12-20 19:23:33 +00:00
2020-11-02 21:59:36 +01:00
# what is this used for?
2015-12-20 19:23:33 +00:00
# A single Django template engine that discovers templates inside the
# installed apps (APP_DIRS) and has no extra template directories.
TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [],
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
            ],
        },
    },
]
2020-11-02 21:59:36 +01:00
###############################################################################
# Security #
###############################################################################
2015-12-20 19:23:33 +00:00
AUTO_LOGIN_USERNAME = os.environ.get("PAPERLESS_AUTO_LOGIN_USERNAME")

if AUTO_LOGIN_USERNAME:
    # The auto-login middleware is inserted directly after Django's
    # authentication middleware. This overrides everything the auth
    # middleware is doing but still allows regular login in case the provided
    # user does not exist.
    _index = MIDDLEWARE.index(
        "django.contrib.auth.middleware.AuthenticationMiddleware"
    )
    MIDDLEWARE.insert(_index + 1, "paperless.auth.AutoLoginMiddleware")
2020-11-02 21:59:36 +01:00
# Origins allowed to make cross-origin requests: a comma-separated list in
# PAPERLESS_CORS_ALLOWED_HOSTS, defaulting to http://localhost:8000.
CORS_ALLOWED_ORIGINS = tuple(
    os.environ.get(
        "PAPERLESS_CORS_ALLOWED_HOSTS",
        "http://localhost:8000",
    ).split(",")
)

if DEBUG:
    # Allow access from the angular development server during debugging
    CORS_ALLOWED_ORIGINS = CORS_ALLOWED_ORIGINS + ("http://localhost:4200",)
2020-11-02 21:59:36 +01:00
# The secret key has a default that should be fine so long as you're hosting
# Paperless on a closed network. However, if you're putting this anywhere
# public, you should change the key to something unique and verbose.
SECRET_KEY = os.environ.get(
    "PAPERLESS_SECRET_KEY",
    "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee",
)

# Comma-separated list of host names; every host is accepted when the variable
# is unset or empty.
_allowed_hosts = os.environ.get("PAPERLESS_ALLOWED_HOSTS")
ALLOWED_HOSTS = _allowed_hosts.split(",") if _allowed_hosts else ["*"]
# Password validators from django.contrib.auth, each with its default options.
AUTH_PASSWORD_VALIDATORS = [
    {"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator"},
    {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"},
    {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"},
    {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"},
]
# Disable Django's artificial limit on the number of form fields to submit at
# once. This is a protection against overloading the server, but since this is
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
# of log entries outweigh the benefits of such a safeguard.
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
2020-12-11 17:49:32 +01:00
# Optional prefix that is put in front of every cookie name used below; empty
# unless PAPERLESS_COOKIE_PREFIX is set.
COOKIE_PREFIX = os.environ.get("PAPERLESS_COOKIE_PREFIX", "")

CSRF_COOKIE_NAME = COOKIE_PREFIX + "csrftoken"
SESSION_COOKIE_NAME = COOKIE_PREFIX + "sessionid"
LANGUAGE_COOKIE_NAME = COOKIE_PREFIX + "django_language"
2020-11-02 21:59:36 +01:00
###############################################################################
# Database #
###############################################################################
2015-12-20 19:23:33 +00:00
# SQLite inside DATA_DIR is the default database.
DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.sqlite3",
        "NAME": os.path.join(DATA_DIR, "db.sqlite3"),
    },
}

# Setting PAPERLESS_DBHOST switches the default database to PostgreSQL.
if os.environ.get("PAPERLESS_DBHOST"):
    # Have sqlite available as a second option for management commands
    # This is important when migrating to/from sqlite
    DATABASES["sqlite"] = dict(DATABASES["default"])

    # NOTE(review): "postgresql_psycopg2" is the legacy engine name; Django
    # documents "django.db.backends.postgresql" as the current one. Confirm
    # nothing compares against this string before switching.
    DATABASES["default"] = {
        "ENGINE": "django.db.backends.postgresql_psycopg2",
        "HOST": os.environ.get("PAPERLESS_DBHOST"),
        "NAME": os.environ.get("PAPERLESS_DBNAME", "paperless"),
        "USER": os.environ.get("PAPERLESS_DBUSER", "paperless"),
        "PASSWORD": os.environ.get("PAPERLESS_DBPASS", "paperless"),
    }

    # The port is only added when it is configured explicitly.
    if os.environ.get("PAPERLESS_DBPORT"):
        DATABASES["default"]["PORT"] = os.environ.get("PAPERLESS_DBPORT")
2015-12-20 19:23:33 +00:00
2020-11-02 21:59:36 +01:00
###############################################################################
# Internationalization #
###############################################################################
2015-12-20 19:23:33 +00:00
# Default language, followed by the list of available languages.
LANGUAGE_CODE = "en-us"

LANGUAGES = [
    ("en-us", _("English")),
    ("de", _("German")),
]

# Time zone name; UTC unless PAPERLESS_TIME_ZONE is set.
TIME_ZONE = os.environ.get("PAPERLESS_TIME_ZONE", "UTC")

USE_I18N = USE_L10N = USE_TZ = True
2020-11-02 21:59:36 +01:00
###############################################################################
# Logging #
###############################################################################
2016-02-27 20:18:50 +00:00
# Flag read from PAPERLESS_DISABLE_DBHANDLER; false unless the variable holds
# a truthy value. It is not referenced by the LOGGING dict in this file.
# NOTE(review): presumably consulted by documents.loggers.PaperlessHandler -
# verify against that module.
DISABLE_DBHANDLER = __get_boolean("PAPERLESS_DISABLE_DBHANDLER")
2016-02-27 20:18:50 +00:00
# Logging configuration: the root logger writes to the console, and the
# paperless packages additionally send their records to the "db" handler.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "verbose": {
            "format": "{levelname} {asctime} {module} {message}",
            "style": "{",
        },
        "simple": {
            "format": "{levelname} {message}",
            "style": "{",
        },
    },
    "handlers": {
        "db": {
            "level": "DEBUG",
            "class": "documents.loggers.PaperlessHandler",
        },
        "console": {
            "level": "INFO",
            "class": "logging.StreamHandler",
            "formatter": "verbose",
        },
    },
    "root": {
        "handlers": ["console"],
        "level": "DEBUG",
    },
    # All three package loggers share the same setup: records go to the "db"
    # handler and also propagate up to the root logger.
    "loggers": {
        package: {"handlers": ["db"], "propagate": True}
        for package in ("documents", "paperless_mail", "paperless_tesseract")
    },
}
###############################################################################
# Task queue #
###############################################################################
2020-11-16 18:26:54 +01:00
# Sensible defaults for multitasking:
# use a fair balance between worker processes and threads per worker so that
# both consuming many documents in parallel and consuming large documents is
# reasonably fast.
# Favors threads per worker on smaller systems and never exceeds cpu_count()
# in total.

def default_task_workers():
    """
    Return the default number of task worker processes.

    This is the square root of the CPU count, rounded down, but never less
    than 1. Falls back to 1 when the CPU count cannot be determined.
    """
    try:
        return max(
            math.floor(math.sqrt(multiprocessing.cpu_count())),
            1
        )
    except NotImplementedError:
        return 1


TASK_WORKERS = int(os.getenv("PAPERLESS_TASK_WORKERS", default_task_workers()))

Q_CLUSTER = {
    'name': 'paperless',
    'catch_up': False,
    'workers': TASK_WORKERS,
    'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
}


def default_threads_per_worker():
    """
    Return the default number of threads each task worker may use.

    The CPU count is divided by TASK_WORKERS and rounded down, but never less
    than 1. Falls back to 1 when the CPU count cannot be determined.
    """
    try:
        return max(
            math.floor(multiprocessing.cpu_count() / TASK_WORKERS),
            1
        )
    except NotImplementedError:
        return 1


# Environment values are always strings, so convert explicitly. This keeps
# THREADS_PER_WORKER an int regardless of whether it was configured or
# computed, matching how TASK_WORKERS is handled above.
THREADS_PER_WORKER = int(
    os.getenv("PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker())
)
2020-11-02 21:59:36 +01:00
###############################################################################
# Paperless Specific Settings #
###############################################################################
2016-02-27 20:18:50 +00:00
2020-11-16 18:52:13 +01:00
# Consumer behaviour. CONSUMER_POLLING is an integer that defaults to 0; the
# remaining entries are boolean flags read from the environment.
CONSUMER_POLLING = int(os.environ.get("PAPERLESS_CONSUMER_POLLING", 0))

CONSUMER_DELETE_DUPLICATES = __get_boolean(
    "PAPERLESS_CONSUMER_DELETE_DUPLICATES"
)
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean(
    "PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS"
)

# Unlike the flags above, thumbnail optimization is on by default.
OPTIMIZE_THUMBNAILS = __get_boolean(
    "PAPERLESS_OPTIMIZE_THUMBNAILS",
    default="true",
)
2020-11-22 12:54:08 +01:00
OCR_PAGES = int(os.environ.get("PAPERLESS_OCR_PAGES", 0))

# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.environ.get("PAPERLESS_OCR_LANGUAGE", "eng")

# OCRmyPDF --output-type options are available.
# TODO: validate this setting.
OCR_OUTPUT_TYPE = os.environ.get("PAPERLESS_OCR_OUTPUT_TYPE", "pdfa")

# One of: skip, redo, force
# TODO: validate this.
OCR_MODE = os.environ.get("PAPERLESS_OCR_MODE", "skip")

# Left as None when unset; kept as the raw string from the environment
# otherwise.
OCR_IMAGE_DPI = os.environ.get("PAPERLESS_OCR_IMAGE_DPI")

# Raw string from the environment; defaults to an empty JSON object.
OCR_USER_ARGS = os.environ.get("PAPERLESS_OCR_USER_ARGS", "{}")
# GNUPG needs a home directory for some reason
GNUPG_HOME = os.environ.get("HOME", "/tmp")

# Convert is part of the ImageMagick package
CONVERT_BINARY = os.environ.get("PAPERLESS_CONVERT_BINARY", "convert")
# Both of these stay None unless they are configured.
CONVERT_TMPDIR = os.environ.get("PAPERLESS_CONVERT_TMPDIR")
CONVERT_MEMORY_LIMIT = os.environ.get("PAPERLESS_CONVERT_MEMORY_LIMIT")

GS_BINARY = os.environ.get("PAPERLESS_GS_BINARY", "gs")

OPTIPNG_BINARY = os.environ.get("PAPERLESS_OPTIPNG_BINARY", "optipng")
2016-01-10 13:40:26 +00:00
# Pre-2.x versions of Paperless stored your documents locally with GPG
# encryption, but that is no longer the default. This behaviour is still
# available, but it must be explicitly enabled by setting
# `PAPERLESS_PASSPHRASE` in your environment or config file. The default is to
# store these files unencrypted.
#
# Translation:
# * If you're a new user, you can safely ignore this setting.
# * If you're upgrading from 1.x, this must be set, OR you can run
#   `./manage.py change_storage_type gpg unencrypted` to decrypt your files,
#   after which you can unset this value.
PASSPHRASE = os.environ.get("PAPERLESS_PASSPHRASE")

# Scripts to trigger around document consumption. Both settings are None
# unless configured.
PRE_CONSUME_SCRIPT = os.environ.get("PAPERLESS_PRE_CONSUME_SCRIPT")
POST_CONSUME_SCRIPT = os.environ.get("PAPERLESS_POST_CONSUME_SCRIPT")

# Specify the default date order (for autodetected dates)
DATE_ORDER = os.environ.get("PAPERLESS_DATE_ORDER", "DMY")
# Has no default: None unless PAPERLESS_FILENAME_DATE_ORDER is set.
FILENAME_DATE_ORDER = os.environ.get("PAPERLESS_FILENAME_DATE_ORDER")
# Transformations applied before filename parsing.
# PAPERLESS_FILENAME_PARSE_TRANSFORMS holds a JSON list of objects with the
# keys "pattern" (a regular expression) and "repl". Each entry is stored as a
# (compiled pattern, replacement) tuple; the list is empty when the variable
# is unset. Invalid JSON, a missing key or an invalid regular expression
# raises while the settings are being loaded.
# Built with a comprehension so that no loop variable is left behind in the
# settings module's namespace.
FILENAME_PARSE_TRANSFORMS = [
    (re.compile(transform["pattern"]), transform["repl"])
    for transform in json.loads(
        os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")
    )
]
2020-11-16 23:16:37 +01:00
# TODO: this should not have a prefix.
# Filename format, taken verbatim from the environment (None when unset).
PAPERLESS_FILENAME_FORMAT = os.environ.get("PAPERLESS_FILENAME_FORMAT")

# Font file used for thumbnails; defaults to Liberation Serif.
THUMBNAIL_FONT_NAME = os.environ.get(
    "PAPERLESS_THUMBNAIL_FONT_NAME",
    "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
)