###############################################################################
# IntelliDocs (Paperless-ngx) settings                                        #
###############################################################################

# Every setting in this file is commented out. To enable one, remove the
# leading '#' from its NAME=value line and adjust the value.
# See https://docs.paperless-ngx.com/configuration/ for all available options.

# The UID and GID of the user used to run paperless in the container. Set this
# to your UID and GID on the host so that you have write access to the
# consumption directory.
#USERMAP_UID=1000
#USERMAP_GID=1000

# See the documentation linked above for all options. A few commonly adjusted
# settings are provided below.

# This is required if you will be exposing IntelliDocs on a public domain
# (if doing so please consider security measures such as reverse proxy)
#PAPERLESS_URL=https://intellidocs.example.com

# Adjust this key if you plan to make paperless available publicly. It should
# be a very long sequence of random characters. You don't need to remember it.
#PAPERLESS_SECRET_KEY=change-me

# Use this variable to set a timezone for the Docker containers.
# Defaults to UTC.
#PAPERLESS_TIME_ZONE=America/Los_Angeles

# The default language to use for OCR. Set this to the language most of your
# documents are written in.
#PAPERLESS_OCR_LANGUAGE=eng

# Additional languages to install for text recognition, separated by
# whitespace. Note that this is different from PAPERLESS_OCR_LANGUAGE
# (default=eng), which defines the language used for OCR.
# The container installs English, German, Italian, Spanish and French by
# default.
# See https://packages.debian.org/search?keywords=tesseract-ocr-&searchon=names
# for available languages.
#PAPERLESS_OCR_LANGUAGES=tur ces

###############################################################################
# IntelliDocs Advanced ML/OCR Features (NEW)                                  #
###############################################################################

# Enable/disable advanced ML features (BERT classification, NER, semantic
# search).
# Set to 1 to enable, 0 to disable. Default: 1 (enabled)
#PAPERLESS_ENABLE_ML_FEATURES=1

# Enable/disable advanced OCR features (table extraction, handwriting, forms).
# Set to 1 to enable, 0 to disable. Default: 1 (enabled)
#PAPERLESS_ENABLE_ADVANCED_OCR=1

# ML model selection for document classification.
# Options:
#   distilbert-base-uncased  (default, fast)
#   bert-base-uncased        (more accurate but slower)
#PAPERLESS_ML_CLASSIFIER_MODEL=distilbert-base-uncased

# Enable GPU acceleration for ML/OCR if available.
# Set to 1 to use GPU, 0 to use CPU only. Default: 0 (CPU)
#PAPERLESS_USE_GPU=0

# Confidence threshold for table detection (0.0 to 1.0).
# Higher values = fewer false positives but might miss some tables.
# Default: 0.7
#PAPERLESS_TABLE_DETECTION_THRESHOLD=0.7

# Enable handwriting recognition for documents.
# Set to 1 to enable, 0 to disable. Default: 1 (enabled)
#PAPERLESS_ENABLE_HANDWRITING_OCR=1

# Cache directory for ML models (to persist downloaded models between
# container restarts). Should be mounted as a volume for better performance.
#PAPERLESS_ML_MODEL_CACHE=/usr/src/paperless/.cache/huggingface