2018-09-04 14:39:55 +02:00
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
from django.core.management.base import BaseCommand
|
2020-11-06 14:46:06 +01:00
|
|
|
from documents.classifier import DocumentClassifier, \
|
|
|
|
|
IncompatibleClassifierVersionError
|
2018-09-04 14:39:55 +02:00
|
|
|
from paperless import settings
|
|
|
|
|
from ...mixins import Renderable
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Command(Renderable, BaseCommand):
    """Management command that trains the document classifier.

    ``handle`` tries to load a previously saved classifier, calls
    ``DocumentClassifier.train()`` and, when that returns a truthy value,
    saves the classifier again.
    """

    # Only the four-space source indentation is stripped here; the spaces
    # between words must survive so the help text stays readable.
    help = """
        Trains the classifier on your data and saves the resulting models to a
        file. The document consumer will then automatically use this new model.
    """.replace("    ", "")

    def __init__(self, *args, **kwargs):
        """Initialise the command, forwarding all arguments to ``BaseCommand``."""
        # BaseCommand's initialiser is called directly, so any initialiser
        # defined on the Renderable mixin is not run by this constructor.
        BaseCommand.__init__(self, *args, **kwargs)

    def handle(self, *args, **options):
        """Load the existing classifier (if any), train it and save updates.

        Positional and keyword arguments are accepted but not used.
        Exceptions raised while training or saving are logged together
        with their traceback and are not re-raised.
        """
        logger = logging.getLogger(__name__)
        classifier = DocumentClassifier()

        try:
            # load the classifier, since we might not have to train it again.
            classifier.reload()
        except (FileNotFoundError, IncompatibleClassifierVersionError):
            # No usable saved classifier could be loaded. This is not fatal:
            # continue with the freshly constructed instance and train it.
            logger.debug(
                "No compatible saved classifier could be loaded; "
                "continuing with a new one."
            )

        try:
            if classifier.train():
                logger.info(
                    "Saving updated classifier model to %s...",
                    settings.MODEL_FILE,
                )
                classifier.save_classifier()
            else:
                logger.debug("Training data unchanged.")
        except Exception as e:
            # Top-level boundary of the command: report the failure with its
            # traceback instead of letting it propagate.
            logger.exception("Classifier error: %s", e)
|