Merge branch 'machine-learning' into dev

2025-12-19 21:16:56 +01:00 · 2018-09-11 14:36:21 +02:00 · 2018-09-11 14:36:21 +02:00 · 46a5bc00d7
commit 46a5bc00d7
parent 11adc94e5e d46ee11143
7 changed files with 66 additions and 45 deletions
--- a/src/documents/management/commands/document_create_dataset.py
+++ b/src/documents/management/commands/document_create_dataset.py
@@ -18,7 +18,7 @@ class Command(Renderable, BaseCommand):
        with open("dataset_tags.txt", "w") as f:
            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
                labels = []
-                for tag in doc.tags.all():
+                for tag in doc.tags.filter(automatic_classification=True):
                    labels.append(tag.name)
                f.write(",".join(labels))
                f.write(";")
@@ -27,14 +27,14 @@ class Command(Renderable, BaseCommand):

        with open("dataset_types.txt", "w") as f:
            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
-                f.write(doc.document_type.name if doc.document_type is not None else "None")
+                f.write(doc.document_type.name if doc.document_type is not None and doc.document_type.automatic_classification else "-")
                f.write(";")
                f.write(preprocess_content(doc.content))
                f.write("\n")

        with open("dataset_correspondents.txt", "w") as f:
            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
-                f.write(doc.correspondent.name if doc.correspondent is not None else "None")
+                f.write(doc.correspondent.name if doc.correspondent is not None and doc.correspondent.automatic_classification else "-")
                f.write(";")
                f.write(preprocess_content(doc.content))
                f.write("\n")
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -35,6 +35,10 @@ class Command(Renderable, BaseCommand):
            "-i", "--inbox-only",
            action="store_true"
        )
+        parser.add_argument(
+            "-r", "--replace-tags",
+            action="store_true"
+        )

    def handle(self, *args, **options):

@@ -52,7 +56,6 @@ class Command(Renderable, BaseCommand):
            logging.getLogger(__name__).fatal("Cannot classify documents, classifier model file was not found.")
            return

-
        for document in documents:
            logging.getLogger(__name__).info("Processing document {}".format(document.title))
-            clf.classify_document(document, classify_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'])
+            clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])