mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-19 21:16:56 +01:00
Merge branch 'machine-learning' into dev
This commit is contained in:
commit
46a5bc00d7
7 changed files with 66 additions and 45 deletions
|
|
@@ -18,7 +18,7 @@ class Command(Renderable, BaseCommand):
|
|||
with open("dataset_tags.txt", "w") as f:
|
||||
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
|
||||
labels = []
|
||||
- for tag in doc.tags.all():
|
||||
+ for tag in doc.tags.filter(automatic_classification=True):
|
||||
labels.append(tag.name)
|
||||
f.write(",".join(labels))
|
||||
f.write(";")
|
||||
|
|
@@ -27,14 +27,14 @@ class Command(Renderable, BaseCommand):
|
|||
|
||||
with open("dataset_types.txt", "w") as f:
|
||||
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
|
||||
- f.write(doc.document_type.name if doc.document_type is not None else "None")
|
||||
+ f.write(doc.document_type.name if doc.document_type is not None and doc.document_type.automatic_classification else "-")
|
||||
f.write(";")
|
||||
f.write(preprocess_content(doc.content))
|
||||
f.write("\n")
|
||||
|
||||
with open("dataset_correspondents.txt", "w") as f:
|
||||
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
|
||||
- f.write(doc.correspondent.name if doc.correspondent is not None else "None")
|
||||
+ f.write(doc.correspondent.name if doc.correspondent is not None and doc.correspondent.automatic_classification else "-")
|
||||
f.write(";")
|
||||
f.write(preprocess_content(doc.content))
|
||||
f.write("\n")
|
||||
|
|
|
|||
|
|
@@ -35,6 +35,10 @@ class Command(Renderable, BaseCommand):
|
|||
"-i", "--inbox-only",
|
||||
action="store_true"
|
||||
)
|
||||
+ parser.add_argument(
|
||||
+ "-r", "--replace-tags",
|
||||
+ action="store_true"
|
||||
+ )
|
||||
|
||||
def handle(self, *args, **options):
|
||||
|
||||
|
|
@@ -52,7 +56,6 @@ class Command(Renderable, BaseCommand):
|
|||
logging.getLogger(__name__).fatal("Cannot classify documents, classifier model file was not found.")
|
||||
return
|
||||
|
||||
|
||||
for document in documents:
|
||||
logging.getLogger(__name__).info("Processing document {}".format(document.title))
|
||||
- clf.classify_document(document, classify_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'])
|
||||
+ clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue