mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-08 07:45:32 +01:00
unified document matching, legacy and automatching work alongside now
This commit is contained in:
parent
9e4147ac52
commit
11af74ba36
16 changed files with 629 additions and 225 deletions
|
|
@ -16,7 +16,7 @@ class Migration(migrations.Migration):
|
|||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(max_length=128, unique=True)),
|
||||
('slug', models.SlugField(blank=True)),
|
||||
('slug', models.SlugField(blank=True, editable=False)),
|
||||
('match', models.CharField(blank=True, max_length=256)),
|
||||
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
|
||||
('is_insensitive', models.BooleanField(default=True)),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue