paperless-ngx/src/paperless_tesseract/signals.py

19 lines
392 B
Python
Raw Normal View History

import re
from .parsers import RasterisedDocumentParser
def tesseract_consumer_declaration(sender, **kwargs):
    """Return the declaration dict describing the Tesseract-based parser.

    ``sender`` and any extra keyword arguments are accepted but not used.
    NOTE(review): the signature looks like a signal receiver; confirm where
    this function is connected.

    Returns:
        A dict with three entries:
        ``"parser"`` - the ``RasterisedDocumentParser`` class,
        ``"weight"`` - the integer ``0``,
        ``"test"`` - the ``tesseract_consumer_test`` callable.
    """
    return {
        "parser": RasterisedDocumentParser,
        "weight": 0,
        "test": tesseract_consumer_test,
    }
# File names (already lower-cased by the caller below) whose extension is one
# of: pdf, jpg/jpeg, gif, png, tif/tiff, pnm, bmp.
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")


def tesseract_consumer_test(doc):
    """Check whether ``doc`` has one of the extensions in ``MATCHING_FILES``.

    The name is lower-cased before matching, so the check is
    case-insensitive with respect to the extension.

    Args:
        doc: File name or path as a string.

    Returns:
        The ``re.Match`` object (truthy) when the name ends in a matching
        extension, otherwise ``None``. Group 1 of the match is the
        lower-cased extension without the leading dot.
    """
    return MATCHING_FILES.match(doc.lower())