mime type handling

This commit is contained in:
Jonas Winkler 2020-11-20 13:31:03 +01:00
parent bd45a804a7
commit 41650f20f4
19 changed files with 163 additions and 146 deletions

View file

@@ -1,5 +1,3 @@
import re
from .parsers import RasterisedDocumentParser
@@ -7,12 +5,9 @@ def tesseract_consumer_declaration(sender, **kwargs):
return {
"parser": RasterisedDocumentParser,
"weight": 0,
"test": tesseract_consumer_test
"mime_types": [
"application/pdf",
"image/jpeg",
"image/png"
]
}
# Pattern is applied to the lower-cased name (see tesseract_consumer_test),
# so upper-case extensions such as "SCAN.PDF" are accepted as well.
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")


def tesseract_consumer_test(doc):
    """Check whether ``doc`` ends in one of the extensions in MATCHING_FILES.

    Args:
        doc: File name or path, as a string.

    Returns:
        The ``re.Match`` object when the lower-cased name matches
        ``MATCHING_FILES``, otherwise ``None``. Callers can rely on the
        truthiness of the result.
    """
    return MATCHING_FILES.match(doc.lower())