mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-09 00:05:21 +01:00
Add the new paperless_tika parser
This parser will use an external Tika and Gotenberg server to parse "Office" documents (.doc, .xls, .odt, etc.) Signed-off-by: Jo Vandeginste <Jo.Vandeginste@kuleuven.be>
This commit is contained in:
parent
d690b34ee0
commit
b8e8bf3dd4
9 changed files with 276 additions and 0 deletions
20
src/paperless_tika/signals.py
Normal file
20
src/paperless_tika/signals.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
from .parsers import TikaDocumentParser
|
||||
|
||||
|
||||
def tika_consumer_declaration(sender, **kwargs):
|
||||
return {
|
||||
"parser": TikaDocumentParser,
|
||||
"weight": 10,
|
||||
"mime_types": {
|
||||
"application/msword": ".doc",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
||||
"application/vnd.ms-excel": ".xls",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
||||
"application/vnd.ms-powerpoint": ".ppt",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",
|
||||
"application/vnd.oasis.opendocument.presentation": ".odp",
|
||||
"application/vnd.oasis.opendocument.spreadsheet": ".ods",
|
||||
"application/vnd.oasis.opendocument.text": ".odt",
|
||||
},
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue