mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-11 00:57:09 +01:00
Ensure the tika parse function gets a string, not a PathLike
This commit is contained in:
parent
17ae2aacbf
commit
d4cb84ff76
1 changed file with 11 additions and 1 deletion
|
|
@@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
import requests
|
import requests
|
||||||
|
|
@@ -28,6 +29,11 @@ class TikaDocumentParser(DocumentParser):
|
||||||
|
|
||||||
def extract_metadata(self, document_path, mime_type):
|
def extract_metadata(self, document_path, mime_type):
|
||||||
tika_server = settings.TIKA_ENDPOINT
|
tika_server = settings.TIKA_ENDPOINT
|
||||||
|
|
||||||
|
# tika does not support a PathLike, only strings
|
||||||
|
# ensure this is a string
|
||||||
|
document_path = str(document_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed = parser.from_file(document_path, tika_server)
|
parsed = parser.from_file(document_path, tika_server)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@@ -47,10 +53,14 @@ class TikaDocumentParser(DocumentParser):
|
||||||
for key in parsed["metadata"]
|
for key in parsed["metadata"]
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None):
|
def parse(self, document_path: Path, mime_type, file_name=None):
|
||||||
self.log("info", f"Sending {document_path} to Tika server")
|
self.log("info", f"Sending {document_path} to Tika server")
|
||||||
tika_server = settings.TIKA_ENDPOINT
|
tika_server = settings.TIKA_ENDPOINT
|
||||||
|
|
||||||
|
# tika does not support a PathLike, only strings
|
||||||
|
# ensure this is a string
|
||||||
|
document_path = str(document_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed = parser.from_file(document_path, tika_server)
|
parsed = parser.from_file(document_path, tika_server)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue