mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-10 00:35:30 +01:00
Account for plusses in the OCR language setting
This commit is contained in:
parent
1e891414a3
commit
d1a17480ea
1 changed file with 3 additions and 0 deletions
|
|
@@ -719,7 +719,10 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
|
|||
Maps an ISO-639-1 language code supported by Tesseract into
|
||||
an optional NLTK language name. This is the set of common supported
|
||||
languages for all the NLTK data used.
|
||||
|
||||
Assumption: The primary language is first
|
||||
"""
|
||||
ocr_lang = ocr_lang.split("+")[0]
|
||||
iso_code_to_nltk = {
|
||||
"dan": "danish",
|
||||
"nld": "dutch",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue