Rename .TEXT_CACHE to ._text

Properties should use snake_case, and only constants should be ALL_CAPS. This change also follows the convention of prefixing "private" properties with `_`.
2025-12-08 07:45:32 +01:00 · 2018-02-18 16:00:43 +00:00 · 2018-02-18 16:00:43 +00:00 · 8e9d5caa37
commit 8e9d5caa37
parent 122aa2b9f1
1 changed files with 11 additions and 7 deletions
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -36,7 +36,10 @@ class RasterisedDocumentParser(DocumentParser):
    DATE_ORDER = settings.DATE_ORDER
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
    OCR_ALWAYS = settings.OCR_ALWAYS
-    TEXT_CACHE = None
+
+    def __init__(self, path):
+        super().__init__(path)
+        self._text = None

    def get_thumbnail(self):
        """
@@ -62,19 +65,20 @@ class RasterisedDocumentParser(DocumentParser):
        return len(text) > 50

    def get_text(self):
-        if self.TEXT_CACHE is not None:
-            return self.TEXT_CACHE
+
+        if self._text is not None:
+            return self._text

        if not self.OCR_ALWAYS and self._is_ocred():
            self.log("info", "Skipping OCR, using Text from PDF")
-            self.TEXT_CACHE = get_text_from_pdf(self.document_path)
-            return self.TEXT_CACHE
+            self._text = get_text_from_pdf(self.document_path)
+            return self._text

        images = self._get_greyscale()

        try:
-            self.TEXT_CACHE = self._get_ocr(images)
-            return self.TEXT_CACHE
+            self._text = self._get_ocr(images)
+            return self._text
        except OCRError as e:
            raise ParseError(e)