mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-07 15:25:41 +01:00
Handle dateparser ValueErrors
When parsing dates from the document text or filenames, correctly handle values errors indicating broken dates. Newly added tests ensure that this handling works properly.
This commit is contained in:
parent
a3aab0cb48
commit
a311cd498c
2 changed files with 28 additions and 2 deletions
|
|
@ -108,7 +108,7 @@ class DocumentParser:
|
|||
|
||||
try:
|
||||
date = __parser(date_string, self.FILENAME_DATE_ORDER)
|
||||
except TypeError:
|
||||
except (TypeError, ValueError):
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
|
|
@ -134,7 +134,7 @@ class DocumentParser:
|
|||
|
||||
try:
|
||||
date = __parser(date_string, self.DATE_ORDER)
|
||||
except TypeError:
|
||||
except (TypeError, ValueError):
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -172,3 +172,29 @@ class TestDate(TestCase):
|
|||
document = RasterisedDocumentParser("/dev/null")
|
||||
document.get_text()
|
||||
self.assertIsNone(document.get_date())
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||
return_value="20 408000l 2475"
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_crazy_date_with_spaces(self, *args):
|
||||
document = RasterisedDocumentParser("/dev/null")
|
||||
document.get_text()
|
||||
self.assertIsNone(document.get_date())
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||
return_value="No date in here"
|
||||
)
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser."
|
||||
"FILENAME_DATE_ORDER",
|
||||
new_callable=mock.PropertyMock,
|
||||
return_value="YMD"
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_filename_date_parse_invalid(self, *args):
|
||||
document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf")
|
||||
document.get_text()
|
||||
self.assertIsNone(document.get_date())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue