mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-08 15:55:31 +01:00
more tests of the new parser
This commit is contained in:
parent
39fa02dcb1
commit
e87575240d
10 changed files with 146 additions and 10 deletions
|
|
@ -160,7 +160,9 @@ def strip_excess_whitespace(text):
|
|||
r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
|
||||
no_trailing_whitespace = re.sub(
|
||||
r"([^\S\n\r]+)$", '', no_leading_whitespace)
|
||||
return no_trailing_whitespace
|
||||
|
||||
# TODO: this needs a rework
|
||||
return no_trailing_whitespace.strip()
|
||||
|
||||
|
||||
def get_text_from_pdf(pdf_file):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue