# paperless-ngx/src/documents/index.py

import logging
import math
import os
from contextlib import contextmanager

from dateutil.parser import isoparse
from django.conf import settings
from whoosh import highlight, classify, query
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN
from whoosh.highlight import HtmlFormatter
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.searching import ResultsPage, Searcher
from whoosh.writing import AsyncWriter

from documents.models import Document

logger = logging.getLogger("paperless.index")

def get_schema():
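    """Return the Whoosh schema for the document search index."""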
    return Schema(
        id=NUMERIC(
            stored=True,
            unique=True
        ),
        title=TEXT(
            sortable=True
        ),
        content=TEXT(),
        asn=NUMERIC(
            sortable=True
        ),
        correspondent=TEXT(
            sortable=True
        ),
        correspondent_id=NUMERIC(),
        has_correspondent=BOOLEAN(),
        tag=KEYWORD(
            commas=True,
            scorable=True,
            lowercase=True
        ),
        tag_id=KEYWORD(
            commas=True,
            scorable=True
        ),
        has_tag=BOOLEAN(),
        type=TEXT(
            sortable=True
        ),
        type_id=NUMERIC(),
        has_type=BOOLEAN(),
        created=DATETIME(
            sortable=True
        ),
        modified=DATETIME(
            sortable=True
        ),
        added=DATETIME(
            sortable=True
        ),
    )

def open_index(recreate=False):
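    """Open the search index, recreating it if it is missing or broken."""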
    try:
        if exists_in(settings.INDEX_DIR) and not recreate:
            return open_dir(settings.INDEX_DIR, schema=get_schema())
    except Exception:
        logger.exception("Error while opening the index, recreating.")

    if not os.path.isdir(settings.INDEX_DIR):
        os.makedirs(settings.INDEX_DIR, exist_ok=True)

    return create_in(settings.INDEX_DIR, get_schema())


@contextmanager
def open_index_writer(optimize=False):
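    """Yield an AsyncWriter for the index. Changes are committed (optionally
    optimizing the index) on exit; if an exception occurs, the pending
    changes are cancelled first.
    """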
    writer = AsyncWriter(open_index())
    try:
        yield writer
    except Exception as e:
        logger.exception(str(e))
        writer.cancel()
    finally:
        writer.commit(optimize=optimize)


@contextmanager
def open_index_searcher():
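    """Yield a Searcher over the index, closing it when done."""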
    searcher = open_index().searcher()
    try:
        yield searcher
    finally:
        searcher.close()

def update_document(writer, doc):
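    """Write a document's searchable fields to the index, replacing any
    existing entry with the same id."""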
    tags = ",".join([t.name for t in doc.tags.all()])
    tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
    writer.update_document(
        id=doc.pk,
        title=doc.title,
        content=doc.content,
        correspondent=doc.correspondent.name if doc.correspondent else None,
        correspondent_id=doc.correspondent.id if doc.correspondent else None,
        has_correspondent=doc.correspondent is not None,
        tag=tags if tags else None,
        tag_id=tags_ids if tags_ids else None,
        has_tag=len(tags) > 0,
        type=doc.document_type.name if doc.document_type else None,
        type_id=doc.document_type.id if doc.document_type else None,
        has_type=doc.document_type is not None,
        created=doc.created,
        added=doc.added,
        asn=doc.archive_serial_number,
        modified=doc.modified,
    )


def remove_document(writer, doc):
    remove_document_by_id(writer, doc.pk)


def remove_document_by_id(writer, doc_id):
    writer.delete_by_term('id', doc_id)


def add_or_update_document(document):
    with open_index_writer() as writer:
        update_document(writer, document)


def remove_document_from_index(document):
    with open_index_writer() as writer:
        remove_document(writer, document)

class DelayedQuery:
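    """Lazy wrapper around a Whoosh search: slicing runs the query for just
    the requested page of results and caches each page once fetched."""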
    @property
    def _query(self):
        raise NotImplementedError()

    @property
    def _query_filter(self):
        criterias = []
        for k, v in self.query_params.items():
            if k == 'correspondent__id':
                criterias.append(query.Term('correspondent_id', v))
            elif k == 'tags__id__all':
                for tag_id in v.split(","):
                    criterias.append(query.Term('tag_id', tag_id))
            elif k == 'document_type__id':
                criterias.append(query.Term('type_id', v))
            elif k == 'correspondent__isnull':
                criterias.append(query.Term("has_correspondent", v == "false"))
            elif k == 'is_tagged':
                criterias.append(query.Term("has_tag", v == "true"))
            elif k == 'document_type__isnull':
                criterias.append(query.Term("has_type", v == "false"))
            elif k == 'created__date__lt':
                criterias.append(
                    query.DateRange("created", start=None, end=isoparse(v)))
            elif k == 'created__date__gt':
                criterias.append(
                    query.DateRange("created", start=isoparse(v), end=None))
            elif k == 'added__date__gt':
                criterias.append(
                    query.DateRange("added", start=isoparse(v), end=None))
            elif k == 'added__date__lt':
                criterias.append(
                    query.DateRange("added", start=None, end=isoparse(v)))
        if len(criterias) > 0:
            return query.And(criterias)
        else:
            return None
    @property
    def _query_sortedby(self):
        # Sorting by query parameters is not implemented yet; results are
        # always returned in relevance order.
        return None, False

        # if 'ordering' not in self.query_params:
        #     return None, False
        # o: str = self.query_params['ordering']
        # if o.startswith('-'):
        #     return o[1:], True
        # else:
        #     return o, False
    def __init__(self, searcher: Searcher, query_params, page_size):
        self.searcher = searcher
        self.query_params = query_params
        self.page_size = page_size
        self.saved_results = dict()
        self.first_score = None
    def __len__(self):
        page = self[0:1]
        return len(page)
    def __getitem__(self, item):
        if item.start in self.saved_results:
            return self.saved_results[item.start]

        q, mask = self._query
        sortedby, reverse = self._query_sortedby

        page: ResultsPage = self.searcher.search_page(
            q,
            mask=mask,
            filter=self._query_filter,
            pagenum=math.floor(item.start / self.page_size) + 1,
            pagelen=self.page_size,
            sortedby=sortedby,
            reverse=reverse
        )
        page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        page.results.formatter = HtmlFormatter(tagname="span", between=" ... ")

        # Remember the score of the best hit so that all scores can be
        # reported relative to it.
        if not self.first_score and len(page.results) > 0:
            self.first_score = page.results[0].score

        if self.first_score:
            page.results.top_n = list(map(
                lambda hit: (hit[0] / self.first_score, hit[1]),
                page.results.top_n
            ))

        self.saved_results[item.start] = page
        return page

class DelayedFullTextQuery(DelayedQuery):
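    """Delayed query that parses the 'query' parameter as a free-text search
    over content, title, correspondent, tag and type."""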
    @property
    def _query(self):
        q_str = self.query_params['query']
        qp = MultifieldParser(
            ["content", "title", "correspondent", "tag", "type"],
            self.searcher.ixreader.schema)
        qp.add_plugin(DateParserPlugin())
        q = qp.parse(q_str)

        # A spelling-corrected variant of the query is computed here, but the
        # corrected string is not used any further at the moment.
        corrected = self.searcher.correct_query(q, q_str)
        if corrected.query != q:
            corrected_query = corrected.string

        return q, None

class DelayedMoreLikeThisQuery(DelayedQuery):
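    """Delayed query that finds documents similar to the one given by the
    'more_like_id' parameter, based on its most significant content terms."""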
    @property
    def _query(self):
        more_like_doc_id = int(self.query_params['more_like_id'])
        content = Document.objects.get(id=more_like_doc_id).content

        docnum = self.searcher.document_number(id=more_like_doc_id)
        kts = self.searcher.key_terms_from_text(
            'content', content, numterms=20,
            model=classify.Bo1Model, normalize=False)
        q = query.Or(
            [query.Term('content', word, boost=weight)
             for word, weight in kts])
        # Mask out the source document itself so it does not appear in its
        # own "more like this" results.
        mask = {docnum}

        return q, mask


def autocomplete(ix, term, limit=10):
    """Return up to `limit` of the most distinctive terms in the index that
    start with `term`, suitable for autocomplete suggestions."""
    with ix.reader() as reader:
        terms = []
        for (score, t) in reader.most_distinctive_terms(
                "content", number=limit, prefix=term.lower()):
            terms.append(t)
        return terms