paperless-ngx/src/documents/views.py

348 lines
11 KiB
Python
Raw Normal View History

2020-11-25 14:48:36 +01:00
import os
import tempfile
from datetime import datetime
from time import mktime
2020-11-25 14:48:36 +01:00
from django.conf import settings
2020-10-21 12:53:14 +02:00
from django.db.models import Count, Max
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from rest_framework import parsers
from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter, SearchFilter
2016-03-01 18:57:12 +00:00
from rest_framework.mixins import (
DestroyModelMixin,
ListModelMixin,
RetrieveModelMixin,
UpdateModelMixin
)
2016-03-01 18:57:12 +00:00
from rest_framework.permissions import IsAuthenticated
2020-11-12 21:09:45 +01:00
from rest_framework.response import Response
from rest_framework.views import APIView
2016-03-01 18:57:12 +00:00
from rest_framework.viewsets import (
GenericViewSet,
ModelViewSet,
ReadOnlyModelViewSet
)
2020-11-12 21:09:45 +01:00
import documents.index as index
from paperless.db import GnuPG
from paperless.views import StandardPagination
2018-09-26 10:51:42 +02:00
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
TagFilterSet,
2020-11-02 01:24:56 +01:00
DocumentTypeFilterSet,
LogFilterSet
2018-09-26 10:51:42 +02:00
)
2018-09-05 15:25:14 +02:00
from .models import Correspondent, Document, Log, Tag, DocumentType
2016-03-01 18:57:12 +00:00
from .serialisers import (
CorrespondentSerializer,
DocumentSerializer,
LogSerializer,
2018-09-05 15:25:14 +02:00
TagSerializer,
DocumentTypeSerializer,
PostDocumentSerializer
2018-09-25 16:09:33 +02:00
)
2016-01-01 16:13:59 +00:00
2016-03-03 18:09:10 +00:00
class IndexView(TemplateView):
    """Template view that renders ``index.html``."""

    template_name = "index.html"
2016-02-16 09:28:34 +00:00
class CorrespondentViewSet(ModelViewSet):
    """Model viewset for ``Correspondent`` objects.

    Each row of the queryset is annotated with ``document_count`` (a count
    over the ``documents`` relation) and ``last_correspondence`` (the maximum
    of ``documents__created``). The default ordering is by ``name``.
    """

    model = Correspondent
    serializer_class = CorrespondentSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    queryset = (
        Correspondent.objects
        .annotate(
            document_count=Count('documents'),
            last_correspondence=Max('documents__created'),
        )
        .order_by('name')
    )

    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = CorrespondentFilterSet
    # The last two entries are the annotations added to the queryset above.
    ordering_fields = (
        "name",
        "matching_algorithm",
        "match",
        "document_count",
        "last_correspondence",
    )
2016-02-16 09:28:34 +00:00
class TagViewSet(ModelViewSet):
    """Model viewset for ``Tag`` objects.

    Each row of the queryset is annotated with ``document_count`` (a count
    over the ``documents`` relation). The default ordering is by ``name``.
    """

    model = Tag
    serializer_class = TagSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    queryset = (
        Tag.objects
        .annotate(document_count=Count('documents'))
        .order_by('name')
    )

    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = TagFilterSet
    ordering_fields = (
        "name",
        "matching_algorithm",
        "match",
        "document_count",
    )
2016-02-16 09:28:34 +00:00
2018-09-05 15:25:14 +02:00
class DocumentTypeViewSet(ModelViewSet):
    """Model viewset for ``DocumentType`` objects.

    Each row of the queryset is annotated with ``document_count`` (a count
    over the ``documents`` relation). The default ordering is by ``name``.
    """

    model = DocumentType
    serializer_class = DocumentTypeSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    queryset = (
        DocumentType.objects
        .annotate(document_count=Count('documents'))
        .order_by('name')
    )

    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = DocumentTypeFilterSet
    ordering_fields = (
        "name",
        "matching_algorithm",
        "match",
        "document_count",
    )
2018-09-05 15:25:14 +02:00
2016-03-01 18:57:12 +00:00
class DocumentViewSet(RetrieveModelMixin,
                      UpdateModelMixin,
                      DestroyModelMixin,
                      ListModelMixin,
                      GenericViewSet):
    """Retrieve / update / destroy / list viewset for ``Document`` objects.

    On top of the mixin behaviour, the search index is kept in step with
    updates and deletions, and four extra per-document GET actions are
    provided: ``metadata``, ``preview``, ``thumb`` and ``download``.
    """

    model = Document
    queryset = Document.objects.all()
    serializer_class = DocumentSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    filter_backends = (DjangoFilterBackend, SearchFilter, OrderingFilter)
    filterset_class = DocumentFilterSet
    search_fields = ("title", "correspondent__name", "content")
    ordering_fields = (
        "id",
        "title",
        "correspondent__name",
        "document_type__name",
        "created",
        "modified",
        "added",
        "archive_serial_number",
    )

    def update(self, request, *args, **kwargs):
        """Run the standard update, then re-index the document."""
        result = super().update(request, *args, **kwargs)
        index.add_or_update_document(self.get_object())
        return result

    def destroy(self, request, *args, **kwargs):
        """Drop the document from the index, then run the standard destroy."""
        # The index entry is removed first, while get_object() can still
        # load the document.
        index.remove_document_from_index(self.get_object())
        return super().destroy(request, *args, **kwargs)

    @staticmethod
    def original_requested(request):
        """Return True when the query string contains ``original=true``."""
        return request.query_params.get('original') == 'true'

    def file_response(self, pk, request, disposition):
        """Build an ``HttpResponse`` carrying the file of document ``pk``.

        The archive file is served (as ``application/pdf``) when the original
        was not explicitly requested and the archive path exists on disk;
        otherwise the source file is served with the document's own MIME
        type. ``disposition`` is used as the ``Content-Disposition`` type.

        ``Document.DoesNotExist`` from the lookup is not handled here; the
        callers in this class translate it into a 404.
        """
        doc = Document.objects.get(id=pk)

        serve_archive = (
            not self.original_requested(request)
            and os.path.isfile(doc.archive_path)
        )
        if serve_archive:
            file_handle = doc.archive_file
            filename = doc.archive_file_name
            mime_type = 'application/pdf'
        else:
            file_handle = doc.source_file
            filename = doc.file_name
            mime_type = doc.mime_type

        # NOTE(review): decryption is applied to whichever handle was picked
        # above -- confirm archive files are stored encrypted as well when
        # the document uses GPG storage.
        if doc.storage_type == Document.STORAGE_TYPE_GPG:
            file_handle = GnuPG.decrypted(file_handle)

        content_disposition = '{}; filename="{}"'.format(
            disposition, filename)
        response = HttpResponse(file_handle, content_type=mime_type)
        response["Content-Disposition"] = content_disposition
        return response

    @action(methods=['get'], detail=True)
    def metadata(self, request, pk=None):
        """Return checksum, MIME type, filename and archive availability.

        Raises ``Http404`` when no document with primary key ``pk`` exists.
        """
        try:
            doc = Document.objects.get(pk=pk)
            has_archive = os.path.isfile(doc.archive_path)
            payload = {
                "paperless__checksum": doc.checksum,
                "paperless__mime_type": doc.mime_type,
                "paperless__filename": doc.filename,
                "paperless__has_archive_version": has_archive,
            }
            return Response(payload)
        except Document.DoesNotExist:
            raise Http404()

    @action(methods=['get'], detail=True)
    def preview(self, request, pk=None):
        """Serve the document file with an ``inline`` content disposition.

        Raises ``Http404`` when the document or its file is missing.
        """
        try:
            return self.file_response(pk, request, "inline")
        except (FileNotFoundError, Document.DoesNotExist):
            raise Http404()

    @action(methods=['get'], detail=True)
    @cache_control(public=False, max_age=315360000)
    def thumb(self, request, pk=None):
        """Serve the document thumbnail as ``image/png``.

        Raises ``Http404`` when the document or its thumbnail is missing.
        """
        try:
            thumbnail = Document.objects.get(id=pk).thumbnail_file
            return HttpResponse(thumbnail, content_type='image/png')
        except (FileNotFoundError, Document.DoesNotExist):
            raise Http404()

    @action(methods=['get'], detail=True)
    def download(self, request, pk=None):
        """Serve the document file with an ``attachment`` disposition.

        Raises ``Http404`` when the document or its file is missing.
        """
        try:
            return self.file_response(pk, request, "attachment")
        except (FileNotFoundError, Document.DoesNotExist):
            raise Http404()
2016-03-01 18:57:12 +00:00
class LogViewSet(ReadOnlyModelViewSet):
    """Read-only viewset over all ``Log`` entries, sortable by ``created``."""

    model = Log
    queryset = Log.objects.all()
    serializer_class = LogSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = LogFilterSet
    ordering_fields = ("created",)
class PostDocumentView(APIView):
    """Accept a multipart document upload and queue it for consumption.

    The uploaded bytes are written to a temporary file below
    ``settings.SCRATCH_DIR`` and a ``documents.tasks.consume_file`` task is
    scheduled with the overrides supplied in the request.
    """

    permission_classes = (IsAuthenticated,)
    serializer_class = PostDocumentSerializer
    parser_classes = (parsers.MultiPartParser,)

    def get_serializer_context(self):
        """Return the context dict handed to the serializer."""
        return {
            'request': self.request,
            'format': self.format_kwarg,
            'view': self
        }

    def get_serializer(self, *args, **kwargs):
        """Instantiate ``serializer_class`` with the view's context."""
        kwargs['context'] = self.get_serializer_context()
        return self.serializer_class(*args, **kwargs)

    def post(self, request, *args, **kwargs):
        """Validate the upload, store it in the scratch dir and enqueue it.

        Validation errors are raised by the serializer
        (``raise_exception=True``). On success the response body is ``"OK"``.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        validated = serializer.validated_data
        document = validated['document']
        document_data = validated['document_data']
        correspondent_id = validated['correspondent_id']
        document_type_id = validated['document_type_id']
        tag_ids = validated['tag_ids']
        title = validated['title']

        # Upload time, truncated to whole seconds, used as atime/mtime below.
        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        # delete=False: the file has to outlive this request so that the
        # queued task can read it.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(document_data)
            temp_filename = f.name

        # Only touch the timestamps and enqueue the task once the file is
        # closed: buffered data flushed on close would otherwise reset the
        # mtime set here, and the task could be queued for a file whose
        # contents are not yet fully written.
        os.utime(temp_filename, times=(t, t))

        async_task("documents.tasks.consume_file",
                   temp_filename,
                   override_filename=document.name,
                   override_title=title,
                   override_correspondent_id=correspondent_id,
                   override_document_type_id=document_type_id,
                   override_tag_ids=tag_ids,
                   task_name=os.path.basename(document.name)[:100])

        return Response("OK")
class SearchView(APIView):
    """Paged full-text search endpoint backed by the document index."""

    permission_classes = (IsAuthenticated,)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Index handle opened once per view instance.
        self.ix = index.open_index()

    def add_infos_to_hit(self, r):
        """Turn one index hit into the dict returned to the client.

        The matching ``Document`` is loaded from the database so that the
        highlights can be computed from its content and the serialized
        document can be embedded in the result.
        """
        doc_id = r['id']
        doc = Document.objects.get(id=doc_id)
        return {
            'id': doc_id,
            'highlights': r.highlights("content", text=doc.content),
            'score': r.score,
            'rank': r.rank,
            'document': DocumentSerializer(doc).data,
            'title': r['title'],
        }

    def get(self, request, format=None):
        """Run the ``query`` parameter against the index.

        Without a ``query`` parameter an empty result set is returned. A
        ``page`` parameter that is missing, unparsable or below 1 is treated
        as page 1. Any exception raised while querying or building the
        result is reported as a 400 response carrying the exception text.
        """
        params = request.query_params

        if 'query' not in params:
            empty_result = {
                'count': 0,
                'page': 0,
                'page_count': 0,
                'results': [],
            }
            return Response(empty_result)

        query = params['query']

        try:
            page = int(params.get('page', 1))
        except (ValueError, TypeError):
            page = 1
        page = max(page, 1)

        try:
            with index.query_page(self.ix, query, page) as (result_page,
                                                            corrected_query):
                # The hits are materialised inside the ``with`` block.
                return Response({
                    'count': len(result_page),
                    'page': result_page.pagenum,
                    'page_count': result_page.pagecount,
                    'corrected_query': corrected_query,
                    'results': [
                        self.add_infos_to_hit(hit) for hit in result_page
                    ],
                })
        except Exception as e:
            return HttpResponseBadRequest(str(e))
2020-10-27 17:07:13 +01:00
class SearchAutoCompleteView(APIView):
    """Autocomplete endpoint backed by the document index."""

    permission_classes = (IsAuthenticated,)

    def __init__(self, *args, **kwargs):
        super(SearchAutoCompleteView, self).__init__(*args, **kwargs)
        # Index handle opened once per view instance.
        self.ix = index.open_index()

    def get(self, request, format=None):
        """Return autocomplete suggestions for the ``term`` parameter.

        Query parameters:
            term: required; a 400 "Term required" is returned without it.
            limit: optional positive integer, defaults to 10. A value that
                is not an integer, or is not positive, yields a 400
                "Invalid limit".
        """
        params = request.query_params

        if 'term' not in params:
            return HttpResponseBadRequest("Term required")
        term = params['term']

        limit = 10
        if 'limit' in params:
            # A non-numeric limit is a client error, not a server error.
            try:
                limit = int(params['limit'])
            except (ValueError, TypeError):
                return HttpResponseBadRequest("Invalid limit")
            if limit <= 0:
                return HttpResponseBadRequest("Invalid limit")

        return Response(index.autocomplete(self.ix, term, limit))
2020-10-31 00:56:20 +01:00
class StatisticsView(APIView):
    """Report simple document counts."""

    permission_classes = (IsAuthenticated,)

    def get(self, request, format=None):
        """Return the total document count and the inbox document count.

        The inbox count covers distinct documents that have at least one tag
        with ``is_inbox_tag=True``.
        """
        total = Document.objects.all().count()
        in_inbox = (
            Document.objects
            .filter(tags__is_inbox_tag=True)
            .distinct()
            .count()
        )
        return Response({
            'documents_total': total,
            'documents_inbox': in_inbox,
        })