diff --git a/src/paperless_migration/scripts/transform.py b/src/paperless_migration/scripts/transform.py index d9aed3c1a..c9b178ece 100644 --- a/src/paperless_migration/scripts/transform.py +++ b/src/paperless_migration/scripts/transform.py @@ -3,7 +3,6 @@ # "rich", # "ijson", # "typer-slim", -# "websockets", # ] # /// @@ -15,7 +14,6 @@ from pathlib import Path from typing import Any from typing import TypedDict -import ijson import typer from rich.console import Console from rich.progress import BarColumn @@ -24,8 +22,14 @@ from rich.progress import SpinnerColumn from rich.progress import TextColumn from rich.progress import TimeElapsedColumn from rich.table import Table -from websockets.sync.client import ClientConnection -from websockets.sync.client import connect + +try: + import ijson # type: ignore +except ImportError as exc: # pragma: no cover - handled at runtime + raise SystemExit( + "ijson is required for migration transform. " + "Install dependencies (e.g., `uv pip install ijson`).", + ) from exc app = typer.Typer(add_completion=False) console = Console() @@ -76,8 +80,6 @@ def migrate( "-o", callback=validate_output, ), - ws_url: str | None = typer.Option(None, "--ws"), - update_frequency: int = typer.Option(100, "--freq"), ) -> None: """ Process JSON fixtures with detailed summary and timing. @@ -92,15 +94,6 @@ def migrate( total_processed: int = 0 start_time: float = time.perf_counter() - ws: ClientConnection | None = None - if ws_url: - try: - ws = connect(ws_url) - except Exception as e: - console.print( - f"[yellow]Warning: Could not connect to WebSocket: {e}[/yellow]", - ) - progress = Progress( SpinnerColumn(), TextColumn("[bold blue]{task.description}"), @@ -110,49 +103,33 @@ def migrate( console=console, ) - try: - with ( - progress, - input_path.open("rb") as infile, - output_path.open("w", encoding="utf-8") as outfile, - ): - task = progress.add_task("Processing fixture", start=True) - outfile.write("[\n") - first: bool = True + with ( + progress, + input_path.open("rb") as infile, + output_path.open("w", encoding="utf-8") as outfile, + ): + task = progress.add_task("Processing fixture", start=True) + outfile.write("[\n") + first: bool = True - for i, obj in enumerate(ijson.items(infile, "item")): - fixture: FixtureObject = obj - model: str = fixture["model"] - total_processed += 1 + for i, obj in enumerate(ijson.items(infile, "item")): + fixture: FixtureObject = obj + model: str = fixture["model"] + total_processed += 1 - transform: TransformFn | None = TRANSFORMS.get(model) - if transform: - fixture = transform(fixture) - stats[model] += 1 + transform: TransformFn | None = TRANSFORMS.get(model) + if transform: + fixture = transform(fixture) + stats[model] += 1 - if not first: - outfile.write(",\n") - first = False + if not first: + outfile.write(",\n") + first = False - json.dump(fixture, outfile, ensure_ascii=False) - progress.advance(task, 1) + json.dump(fixture, outfile, ensure_ascii=False) + progress.advance(task, 1) - if ws and (i % update_frequency == 0): - ws.send( - json.dumps( - { - "task": "processing", - "completed": total_processed, - "stats": dict(stats), - }, - ), - ) - - outfile.write("\n]\n") - - finally: - if ws: - ws.close() + outfile.write("\n]\n") end_time: float = time.perf_counter() duration: float = end_time - start_time diff --git a/src/paperless_migration/templates/paperless_migration/migration_home.html b/src/paperless_migration/templates/paperless_migration/migration_home.html index 9061c61ff..afa9b2c95 100644 --- a/src/paperless_migration/templates/paperless_migration/migration_home.html +++ b/src/paperless_migration/templates/paperless_migration/migration_home.html @@ -268,11 +268,29 @@
Migration console
Live output -
(waiting for connection…)
+
(waiting for connection…)
+ {% if start_stream %} + + {% endif %} diff --git a/src/paperless_migration/urls.py b/src/paperless_migration/urls.py index 0ef0f2ab3..c02b2b533 100644 --- a/src/paperless_migration/urls.py +++ b/src/paperless_migration/urls.py @@ -1,3 +1,6 @@ +from django.conf import settings +from django.conf.urls.static import static +from django.contrib.staticfiles.urls import staticfiles_urlpatterns from django.urls import include from django.urls import path @@ -7,4 +10,9 @@ urlpatterns = [ path("accounts/login/", views.migration_login, name="account_login"), path("accounts/", include("allauth.urls")), path("migration/", views.migration_home, name="migration_home"), + path("migration/transform/stream", views.transform_stream, name="transform_stream"), ] + +if settings.DEBUG: + urlpatterns += staticfiles_urlpatterns() + urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/src/paperless_migration/views.py b/src/paperless_migration/views.py index 3a1cf96e0..10b498d60 100644 --- a/src/paperless_migration/views.py +++ b/src/paperless_migration/views.py @@ -1,3 +1,5 @@ +import subprocess +import sys from pathlib import Path from django.contrib import messages @@ -5,6 +7,7 @@ from django.contrib.auth import authenticate from django.contrib.auth import login from django.contrib.auth.decorators import login_required from django.http import HttpResponseForbidden +from django.http import StreamingHttpResponse from django.shortcuts import redirect from django.shortcuts import render from django.views.decorators.http import require_http_methods @@ -28,10 +31,8 @@ def migration_home(request): if action == "check": messages.success(request, "Checked export paths.") elif action == "transform": - messages.info( - request, - "Transform step is not implemented yet.", - ) + messages.info(request, "Starting transform… live output below.") + request.session["start_transform_stream"] = True elif action == "import": messages.info( request, @@ -46,6 +47,7 @@ def migration_home(request): "export_exists": export_path.exists(), "transformed_path": transformed_path, "transformed_exists": transformed_path.exists(), + "start_stream": request.session.pop("start_transform_stream", False), } return render(request, "paperless_migration/migration_home.html", context) @@ -75,3 +77,53 @@ def migration_login(request): return redirect(settings.LOGIN_REDIRECT_URL) return render(request, "account/login.html") + + +@login_required +@require_http_methods(["GET"]) +def transform_stream(request): + if not request.session.get("migration_code_ok"): + return HttpResponseForbidden("Access code required") + if not request.user.is_superuser: + return HttpResponseForbidden("Superuser access required") + + input_path = Path(settings.MIGRATION_EXPORT_PATH) + output_path = Path(settings.MIGRATION_TRANSFORMED_PATH) + + cmd = [ + sys.executable, + "-m", + "paperless_migration.scripts.transform", + "--input", + str(input_path), + "--output", + str(output_path), + ] + + def event_stream(): + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + bufsize=1, + text=True, + ) + try: + yield "data: Starting transform...\n\n" + if process.stdout: + for line in process.stdout: + yield f"data: {line.rstrip()}\n\n" + process.wait() + yield f"data: Transform finished with code {process.returncode}\n\n" + finally: + if process and process.poll() is None: + process.kill() + + return StreamingHttpResponse( + event_stream(), + content_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "X-Accel-Buffering": "no", + }, + )