Sick, run transform as subprocess

This commit is contained in:
shamoon 2026-01-23 08:39:57 -08:00
parent 0846fe9845
commit a290fcfe6f
No known key found for this signature in database
4 changed files with 113 additions and 58 deletions

View file

@ -3,7 +3,6 @@
# "rich",
# "ijson",
# "typer-slim",
# "websockets",
# ]
# ///
@ -15,7 +14,6 @@ from pathlib import Path
from typing import Any
from typing import TypedDict
import ijson
import typer
from rich.console import Console
from rich.progress import BarColumn
@ -24,8 +22,14 @@ from rich.progress import SpinnerColumn
from rich.progress import TextColumn
from rich.progress import TimeElapsedColumn
from rich.table import Table
from websockets.sync.client import ClientConnection
from websockets.sync.client import connect
try:
import ijson # type: ignore
except ImportError as exc: # pragma: no cover - handled at runtime
raise SystemExit(
"ijson is required for migration transform. "
"Install dependencies (e.g., `uv pip install ijson`).",
) from exc
app = typer.Typer(add_completion=False)
console = Console()
@ -76,8 +80,6 @@ def migrate(
"-o",
callback=validate_output,
),
ws_url: str | None = typer.Option(None, "--ws"),
update_frequency: int = typer.Option(100, "--freq"),
) -> None:
"""
Process JSON fixtures with detailed summary and timing.
@ -92,15 +94,6 @@ def migrate(
total_processed: int = 0
start_time: float = time.perf_counter()
ws: ClientConnection | None = None
if ws_url:
try:
ws = connect(ws_url)
except Exception as e:
console.print(
f"[yellow]Warning: Could not connect to WebSocket: {e}[/yellow]",
)
progress = Progress(
SpinnerColumn(),
TextColumn("[bold blue]{task.description}"),
@ -110,49 +103,33 @@ def migrate(
console=console,
)
try:
with (
progress,
input_path.open("rb") as infile,
output_path.open("w", encoding="utf-8") as outfile,
):
task = progress.add_task("Processing fixture", start=True)
outfile.write("[\n")
first: bool = True
with (
progress,
input_path.open("rb") as infile,
output_path.open("w", encoding="utf-8") as outfile,
):
task = progress.add_task("Processing fixture", start=True)
outfile.write("[\n")
first: bool = True
for i, obj in enumerate(ijson.items(infile, "item")):
fixture: FixtureObject = obj
model: str = fixture["model"]
total_processed += 1
for i, obj in enumerate(ijson.items(infile, "item")):
fixture: FixtureObject = obj
model: str = fixture["model"]
total_processed += 1
transform: TransformFn | None = TRANSFORMS.get(model)
if transform:
fixture = transform(fixture)
stats[model] += 1
transform: TransformFn | None = TRANSFORMS.get(model)
if transform:
fixture = transform(fixture)
stats[model] += 1
if not first:
outfile.write(",\n")
first = False
if not first:
outfile.write(",\n")
first = False
json.dump(fixture, outfile, ensure_ascii=False)
progress.advance(task, 1)
json.dump(fixture, outfile, ensure_ascii=False)
progress.advance(task, 1)
if ws and (i % update_frequency == 0):
ws.send(
json.dumps(
{
"task": "processing",
"completed": total_processed,
"stats": dict(stats),
},
),
)
outfile.write("\n]\n")
finally:
if ws:
ws.close()
outfile.write("\n]\n")
end_time: float = time.perf_counter()
duration: float = end_time - start_time

View file

@ -268,11 +268,29 @@
<div class="fw-semibold">Migration console</div>
<span class="badge bg-secondary-subtle text-secondary border border-secondary-subtle">Live output</span>
</div>
<pre class="mb-0" style="background:#0f1a12;color:#d1e7d6;border-radius:12px;min-height:160px;padding:12px;font-size:0.9rem;overflow:auto;">(waiting for connection…)</pre>
<pre id="migration-log" class="mb-0" style="background:#0f1a12;color:#d1e7d6;border-radius:12px;min-height:180px;padding:12px;font-size:0.9rem;overflow:auto;">(waiting for connection…)</pre>
</div>
</div>
</div>
</div>
</div>
{% if start_stream %}
<script>
(() => {
const logEl = document.getElementById('migration-log');
if (!logEl) return;
const evt = new EventSource('{% url "transform_stream" %}');
const append = (line) => {
logEl.textContent += `\n${line}`;
logEl.scrollTop = logEl.scrollHeight;
};
evt.onmessage = (e) => append(e.data);
evt.onerror = () => {
append('[connection closed]');
evt.close();
};
})();
</script>
{% endif %}
</body>
</html>

View file

@ -1,3 +1,6 @@
from django.conf import settings
from django.conf.urls.static import static
from django.contrib.staticfiles.urls import staticfiles_urlpatterns
from django.urls import include
from django.urls import path
@ -7,4 +10,9 @@ urlpatterns = [
path("accounts/login/", views.migration_login, name="account_login"),
path("accounts/", include("allauth.urls")),
path("migration/", views.migration_home, name="migration_home"),
path("migration/transform/stream", views.transform_stream, name="transform_stream"),
]
if settings.DEBUG:
urlpatterns += staticfiles_urlpatterns()
urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

View file

@ -1,3 +1,5 @@
import subprocess
import sys
from pathlib import Path
from django.contrib import messages
@ -5,6 +7,7 @@ from django.contrib.auth import authenticate
from django.contrib.auth import login
from django.contrib.auth.decorators import login_required
from django.http import HttpResponseForbidden
from django.http import StreamingHttpResponse
from django.shortcuts import redirect
from django.shortcuts import render
from django.views.decorators.http import require_http_methods
@ -28,10 +31,8 @@ def migration_home(request):
if action == "check":
messages.success(request, "Checked export paths.")
elif action == "transform":
messages.info(
request,
"Transform step is not implemented yet.",
)
messages.info(request, "Starting transform… live output below.")
request.session["start_transform_stream"] = True
elif action == "import":
messages.info(
request,
@ -46,6 +47,7 @@ def migration_home(request):
"export_exists": export_path.exists(),
"transformed_path": transformed_path,
"transformed_exists": transformed_path.exists(),
"start_stream": request.session.pop("start_transform_stream", False),
}
return render(request, "paperless_migration/migration_home.html", context)
@ -75,3 +77,53 @@ def migration_login(request):
return redirect(settings.LOGIN_REDIRECT_URL)
return render(request, "account/login.html")
@login_required
@require_http_methods(["GET"])
def transform_stream(request):
if not request.session.get("migration_code_ok"):
return HttpResponseForbidden("Access code required")
if not request.user.is_superuser:
return HttpResponseForbidden("Superuser access required")
input_path = Path(settings.MIGRATION_EXPORT_PATH)
output_path = Path(settings.MIGRATION_TRANSFORMED_PATH)
cmd = [
sys.executable,
"-m",
"paperless_migration.scripts.transform",
"--input",
str(input_path),
"--output",
str(output_path),
]
def event_stream():
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=1,
text=True,
)
try:
yield "data: Starting transform...\n\n"
if process.stdout:
for line in process.stdout:
yield f"data: {line.rstrip()}\n\n"
process.wait()
yield f"data: Transform finished with code {process.returncode}\n\n"
finally:
if process and process.poll() is None:
process.kill()
return StreamingHttpResponse(
event_stream(),
content_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"X-Accel-Buffering": "no",
},
)