fix: Resolve linting issues in scan_documents_ai command

- Removed stray whitespace from otherwise blank lines (e.g. inside docstrings)
- Changed logger.error with exc_info=True to logger.exception
- Prefixed unused unpacked variables with underscore
- Applied ruff formatting to both command and test files

Co-authored-by: dawnsystem <42047891+dawnsystem@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2025-11-14 15:51:14 +00:00
parent cc9e66c11c
commit 920998f6b5
2 changed files with 83 additions and 46 deletions

View file

@ -28,7 +28,7 @@ logger = logging.getLogger("paperless.management.scan_documents_ai")
class Command(ProgressBarMixin, BaseCommand): class Command(ProgressBarMixin, BaseCommand):
""" """
Management command to apply AI scanner to existing documents. Management command to apply AI scanner to existing documents.
This command processes existing documents through the comprehensive AI scanner This command processes existing documents through the comprehensive AI scanner
to generate metadata suggestions (tags, correspondents, document types, etc.). to generate metadata suggestions (tags, correspondents, document types, etc.).
""" """
@ -127,7 +127,9 @@ class Command(ProgressBarMixin, BaseCommand):
if document_count == 0: if document_count == 0:
self.stdout.write( self.stdout.write(
self.style.WARNING("No documents found matching the specified filters."), self.style.WARNING(
"No documents found matching the specified filters."
),
) )
return return
@ -153,12 +155,14 @@ class Command(ProgressBarMixin, BaseCommand):
def _validate_arguments(self, options): def _validate_arguments(self, options):
"""Validate command line arguments.""" """Validate command line arguments."""
# At least one filter must be specified # At least one filter must be specified
if not any([ if not any(
options["all"], [
options["filter_by_type"], options["all"],
options["date_range"], options["filter_by_type"],
options["id_range"], options["date_range"],
]): options["id_range"],
]
):
raise CommandError( raise CommandError(
"You must specify at least one filter: " "You must specify at least one filter: "
"--all, --filter-by-type, --date-range, or --id-range", "--all, --filter-by-type, --date-range, or --id-range",
@ -242,7 +246,9 @@ class Command(ProgressBarMixin, BaseCommand):
# Display processing mode # Display processing mode
self.stdout.write("\nProcessing mode:") self.stdout.write("\nProcessing mode:")
if options["dry_run"]: if options["dry_run"]:
self.stdout.write(self.style.WARNING(" • DRY RUN - No changes will be applied")) self.stdout.write(
self.style.WARNING(" • DRY RUN - No changes will be applied")
)
elif options["auto_apply_high_confidence"]: elif options["auto_apply_high_confidence"]:
self.stdout.write(" • Auto-apply high confidence suggestions (≥80%)") self.stdout.write(" • Auto-apply high confidence suggestions (≥80%)")
else: else:
@ -266,7 +272,7 @@ class Command(ProgressBarMixin, BaseCommand):
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Process documents through the AI scanner. Process documents through the AI scanner.
Returns: Returns:
Dictionary with processing results and statistics Dictionary with processing results and statistics
""" """
@ -291,7 +297,7 @@ class Command(ProgressBarMixin, BaseCommand):
disable=self.no_progress_bar, disable=self.no_progress_bar,
desc="Processing batches", desc="Processing batches",
): ):
batch = queryset[i:i + batch_size] batch = queryset[i : i + batch_size]
for document in batch: for document in batch:
try: try:
@ -334,26 +340,29 @@ class Command(ProgressBarMixin, BaseCommand):
) )
# Store for summary # Store for summary
results["documents_with_suggestions"].append({ results["documents_with_suggestions"].append(
"id": document.id, {
"title": document.title, "id": document.id,
"suggestions": filtered_result.to_dict(), "title": document.title,
"applied": applied if auto_apply else None, "suggestions": filtered_result.to_dict(),
}) "applied": applied if auto_apply else None,
}
)
results["processed"] += 1 results["processed"] += 1
except Exception as e: except Exception as e:
logger.error( logger.exception(
f"Error processing document {document.id}: {e}", f"Error processing document {document.id}: {e}",
exc_info=True,
) )
results["errors"] += 1 results["errors"] += 1
results["error_documents"].append({ results["error_documents"].append(
"id": document.id, {
"title": document.title, "id": document.id,
"error": str(e), "title": document.title,
}) "error": str(e),
}
)
return results return results
@ -367,25 +376,24 @@ class Command(ProgressBarMixin, BaseCommand):
# Filter tags # Filter tags
filtered.tags = [ filtered.tags = [
(tag_id, conf) for tag_id, conf in scan_result.tags (tag_id, conf) for tag_id, conf in scan_result.tags if conf >= threshold
if conf >= threshold
] ]
# Filter correspondent # Filter correspondent
if scan_result.correspondent: if scan_result.correspondent:
corr_id, conf = scan_result.correspondent _corr_id, conf = scan_result.correspondent
if conf >= threshold: if conf >= threshold:
filtered.correspondent = scan_result.correspondent filtered.correspondent = scan_result.correspondent
# Filter document type # Filter document type
if scan_result.document_type: if scan_result.document_type:
type_id, conf = scan_result.document_type _type_id, conf = scan_result.document_type
if conf >= threshold: if conf >= threshold:
filtered.document_type = scan_result.document_type filtered.document_type = scan_result.document_type
# Filter storage path # Filter storage path
if scan_result.storage_path: if scan_result.storage_path:
path_id, conf = scan_result.storage_path _path_id, conf = scan_result.storage_path
if conf >= threshold: if conf >= threshold:
filtered.storage_path = scan_result.storage_path filtered.storage_path = scan_result.storage_path
@ -396,8 +404,7 @@ class Command(ProgressBarMixin, BaseCommand):
# Filter workflows # Filter workflows
filtered.workflows = [ filtered.workflows = [
(wf_id, conf) for wf_id, conf in scan_result.workflows (wf_id, conf) for wf_id, conf in scan_result.workflows if conf >= threshold
if conf >= threshold
] ]
# Copy other fields as-is # Copy other fields as-is
@ -428,12 +435,18 @@ class Command(ProgressBarMixin, BaseCommand):
# Display statistics # Display statistics
self.stdout.write("Statistics:") self.stdout.write("Statistics:")
self.stdout.write(f" • Documents processed: {results['processed']}") self.stdout.write(f" • Documents processed: {results['processed']}")
self.stdout.write(f" • Documents with suggestions: {len(results['documents_with_suggestions'])}") self.stdout.write(
self.stdout.write(f" • Total suggestions generated: {results['suggestions_generated']}") f" • Documents with suggestions: {len(results['documents_with_suggestions'])}"
)
self.stdout.write(
f" • Total suggestions generated: {results['suggestions_generated']}"
)
if options["auto_apply_high_confidence"] and not options["dry_run"]: if options["auto_apply_high_confidence"] and not options["dry_run"]:
self.stdout.write( self.stdout.write(
self.style.SUCCESS(f" • Suggestions auto-applied: {results['auto_applied']}"), self.style.SUCCESS(
f" • Suggestions auto-applied: {results['auto_applied']}"
),
) )
if results["errors"] > 0: if results["errors"] > 0:

View file

@ -71,7 +71,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_all_flag(self): def test_command_all_flag(self):
"""Test command with --all flag.""" """Test command with --all flag."""
# Mock the AI scanner # Mock the AI scanner
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -95,7 +97,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_filter_by_type(self): def test_command_filter_by_type(self):
"""Test command with --filter-by-type option.""" """Test command with --filter-by-type option."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -129,7 +133,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_date_range(self): def test_command_date_range(self):
"""Test command with --date-range option.""" """Test command with --date-range option."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -182,7 +188,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_id_range(self): def test_command_id_range(self):
"""Test command with --id-range option.""" """Test command with --id-range option."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -205,7 +213,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_confidence_threshold(self): def test_command_confidence_threshold(self):
"""Test command with custom confidence threshold.""" """Test command with custom confidence threshold."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -244,7 +254,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_auto_apply(self): def test_command_auto_apply(self):
"""Test command with --auto-apply-high-confidence.""" """Test command with --auto-apply-high-confidence."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -277,7 +289,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_dry_run_does_not_apply(self): def test_command_dry_run_does_not_apply(self):
"""Test that dry run mode does not apply changes.""" """Test that dry run mode does not apply changes."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -311,7 +325,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
checksum="EMPTY123", checksum="EMPTY123",
) )
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -334,7 +350,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_handles_scanner_error(self): def test_command_handles_scanner_error(self):
"""Test that command handles scanner errors gracefully.""" """Test that command handles scanner errors gracefully."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -365,7 +383,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
checksum=f"BATCH{i}", checksum=f"BATCH{i}",
) )
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -388,7 +408,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
def test_command_displays_suggestions(self): def test_command_displays_suggestions(self):
"""Test that command displays suggestions in output.""" """Test that command displays suggestions in output."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance
@ -421,7 +443,9 @@ class TestScanDocumentsAICommand(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_ENABLE_AI_SCANNER=False) @override_settings(PAPERLESS_ENABLE_AI_SCANNER=False)
def test_command_works_when_ai_disabled(self): def test_command_works_when_ai_disabled(self):
"""Test that command can run even if AI scanner is disabled in settings.""" """Test that command can run even if AI scanner is disabled in settings."""
with mock.patch("documents.management.commands.scan_documents_ai.get_ai_scanner") as mock_scanner: with mock.patch(
"documents.management.commands.scan_documents_ai.get_ai_scanner"
) as mock_scanner:
mock_instance = mock.Mock() mock_instance = mock.Mock()
mock_scanner.return_value = mock_instance mock_scanner.return_value = mock_instance