Compare commits

...

4 Commits

Author SHA1 Message Date
Jean Caillé
569fef22b4 style: align test conventions with existing codebase
- Prefix unused mock params with _ and type as Any
- Extract shared db session mock setup into helper
- Remove unused pytest import
- Use module path constant to reduce repetition
2026-03-31 18:21:42 +02:00
Jean Caille
517d1035e0 Merge branch 'main' into fix/ttl-skip-missing-file-records 2026-03-31 18:13:12 +02:00
Jean Caillé
ad108dd573 test: add unit tests for TTL deletion resilience
- test_chat_deletion: verifies delete_messages_and_files_from_chat_session
  continues when a file record is missing, and handles messages with no files
- test_ttl_task: verifies perform_ttl_management_task continues deleting
  remaining sessions after one fails, and reports correct success status

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-31 18:11:45 +02:00
Jean Caillé
2a5810a44a fix: TTL management task no longer crashes on missing file records
The TTL cleanup task would crash with a RuntimeError when trying to
delete a file record that no longer exists, blocking cleanup of all
remaining sessions. Two fixes:

1. Wrap file deletion in delete_messages_and_files_from_chat_session
   with try/except — a missing file is already the desired state, so
   log a warning and continue.

2. Add per-session error handling in the TTL task loop. The existing
   comment said "one session per delete so that we don't blow up" but
   the outer try/except still aborted on the first failure. Now each
   session deletion is individually wrapped so failures don't block
   cleanup of subsequent sessions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 19:25:36 +02:00
8 changed files with 194 additions and 11 deletions

View File

@@ -54,27 +54,35 @@ def perform_ttl_management_task(
retention_limit_days, db_session
)
failures = 0
for user_id, session_id in old_chat_sessions:
# one session per delete so that we don't blow up if a deletion fails.
with get_session_with_current_tenant() as db_session:
delete_chat_session(
user_id,
session_id,
db_session,
include_deleted=True,
hard_delete=True,
try:
with get_session_with_current_tenant() as db_session:
delete_chat_session(
user_id,
session_id,
db_session,
include_deleted=True,
hard_delete=True,
)
except Exception:
failures += 1
logger.exception(
"Failed to delete chat session "
f"user_id={user_id} session_id={session_id}, "
"continuing with remaining sessions"
)
with get_session_with_current_tenant() as db_session:
mark_task_as_finished_with_id(
db_session=db_session,
task_id=task_id,
success=True,
success=failures == 0,
)
except Exception:
logger.exception(
f"delete_chat_session exceptioned. user_id={user_id} session_id={session_id}"
f"TTL management task failed. user_id={user_id} session_id={session_id}"
)
with get_session_with_current_tenant() as db_session:
mark_task_as_finished_with_id(

View File

@@ -185,7 +185,15 @@ def delete_messages_and_files_from_chat_session(
for _, files in messages_with_files:
file_store = get_default_file_store()
for file_info in files or []:
file_store.delete_file(file_id=file_info.get("id"))
try:
file_store.delete_file(file_id=file_info.get("id"))
except Exception:
logger.warning(
"Failed to delete file %s from file store during "
"chat session cleanup, skipping.",
file_info.get("id"),
)
continue
# Delete ChatMessage records - CASCADE constraints will automatically handle:
# - ChatMessage__StandardAnswer relationship records

View File

@@ -0,0 +1,109 @@
"""Tests for TTL management task resilience."""
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4
_TASK_MODULE = "ee.onyx.background.celery.tasks.ttl_management.tasks"
def _setup_db_session_mock(mock_get_db_session: MagicMock) -> None:
mock_db_session = MagicMock()
mock_get_db_session.return_value.__enter__ = MagicMock(
return_value=mock_db_session
)
mock_get_db_session.return_value.__exit__ = MagicMock(return_value=False)
@patch(f"{_TASK_MODULE}.get_session_with_current_tenant")
@patch(f"{_TASK_MODULE}.delete_chat_session")
@patch(f"{_TASK_MODULE}.get_chat_sessions_older_than")
@patch(f"{_TASK_MODULE}.mark_task_as_finished_with_id")
@patch(f"{_TASK_MODULE}.register_task")
def test_ttl_task_continues_after_session_delete_failure(
_mock_register: Any,
mock_mark_finished: MagicMock,
mock_get_old_sessions: MagicMock,
mock_delete_session: MagicMock,
mock_get_db_session: MagicMock,
) -> None:
"""One failing session should not prevent cleanup of remaining sessions."""
from ee.onyx.background.celery.tasks.ttl_management.tasks import (
perform_ttl_management_task,
)
user1, session1 = uuid4(), uuid4()
user2, session2 = uuid4(), uuid4()
user3, session3 = uuid4(), uuid4()
mock_get_old_sessions.return_value = [
(user1, session1),
(user2, session2),
(user3, session3),
]
# Second session fails
mock_delete_session.side_effect = [
None,
RuntimeError("File does not exist"),
None,
]
_setup_db_session_mock(mock_get_db_session)
mock_task = MagicMock()
mock_task.request.id = "test-task-id"
# Call the underlying function directly, bypassing Celery decorator
perform_ttl_management_task.__wrapped__(
mock_task, retention_limit_days=30, tenant_id="test"
)
# All three sessions should have been attempted
assert mock_delete_session.call_count == 3
# Task marked as finished with success=False (due to the one failure)
mock_mark_finished.assert_called()
finish_call_kwargs = mock_mark_finished.call_args[1]
assert finish_call_kwargs["success"] is False
@patch(f"{_TASK_MODULE}.get_session_with_current_tenant")
@patch(f"{_TASK_MODULE}.delete_chat_session")
@patch(f"{_TASK_MODULE}.get_chat_sessions_older_than")
@patch(f"{_TASK_MODULE}.mark_task_as_finished_with_id")
@patch(f"{_TASK_MODULE}.register_task")
def test_ttl_task_reports_success_when_all_deletions_pass(
_mock_register: Any,
mock_mark_finished: MagicMock,
mock_get_old_sessions: MagicMock,
mock_delete_session: MagicMock,
mock_get_db_session: MagicMock,
) -> None:
"""Task should report success when all sessions are deleted."""
from ee.onyx.background.celery.tasks.ttl_management.tasks import (
perform_ttl_management_task,
)
mock_get_old_sessions.return_value = [
(uuid4(), uuid4()),
(uuid4(), uuid4()),
]
mock_delete_session.side_effect = None
_setup_db_session_mock(mock_get_db_session)
mock_task = MagicMock()
mock_task.request.id = "test-task-id"
perform_ttl_management_task.__wrapped__(
mock_task, retention_limit_days=30, tenant_id="test"
)
assert mock_delete_session.call_count == 2
mock_mark_finished.assert_called()
finish_call_kwargs = mock_mark_finished.call_args[1]
assert finish_call_kwargs["success"] is True

View File

@@ -0,0 +1,58 @@
"""Tests for chat session and message deletion resilience."""
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4
from onyx.db.chat import delete_messages_and_files_from_chat_session
@patch("onyx.db.chat.delete_orphaned_search_docs")
@patch("onyx.db.chat.get_default_file_store")
def test_delete_messages_skips_missing_files(
mock_get_file_store: MagicMock,
_mock_delete_orphaned: Any,
) -> None:
"""Deletion should continue when a referenced file record no longer exists."""
session_id = uuid4()
file_store = MagicMock()
file_store.delete_file.side_effect = [
None, # first file deletes fine
RuntimeError("File by id abc does not exist or was deleted"),
None, # third file deletes fine
]
mock_get_file_store.return_value = file_store
mock_db_session = MagicMock()
mock_db_session.execute.return_value.fetchall.return_value = [
(1, [{"id": "file-ok-1"}, {"id": "file-missing"}, {"id": "file-ok-2"}]),
]
delete_messages_and_files_from_chat_session(session_id, mock_db_session)
assert file_store.delete_file.call_count == 3
mock_db_session.execute.assert_called()
mock_db_session.commit.assert_called()
@patch("onyx.db.chat.delete_orphaned_search_docs")
@patch("onyx.db.chat.get_default_file_store")
def test_delete_messages_succeeds_with_no_files(
mock_get_file_store: MagicMock,
_mock_delete_orphaned: Any,
) -> None:
"""Deletion works when messages have no attached files."""
session_id = uuid4()
mock_db_session = MagicMock()
mock_db_session.execute.return_value.fetchall.return_value = [
(1, None),
(2, []),
]
delete_messages_and_files_from_chat_session(session_id, mock_db_session)
mock_get_file_store.return_value.delete_file.assert_not_called()
mock_db_session.commit.assert_called()