Compare commits

..

4 Commits

Author SHA1 Message Date
Evan Lohn
d8b672bb8e fix: eager load chat session persona 2026-03-23 16:17:10 -07:00
Evan Lohn
f3e38a7ef7 refactor: filter fields 2026-03-23 16:17:09 -07:00
Evan Lohn
a4c9926eb1 pr comments 2026-03-23 16:16:02 -07:00
Evan Lohn
8c63831fff chore: use efficient persona id query path 2026-03-23 14:01:11 -07:00
49 changed files with 370 additions and 875 deletions

View File

@@ -177,8 +177,8 @@ class ExtractedContextFiles(BaseModel):
class SearchParams(BaseModel):
"""Resolved search filter IDs and search-tool usage for a chat turn."""
search_project_id: int | None
search_persona_id: int | None
project_id_filter: int | None
persona_id_filter: int | None
search_usage: SearchToolUsage

View File

@@ -399,13 +399,13 @@ def determine_search_params(
"""
is_custom_persona = persona_id != DEFAULT_PERSONA_ID
search_project_id: int | None = None
search_persona_id: int | None = None
project_id_filter: int | None = None
persona_id_filter: int | None = None
if extracted_context_files.use_as_search_filter:
if is_custom_persona:
search_persona_id = persona_id
persona_id_filter = persona_id
else:
search_project_id = project_id
project_id_filter = project_id
search_usage = SearchToolUsage.AUTO
if not is_custom_persona and project_id:
@@ -418,8 +418,8 @@ def determine_search_params(
search_usage = SearchToolUsage.DISABLED
return SearchParams(
search_project_id=search_project_id,
search_persona_id=search_persona_id,
project_id_filter=project_id_filter,
persona_id_filter=persona_id_filter,
search_usage=search_usage,
)
@@ -474,11 +474,18 @@ def handle_stream_message_objects(
db_session=db_session,
)
yield CreateChatSessionID(chat_session_id=chat_session.id)
chat_session = get_chat_session_by_id(
chat_session_id=chat_session.id,
user_id=user_id,
db_session=db_session,
eager_load_persona=True,
)
else:
chat_session = get_chat_session_by_id(
chat_session_id=new_msg_req.chat_session_id,
user_id=user_id,
db_session=db_session,
eager_load_persona=True,
)
persona = chat_session.persona
@@ -711,8 +718,8 @@ def handle_stream_message_objects(
llm=llm,
search_tool_config=SearchToolConfig(
user_selected_filters=new_msg_req.internal_search_filters,
project_id=search_params.search_project_id,
persona_id=search_params.search_persona_id,
project_id_filter=search_params.project_id_filter,
persona_id_filter=search_params.persona_id_filter,
bypass_acl=bypass_acl,
slack_context=slack_context,
enable_slack_search=_should_enable_slack_search(

View File

@@ -2,7 +2,6 @@ from collections.abc import Sequence
from datetime import datetime
from enum import Enum
from typing import Any
from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
@@ -70,9 +69,13 @@ class BaseFilters(BaseModel):
class UserFileFilters(BaseModel):
user_file_ids: list[UUID] | None = None
project_id: int | None = None
persona_id: int | None = None
# Scopes search to user files tagged with a given project/persona in Vespa.
# These are NOT simply the IDs of the current project or persona — they are
# only set when the persona's/project's user files overflowed the LLM
# context window and must be searched via vector DB instead of being loaded
# directly into the prompt.
project_id_filter: int | None = None
persona_id_filter: int | None = None
class AssistantKnowledgeFilters(BaseModel):

View File

@@ -1,6 +1,5 @@
from collections import defaultdict
from datetime import datetime
from uuid import UUID
from sqlalchemy.orm import Session
@@ -39,9 +38,8 @@ logger = setup_logger()
def _build_index_filters(
user_provided_filters: BaseFilters | None,
user: User, # Used for ACLs, anonymous users only see public docs
project_id: int | None,
persona_id: int | None,
user_file_ids: list[UUID] | None,
project_id_filter: int | None,
persona_id_filter: int | None,
persona_document_sets: list[str] | None,
persona_time_cutoff: datetime | None,
db_session: Session | None = None,
@@ -97,16 +95,6 @@ def _build_index_filters(
if not source_filter and detected_source_filter:
source_filter = detected_source_filter
# CRITICAL FIX: If user_file_ids are present, we must ensure "user_file"
# source type is included in the filter, otherwise user files will be excluded!
if user_file_ids and source_filter:
from onyx.configs.constants import DocumentSource
# Add user_file to the source filter if not already present
if DocumentSource.USER_FILE not in source_filter:
source_filter = list(source_filter) + [DocumentSource.USER_FILE]
logger.debug("Added USER_FILE to source_filter for user knowledge search")
if bypass_acl:
user_acl_filters = None
elif acl_filters is not None:
@@ -117,9 +105,8 @@ def _build_index_filters(
user_acl_filters = build_access_filters_for_user(user, db_session)
final_filters = IndexFilters(
user_file_ids=user_file_ids,
project_id=project_id,
persona_id=persona_id,
project_id_filter=project_id_filter,
persona_id_filter=persona_id_filter,
source_type=source_filter,
document_set=document_set_filter,
time_cutoff=time_filter,
@@ -265,19 +252,16 @@ def search_pipeline(
db_session: Session | None = None,
auto_detect_filters: bool = False,
llm: LLM | None = None,
# If a project ID is provided, it will be exclusively scoped to that project
project_id: int | None = None,
# If a persona_id is provided, search scopes to files attached to this persona
persona_id: int | None = None,
# Vespa metadata filters for overflowing user files. NOT the raw IDs
# of the current project/persona — only set when user files couldn't fit
# in the LLM context and need to be searched via vector DB.
project_id_filter: int | None = None,
persona_id_filter: int | None = None,
# Pre-fetched data — when provided, avoids DB queries (no session needed)
acl_filters: list[str] | None = None,
embedding_model: EmbeddingModel | None = None,
prefetched_federated_retrieval_infos: list[FederatedRetrievalInfo] | None = None,
) -> list[InferenceChunk]:
user_uploaded_persona_files: list[UUID] | None = (
[user_file.id for user_file in persona.user_files] if persona else None
)
persona_document_sets: list[str] | None = (
[persona_document_set.name for persona_document_set in persona.document_sets]
if persona
@@ -302,9 +286,8 @@ def search_pipeline(
filters = _build_index_filters(
user_provided_filters=chunk_search_request.user_selected_filters,
user=user,
project_id=project_id,
persona_id=persona_id,
user_file_ids=user_uploaded_persona_files,
project_id_filter=project_id_filter,
persona_id_filter=persona_id_filter,
persona_document_sets=persona_document_sets,
persona_time_cutoff=persona_time_cutoff,
db_session=db_session,

View File

@@ -110,7 +110,6 @@ def search_chunks(
user_id=user_id,
source_types=list(source_filters) if source_filters else None,
document_set_names=query_request.filters.document_set,
user_file_ids=query_request.filters.user_file_ids,
)
federated_sources = set(

View File

@@ -28,6 +28,7 @@ from onyx.db.models import ChatMessage
from onyx.db.models import ChatMessage__SearchDoc
from onyx.db.models import ChatSession
from onyx.db.models import ChatSessionSharedStatus
from onyx.db.models import Persona
from onyx.db.models import SearchDoc as DBSearchDoc
from onyx.db.models import ToolCall
from onyx.db.models import User
@@ -53,9 +54,17 @@ def get_chat_session_by_id(
db_session: Session,
include_deleted: bool = False,
is_shared: bool = False,
eager_load_persona: bool = False,
) -> ChatSession:
stmt = select(ChatSession).where(ChatSession.id == chat_session_id)
if eager_load_persona:
stmt = stmt.options(
selectinload(ChatSession.persona).selectinload(Persona.tools),
selectinload(ChatSession.persona).selectinload(Persona.user_files),
selectinload(ChatSession.project),
)
if is_shared:
stmt = stmt.where(ChatSession.shared_status == ChatSessionSharedStatus.PUBLIC)
else:

View File

@@ -583,67 +583,6 @@ def get_latest_index_attempt_for_cc_pair_id(
return db_session.execute(stmt).scalar_one_or_none()
def get_latest_successful_index_attempt_for_cc_pair_id(
db_session: Session,
connector_credential_pair_id: int,
secondary_index: bool = False,
) -> IndexAttempt | None:
"""Returns the most recent successful index attempt for the given cc pair,
filtered to the current (or future) search settings.
Uses MAX(id) semantics to match get_latest_index_attempts_by_status."""
status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
stmt = (
select(IndexAttempt)
.where(
IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,
IndexAttempt.status.in_(
[IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
),
)
.join(SearchSettings)
.where(SearchSettings.status == status)
.order_by(desc(IndexAttempt.id))
.limit(1)
)
return db_session.execute(stmt).scalar_one_or_none()
def get_latest_successful_index_attempts_parallel(
secondary_index: bool = False,
) -> Sequence[IndexAttempt]:
"""Batch version: returns the latest successful index attempt per cc pair.
Covers both SUCCESS and COMPLETED_WITH_ERRORS (matching is_successful())."""
model_status = (
IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
)
with get_session_with_current_tenant() as db_session:
latest_ids = (
select(
IndexAttempt.connector_credential_pair_id,
func.max(IndexAttempt.id).label("max_id"),
)
.join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
.where(
SearchSettings.status == model_status,
IndexAttempt.status.in_(
[IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
),
)
.group_by(IndexAttempt.connector_credential_pair_id)
.subquery()
)
stmt = select(IndexAttempt).join(
latest_ids,
(
IndexAttempt.connector_credential_pair_id
== latest_ids.c.connector_credential_pair_id
)
& (IndexAttempt.id == latest_ids.c.max_id),
)
return db_session.execute(stmt).scalars().all()
def count_index_attempts_for_cc_pair(
db_session: Session,
cc_pair_id: int,

View File

@@ -10,8 +10,8 @@ How `IndexFilters` fields combine into the final query filter. Applies to both V
| **Tenant** | `tenant_id` | AND (multi-tenant only) |
| **ACL** | `access_control_list` | OR within, AND with rest |
| **Narrowing** | `source_type`, `tags`, `time_cutoff` | Each OR within, AND with rest |
| **Knowledge scope** | `document_set`, `user_file_ids`, `attached_document_ids`, `hierarchy_node_ids` | OR within group, AND with rest |
| **Additive scope** | `project_id`, `persona_id` | OR'd into knowledge scope **only when** a knowledge scope filter already exists |
| **Knowledge scope** | `document_set`, `attached_document_ids`, `hierarchy_node_ids`, `persona_id_filter` | OR within group, AND with rest |
| **Additive scope** | `project_id_filter` | OR'd into knowledge scope **only when** a knowledge scope filter already exists |
## How filters combine
@@ -31,12 +31,22 @@ AND time >= cutoff -- if set
The knowledge scope filter controls **what knowledge an assistant can access**.
### Primary vs additive triggers
- **`persona_id_filter`** is a **primary** trigger. A persona with user files IS explicit
knowledge, so `persona_id_filter` alone can start a knowledge scope. Note: this is
NOT the raw ID of the persona being used — it is only set when the persona's
user files overflowed the LLM context window.
- **`project_id_filter`** is **additive**. It widens an existing scope to include project
files but never restricts on its own — a chat inside a project should still search
team knowledge when no other knowledge is attached.
### No explicit knowledge attached
When `document_set`, `user_file_ids`, `attached_document_ids`, and `hierarchy_node_ids` are all empty/None:
When `document_set`, `attached_document_ids`, `hierarchy_node_ids`, and `persona_id_filter` are all empty/None:
- **No knowledge scope filter is applied.** The assistant can see everything (subject to ACL).
- `project_id` and `persona_id` are ignored — they never restrict on their own.
- `project_id_filter` is ignored — it never restricts on its own.
### One explicit knowledge type
@@ -44,39 +54,40 @@ When `document_set`, `user_file_ids`, `attached_document_ids`, and `hierarchy_no
-- Only document sets
AND (document_sets contains "Engineering" OR document_sets contains "Legal")
-- Only user files
AND (document_id = "uuid-1" OR document_id = "uuid-2")
-- Only persona user files (overflowed context)
AND (personas contains 42)
```
### Multiple explicit knowledge types (OR'd)
```
-- Document sets + user files
AND (
document_sets contains "Engineering"
OR document_id = "uuid-1"
)
```
### Explicit knowledge + overflowing user files
When an explicit knowledge restriction is in effect **and** `project_id` or `persona_id` is set (user files overflowed the LLM context window), the additive scopes widen the filter:
```
-- Document sets + persona user files overflowed
-- Document sets + persona user files
AND (
document_sets contains "Engineering"
OR personas contains 42
)
```
-- User files + project files overflowed
### Explicit knowledge + overflowing project files
When an explicit knowledge restriction is in effect **and** `project_id_filter` is set (project files overflowed the LLM context window), `project_id_filter` widens the filter:
```
-- Document sets + project files overflowed
AND (
document_id = "uuid-1"
document_sets contains "Engineering"
OR user_project contains 7
)
-- Persona user files + project files (won't happen in practice;
-- custom personas ignore project files per the precedence rule)
AND (
personas contains 42
OR user_project contains 7
)
```
### Only project_id or persona_id (no explicit knowledge)
### Only project_id_filter (no explicit knowledge)
No knowledge scope filter. The assistant searches everything.
@@ -91,11 +102,10 @@ AND (acl contains ...)
| Filter field | Vespa field | Vespa type | Purpose |
|---|---|---|---|
| `document_set` | `document_sets` | `weightedset<string>` | Connector doc sets attached to assistant |
| `user_file_ids` | `document_id` | `string` | User files uploaded to assistant |
| `attached_document_ids` | `document_id` | `string` | Documents explicitly attached (OpenSearch only) |
| `hierarchy_node_ids` | `ancestor_hierarchy_node_ids` | `array<int>` | Folder/space nodes (OpenSearch only) |
| `project_id` | `user_project` | `array<int>` | Project tag for overflowing user files |
| `persona_id` | `personas` | `array<int>` | Persona tag for overflowing user files |
| `persona_id_filter` | `personas` | `array<int>` | Persona tag for overflowing user files (**primary** trigger) |
| `project_id_filter` | `user_project` | `array<int>` | Project tag for overflowing project files (**additive** only) |
| `access_control_list` | `access_control_list` | `weightedset<string>` | ACL entries for the requesting user |
| `source_type` | `source_type` | `string` | Connector source type (e.g. `web`, `jira`) |
| `tags` | `metadata_list` | `array<string>` | Document metadata tags |

View File

@@ -3,7 +3,6 @@ from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from uuid import UUID
from onyx.configs.app_configs import DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S
from onyx.configs.app_configs import OPENSEARCH_EXPLAIN_ENABLED
@@ -219,9 +218,8 @@ class DocumentQuery:
source_types=index_filters.source_type or [],
tags=index_filters.tags or [],
document_sets=index_filters.document_set or [],
user_file_ids=index_filters.user_file_ids or [],
project_id=index_filters.project_id,
persona_id=index_filters.persona_id,
project_id_filter=index_filters.project_id_filter,
persona_id_filter=index_filters.persona_id_filter,
time_cutoff=index_filters.time_cutoff,
min_chunk_index=min_chunk_index,
max_chunk_index=max_chunk_index,
@@ -286,9 +284,8 @@ class DocumentQuery:
source_types=[],
tags=[],
document_sets=[],
user_file_ids=[],
project_id=None,
persona_id=None,
project_id_filter=None,
persona_id_filter=None,
time_cutoff=None,
min_chunk_index=None,
max_chunk_index=None,
@@ -356,9 +353,8 @@ class DocumentQuery:
source_types=index_filters.source_type or [],
tags=index_filters.tags or [],
document_sets=index_filters.document_set or [],
user_file_ids=index_filters.user_file_ids or [],
project_id=index_filters.project_id,
persona_id=index_filters.persona_id,
project_id_filter=index_filters.project_id_filter,
persona_id_filter=index_filters.persona_id_filter,
time_cutoff=index_filters.time_cutoff,
min_chunk_index=None,
max_chunk_index=None,
@@ -449,9 +445,8 @@ class DocumentQuery:
source_types=index_filters.source_type or [],
tags=index_filters.tags or [],
document_sets=index_filters.document_set or [],
user_file_ids=index_filters.user_file_ids or [],
project_id=index_filters.project_id,
persona_id=index_filters.persona_id,
project_id_filter=index_filters.project_id_filter,
persona_id_filter=index_filters.persona_id_filter,
time_cutoff=index_filters.time_cutoff,
min_chunk_index=None,
max_chunk_index=None,
@@ -529,9 +524,8 @@ class DocumentQuery:
source_types=index_filters.source_type or [],
tags=index_filters.tags or [],
document_sets=index_filters.document_set or [],
user_file_ids=index_filters.user_file_ids or [],
project_id=index_filters.project_id,
persona_id=index_filters.persona_id,
project_id_filter=index_filters.project_id_filter,
persona_id_filter=index_filters.persona_id_filter,
time_cutoff=index_filters.time_cutoff,
min_chunk_index=None,
max_chunk_index=None,
@@ -591,9 +585,8 @@ class DocumentQuery:
source_types=index_filters.source_type or [],
tags=index_filters.tags or [],
document_sets=index_filters.document_set or [],
user_file_ids=index_filters.user_file_ids or [],
project_id=index_filters.project_id,
persona_id=index_filters.persona_id,
project_id_filter=index_filters.project_id_filter,
persona_id_filter=index_filters.persona_id_filter,
time_cutoff=index_filters.time_cutoff,
min_chunk_index=None,
max_chunk_index=None,
@@ -824,9 +817,8 @@ class DocumentQuery:
source_types: list[DocumentSource],
tags: list[Tag],
document_sets: list[str],
user_file_ids: list[UUID],
project_id: int | None,
persona_id: int | None,
project_id_filter: int | None,
persona_id_filter: int | None,
time_cutoff: datetime | None,
min_chunk_index: int | None,
max_chunk_index: int | None,
@@ -857,12 +849,12 @@ class DocumentQuery:
list corresponding to a tag will be retrieved.
document_sets: If supplied, only documents with at least one
document set ID from this list will be retrieved.
user_file_ids: If supplied, only document IDs in this list will be
retrieved.
project_id: If not None, only documents with this project ID in user
projects will be retrieved.
persona_id: If not None, only documents whose personas array
contains this persona ID will be retrieved.
project_id_filter: If not None, only documents with this project ID
in user projects will be retrieved. Additive — only applied
when a knowledge scope already exists.
persona_id_filter: If not None, only documents whose personas array
contains this persona ID will be retrieved. Primary — creates
a knowledge scope on its own.
time_cutoff: Time cutoff for the documents to retrieve. If not None,
Documents which were last updated before this date will not be
returned. For documents which do not have a value for their last
@@ -879,10 +871,6 @@ class DocumentQuery:
NOTE: See DocumentChunk.max_chunk_size.
document_id: The document ID to retrieve. If None, no filter will be
applied for this. Defaults to None.
WARNING: This filters on the same property as user_file_ids.
Although it would never make sense to supply both, note that if
user_file_ids is supplied and does not contain document_id, no
matches will be retrieved.
attached_document_ids: Document IDs explicitly attached to the
assistant. If provided along with hierarchy_node_ids, documents
matching EITHER criteria will be retrieved (OR logic).
@@ -943,15 +931,6 @@ class DocumentQuery:
)
return document_set_filter
def _get_user_file_id_filter(user_file_ids: list[UUID]) -> dict[str, Any]:
# Logical OR operator on its elements.
user_file_id_filter: dict[str, Any] = {"bool": {"should": []}}
for user_file_id in user_file_ids:
user_file_id_filter["bool"]["should"].append(
{"term": {DOCUMENT_ID_FIELD_NAME: {"value": str(user_file_id)}}}
)
return user_file_id_filter
def _get_user_project_filter(project_id: int) -> dict[str, Any]:
# Logical OR operator on its elements.
user_project_filter: dict[str, Any] = {"bool": {"should": []}}
@@ -1052,14 +1031,17 @@ class DocumentQuery:
# assistant can see. When none are set the assistant searches
# everything.
#
# project_id / persona_id are additive: they make overflowing user files
# findable but must NOT trigger the restriction on their own (an agent
# with no explicit knowledge should search everything).
# persona_id_filter is a primary trigger — a persona with user files IS
# explicit knowledge, so it can start a knowledge scope on its own.
#
# project_id_filter is additive — it widens the scope to also cover
# overflowing project files but never restricts on its own (a chat
# inside a project should still search team knowledge).
has_knowledge_scope = (
attached_document_ids
or hierarchy_node_ids
or user_file_ids
or document_sets
or persona_id_filter is not None
)
if has_knowledge_scope:
@@ -1074,23 +1056,17 @@ class DocumentQuery:
knowledge_filter["bool"]["should"].append(
_get_hierarchy_node_filter(hierarchy_node_ids)
)
if user_file_ids:
knowledge_filter["bool"]["should"].append(
_get_user_file_id_filter(user_file_ids)
)
if document_sets:
knowledge_filter["bool"]["should"].append(
_get_document_set_filter(document_sets)
)
# Additive: widen scope to also cover overflowing user files, but
# only when an explicit restriction is already in effect.
if project_id is not None:
if persona_id_filter is not None:
knowledge_filter["bool"]["should"].append(
_get_user_project_filter(project_id)
_get_persona_filter(persona_id_filter)
)
if persona_id is not None:
if project_id_filter is not None:
knowledge_filter["bool"]["should"].append(
_get_persona_filter(persona_id)
_get_user_project_filter(project_id_filter)
)
filter_clauses.append(knowledge_filter)
@@ -1108,8 +1084,6 @@ class DocumentQuery:
)
if document_id is not None:
# WARNING: If user_file_ids has elements and if none of them are
# document_id, no matches will be retrieved.
filter_clauses.append(
{"term": {DOCUMENT_ID_FIELD_NAME: {"value": document_id}}}
)

View File

@@ -199,31 +199,29 @@ def build_vespa_filters(
]
_append(filter_parts, _build_or_filters(METADATA_LIST, tag_attributes))
# Knowledge scope: explicit knowledge attachments (document_sets,
# user_file_ids) restrict what an assistant can see. When none are
# set, the assistant can see everything.
# Knowledge scope: explicit knowledge attachments restrict what an
# assistant can see. When none are set, the assistant can see
# everything.
#
# project_id / persona_id are additive: they make overflowing user
# files findable in Vespa but must NOT trigger the restriction on
# their own (an agent with no explicit knowledge should search
# everything).
# persona_id_filter is a primary trigger — a persona with user files IS
# explicit knowledge, so it can start a knowledge scope on its own.
#
# project_id_filter is additive — it widens the scope to also cover
# overflowing project files but never restricts on its own (a chat
# inside a project should still search team knowledge).
knowledge_scope_parts: list[str] = []
_append(
knowledge_scope_parts, _build_or_filters(DOCUMENT_SETS, filters.document_set)
)
_append(knowledge_scope_parts, _build_persona_filter(filters.persona_id_filter))
user_file_ids_str = (
[str(uuid) for uuid in filters.user_file_ids] if filters.user_file_ids else None
)
_append(knowledge_scope_parts, _build_or_filters(DOCUMENT_ID, user_file_ids_str))
# Only include project/persona scopes when an explicit knowledge
# restriction is already in effect — they widen the scope to also
# cover overflowing user files but never restrict on their own.
# project_id_filter only widens an existing scope.
if knowledge_scope_parts:
_append(knowledge_scope_parts, _build_user_project_filter(filters.project_id))
_append(knowledge_scope_parts, _build_persona_filter(filters.persona_id))
_append(
knowledge_scope_parts,
_build_user_project_filter(filters.project_id_filter),
)
if len(knowledge_scope_parts) > 1:
filter_parts.append("(" + " or ".join(knowledge_scope_parts) + ")")

View File

@@ -38,17 +38,7 @@ def get_federated_retrieval_functions(
source_types: list[DocumentSource] | None,
document_set_names: list[str] | None,
slack_context: SlackContext | None = None,
user_file_ids: list[UUID] | None = None,
) -> list[FederatedRetrievalInfo]:
# When User Knowledge (user files) is the only knowledge source enabled,
# skip federated connectors entirely. User Knowledge mode means the agent
# should ONLY use uploaded files, not team connectors like Slack.
if user_file_ids and not document_set_names:
logger.debug(
"Skipping all federated connectors: User Knowledge mode enabled "
f"with {len(user_file_ids)} user files and no document sets"
)
return []
# Check for Slack bot context first (regardless of user_id)
if slack_context:

View File

@@ -43,9 +43,6 @@ from onyx.db.index_attempt import count_index_attempt_errors_for_cc_pair
from onyx.db.index_attempt import count_index_attempts_for_cc_pair
from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
from onyx.db.index_attempt import (
get_latest_successful_index_attempt_for_cc_pair_id,
)
from onyx.db.index_attempt import get_paginated_index_attempts_for_cc_pair_id
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.db.models import IndexAttempt
@@ -193,11 +190,6 @@ def get_cc_pair_full_info(
only_finished=False,
)
latest_successful_attempt = get_latest_successful_index_attempt_for_cc_pair_id(
db_session=db_session,
connector_credential_pair_id=cc_pair_id,
)
# Get latest permission sync attempt for status
latest_permission_sync_attempt = None
if cc_pair.access_type == AccessType.SYNC:
@@ -215,11 +207,6 @@ def get_cc_pair_full_info(
cc_pair_id=cc_pair_id,
),
last_index_attempt=latest_attempt,
last_successful_index_time=(
latest_successful_attempt.time_started
if latest_successful_attempt
else None
),
latest_deletion_attempt=get_deletion_attempt_snapshot(
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,

View File

@@ -3,7 +3,6 @@ import math
import mimetypes
import os
import zipfile
from datetime import datetime
from io import BytesIO
from typing import Any
from typing import cast
@@ -110,9 +109,6 @@ from onyx.db.federated import fetch_all_federated_connectors_parallel
from onyx.db.index_attempt import get_index_attempts_for_cc_pair
from onyx.db.index_attempt import get_latest_index_attempts_by_status
from onyx.db.index_attempt import get_latest_index_attempts_parallel
from onyx.db.index_attempt import (
get_latest_successful_index_attempts_parallel,
)
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import FederatedConnector
from onyx.db.models import IndexAttempt
@@ -1162,26 +1158,21 @@ def get_connector_indexing_status(
),
(),
),
# Get most recent successful index attempts
(
lambda: get_latest_successful_index_attempts_parallel(
request.secondary_index,
),
(),
),
]
if user and user.role == UserRole.ADMIN:
# For Admin users, we already got all the cc pairs in editable_cc_pairs
# so it's not needed to get them again
(
editable_cc_pairs,
federated_connectors,
latest_index_attempts,
latest_finished_index_attempts,
latest_successful_index_attempts,
) = run_functions_tuples_in_parallel(parallel_functions)
non_editable_cc_pairs = []
else:
parallel_functions.append(
# Get non-editable connector/credential pairs
(
lambda: get_connector_credential_pairs_for_user_parallel(
user, False, None, True, True, False, True, request.source
@@ -1195,7 +1186,6 @@ def get_connector_indexing_status(
federated_connectors,
latest_index_attempts,
latest_finished_index_attempts,
latest_successful_index_attempts,
non_editable_cc_pairs,
) = run_functions_tuples_in_parallel(parallel_functions)
@@ -1207,9 +1197,6 @@ def get_connector_indexing_status(
latest_finished_index_attempts = cast(
list[IndexAttempt], latest_finished_index_attempts
)
latest_successful_index_attempts = cast(
list[IndexAttempt], latest_successful_index_attempts
)
document_count_info = get_document_counts_for_all_cc_pairs(db_session)
@@ -1219,48 +1206,42 @@ def get_connector_indexing_status(
for connector_id, credential_id, cnt in document_count_info
}
def _attempt_lookup(
attempts: list[IndexAttempt],
) -> dict[int, IndexAttempt]:
return {attempt.connector_credential_pair_id: attempt for attempt in attempts}
cc_pair_to_latest_index_attempt: dict[tuple[int, int], IndexAttempt] = {
(
attempt.connector_credential_pair.connector_id,
attempt.connector_credential_pair.credential_id,
): attempt
for attempt in latest_index_attempts
}
cc_pair_to_latest_index_attempt = _attempt_lookup(latest_index_attempts)
cc_pair_to_latest_finished_index_attempt = _attempt_lookup(
latest_finished_index_attempts
)
cc_pair_to_latest_successful_index_attempt = _attempt_lookup(
latest_successful_index_attempts
)
cc_pair_to_latest_finished_index_attempt: dict[tuple[int, int], IndexAttempt] = {
(
attempt.connector_credential_pair.connector_id,
attempt.connector_credential_pair.credential_id,
): attempt
for attempt in latest_finished_index_attempts
}
def build_connector_indexing_status(
cc_pair: ConnectorCredentialPair,
is_editable: bool,
) -> ConnectorIndexingStatusLite | None:
# TODO remove this to enable ingestion API
if cc_pair.name == "DefaultCCPair":
return None
latest_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)
latest_finished_attempt = cc_pair_to_latest_finished_index_attempt.get(
cc_pair.id
latest_attempt = cc_pair_to_latest_index_attempt.get(
(cc_pair.connector_id, cc_pair.credential_id)
)
latest_successful_attempt = cc_pair_to_latest_successful_index_attempt.get(
cc_pair.id
latest_finished_attempt = cc_pair_to_latest_finished_index_attempt.get(
(cc_pair.connector_id, cc_pair.credential_id)
)
doc_count = cc_pair_to_document_cnt.get(
(cc_pair.connector_id, cc_pair.credential_id), 0
)
return _get_connector_indexing_status_lite(
cc_pair,
latest_attempt,
latest_finished_attempt,
(
latest_successful_attempt.time_started
if latest_successful_attempt
else None
),
is_editable,
doc_count,
cc_pair, latest_attempt, latest_finished_attempt, is_editable, doc_count
)
# Process editable cc_pairs
@@ -1421,7 +1402,6 @@ def _get_connector_indexing_status_lite(
cc_pair: ConnectorCredentialPair,
latest_index_attempt: IndexAttempt | None,
latest_finished_index_attempt: IndexAttempt | None,
last_successful_index_time: datetime | None,
is_editable: bool,
document_cnt: int,
) -> ConnectorIndexingStatusLite | None:
@@ -1455,7 +1435,7 @@ def _get_connector_indexing_status_lite(
else None
),
last_status=latest_index_attempt.status if latest_index_attempt else None,
last_success=last_successful_index_time,
last_success=cc_pair.last_successful_index_time,
docs_indexed=document_cnt,
latest_index_attempt_docs_indexed=(
latest_index_attempt.total_docs_indexed if latest_index_attempt else None

View File

@@ -330,7 +330,6 @@ class CCPairFullInfo(BaseModel):
num_docs_indexed: int, # not ideal, but this must be computed separately
is_editable_for_current_user: bool,
indexing: bool,
last_successful_index_time: datetime | None = None,
last_permission_sync_attempt_status: PermissionSyncStatus | None = None,
permission_syncing: bool = False,
last_permission_sync_attempt_finished: datetime | None = None,
@@ -383,7 +382,9 @@ class CCPairFullInfo(BaseModel):
creator_email=(
cc_pair_model.creator.email if cc_pair_model.creator else None
),
last_indexed=last_successful_index_time,
last_indexed=(
last_index_attempt.time_started if last_index_attempt else None
),
last_pruned=cc_pair_model.last_pruned,
last_full_permission_sync=cls._get_last_full_permission_sync(cc_pair_model),
overall_indexing_speed=overall_indexing_speed,

View File

@@ -53,8 +53,12 @@ logger = setup_logger()
class SearchToolConfig(BaseModel):
user_selected_filters: BaseFilters | None = None
project_id: int | None = None
persona_id: int | None = None
# Vespa metadata filters for overflowing user files. These are NOT the
# IDs of the current project/persona — they are only set when the
# project's/persona's user files didn't fit in the LLM context window and
# must be found via vector DB search instead.
project_id_filter: int | None = None
persona_id_filter: int | None = None
bypass_acl: bool = False
additional_context: str | None = None
slack_context: SlackContext | None = None
@@ -180,8 +184,8 @@ def construct_tools(
llm=llm,
document_index=document_index,
user_selected_filters=search_tool_config.user_selected_filters,
project_id=search_tool_config.project_id,
persona_id=search_tool_config.persona_id,
project_id_filter=search_tool_config.project_id_filter,
persona_id_filter=search_tool_config.persona_id_filter,
bypass_acl=search_tool_config.bypass_acl,
slack_context=search_tool_config.slack_context,
enable_slack_search=search_tool_config.enable_slack_search,
@@ -396,7 +400,6 @@ def construct_tools(
tool_definition=saved_tool.mcp_input_schema or {},
connection_config=connection_config,
user_email=user_email,
user_id=str(user.id),
user_oauth_token=mcp_user_oauth_token,
additional_headers=additional_mcp_headers,
)
@@ -429,8 +432,8 @@ def construct_tools(
llm=llm,
document_index=document_index,
user_selected_filters=search_tool_config.user_selected_filters,
project_id=search_tool_config.project_id,
persona_id=search_tool_config.persona_id,
project_id_filter=search_tool_config.project_id_filter,
persona_id_filter=search_tool_config.persona_id_filter,
bypass_acl=search_tool_config.bypass_acl,
slack_context=search_tool_config.slack_context,
enable_slack_search=search_tool_config.enable_slack_search,

View File

@@ -1,8 +1,6 @@
import json
from typing import Any
from mcp.client.auth import OAuthClientProvider
from onyx.chat.emitter import Emitter
from onyx.db.enums import MCPAuthenticationType
from onyx.db.enums import MCPTransport
@@ -49,7 +47,6 @@ class MCPTool(Tool[None]):
tool_definition: dict[str, Any],
connection_config: MCPConnectionConfig | None = None,
user_email: str = "",
user_id: str = "",
user_oauth_token: str | None = None,
additional_headers: dict[str, str] | None = None,
) -> None:
@@ -59,7 +56,6 @@ class MCPTool(Tool[None]):
self.mcp_server = mcp_server
self.connection_config = connection_config
self.user_email = user_email
self._user_id = user_id
self._user_oauth_token = user_oauth_token
self._additional_headers = additional_headers or {}
@@ -202,42 +198,12 @@ class MCPTool(Tool[None]):
llm_facing_response=llm_facing_response,
)
# For OAuth servers, construct OAuthClientProvider so the MCP SDK
# can refresh expired tokens automatically
auth: OAuthClientProvider | None = None
if (
self.mcp_server.auth_type == MCPAuthenticationType.OAUTH
and self.connection_config is not None
and self._user_id
):
if self.mcp_server.transport == MCPTransport.SSE:
logger.warning(
f"MCP tool '{self._name}': OAuth token refresh is not supported "
f"for SSE transport — auth provider will be ignored. "
f"Re-authentication may be required after token expiry."
)
else:
from onyx.server.features.mcp.api import UNUSED_RETURN_PATH
from onyx.server.features.mcp.api import make_oauth_provider
# user_id is the requesting user's UUID; safe here because
# UNUSED_RETURN_PATH ensures redirect_handler raises immediately
# and user_id is never consulted for Redis state lookups.
auth = make_oauth_provider(
self.mcp_server,
self._user_id,
UNUSED_RETURN_PATH,
self.connection_config.id,
None,
)
tool_result = call_mcp_tool(
self.mcp_server.server_url,
self._name,
llm_kwargs,
connection_headers=headers,
transport=self.mcp_server.transport or MCPTransport.STREAMABLE_HTTP,
auth=auth,
)
logger.info(f"MCP tool '{self._name}' executed successfully")
@@ -282,7 +248,6 @@ class MCPTool(Tool[None]):
"invalid token",
"invalid api key",
"invalid credentials",
"please reconnect to the server",
]
is_auth_error = any(

View File

@@ -764,8 +764,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
tags=None,
access_control_list=access_control_list,
tenant_id=get_current_tenant_id() if MULTI_TENANT else None,
user_file_ids=None,
project_id=None,
project_id_filter=None,
)
def _merge_indexed_and_crawled_results(

View File

@@ -244,10 +244,11 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
document_index: DocumentIndex,
# Respecting user selections
user_selected_filters: BaseFilters | None,
# If the chat is part of a project
project_id: int | None,
# If set, search scopes to files attached to this persona
persona_id: int | None = None,
# Vespa metadata filters for overflowing user files. NOT the raw IDs
# of the current project/persona — only set when user files couldn't
# fit in the LLM context and need to be searched via vector DB.
project_id_filter: int | None,
persona_id_filter: int | None = None,
bypass_acl: bool = False,
# Slack context for federated Slack search (tokens fetched internally)
slack_context: SlackContext | None = None,
@@ -261,8 +262,8 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
self.llm = llm
self.document_index = document_index
self.user_selected_filters = user_selected_filters
self.project_id = project_id
self.persona_id = persona_id
self.project_id_filter = project_id_filter
self.persona_id_filter = persona_id_filter
self.bypass_acl = bypass_acl
self.slack_context = slack_context
self.enable_slack_search = enable_slack_search
@@ -451,13 +452,15 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
hybrid_alpha=hybrid_alpha,
# For projects, the search scope is the project and has no other limits
user_selected_filters=(
self.user_selected_filters if self.project_id is None else None
self.user_selected_filters
if self.project_id_filter is None
else None
),
bypass_acl=self.bypass_acl,
limit=num_hits,
),
project_id=self.project_id,
persona_id=self.persona_id,
project_id_filter=self.project_id_filter,
persona_id_filter=self.persona_id_filter,
document_index=self.document_index,
user=self.user,
persona=self.persona,
@@ -574,7 +577,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
)
# Federated retrieval functions (non-Slack; Slack is separate)
if self.project_id is not None:
if self.project_id_filter is not None:
# Project mode ignores user filters → no federated sources
prefetch_source_types = None
else:
@@ -587,16 +590,12 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
persona_document_sets = (
[ds.name for ds in self.persona.document_sets] if self.persona else None
)
user_file_ids = (
[uf.id for uf in self.persona.user_files] if self.persona else None
)
federated_retrieval_infos = (
get_federated_retrieval_functions(
db_session=db_session,
user_id=self.user.id if self.user else None,
source_types=prefetch_source_types,
document_set_names=persona_document_sets,
user_file_ids=user_file_ids,
)
or []
)

View File

@@ -0,0 +1,37 @@
from sqlalchemy import inspect
from sqlalchemy.orm import Session
from onyx.db.chat import create_chat_session
from onyx.db.chat import get_chat_session_by_id
from onyx.db.models import Persona
def test_eager_load_persona_loads_relationships(db_session: Session) -> None:
    """Fetching a chat session with ``eager_load_persona=True`` must leave the
    persona, its collections, and the project pre-loaded (i.e. absent from
    SQLAlchemy's ``unloaded`` attribute set)."""
    test_persona = Persona(name="eager-load-test", description="test")
    db_session.add(test_persona)
    db_session.flush()

    session_row = create_chat_session(
        db_session=db_session,
        description="test",
        user_id=None,
        persona_id=test_persona.id,
    )
    fetched = get_chat_session_by_id(
        chat_session_id=session_row.id,
        user_id=None,
        db_session=db_session,
        eager_load_persona=True,
    )

    # Relationships that must already be populated on the chat session itself.
    session_unloaded = inspect(fetched).unloaded
    for relationship in ("persona", "project"):
        assert relationship not in session_unloaded

    # ...and on the eagerly loaded persona object.
    persona_state_unloaded = inspect(fetched.persona).unloaded
    for relationship in ("tools", "user_files"):
        assert relationship not in persona_state_unloaded

    db_session.rollback()

View File

@@ -1,34 +1,30 @@
"""Tests for OpenSearch assistant knowledge filter construction.
These tests verify that when an assistant (persona) has user files attached,
the search filter includes those user file IDs in the assistant knowledge filter
with OR logic (not AND), ensuring user files are discoverable alongside other
knowledge types like attached documents and hierarchy nodes.
This prevents a regression where user_file_ids were added as a separate AND
filter, making it impossible to find user files when the assistant also had
attached documents or hierarchy nodes (since no document could match both).
These tests verify that when an assistant (persona) has knowledge attached,
the search filter includes the appropriate scope filters with OR logic (not AND),
ensuring documents are discoverable across knowledge types like attached documents,
hierarchy nodes, document sets, and persona/project user files.
"""
from typing import Any
from uuid import UUID
from onyx.configs.constants import DocumentSource
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.schema import DOCUMENT_ID_FIELD_NAME
from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
from onyx.document_index.opensearch.search import DocumentQuery
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
USER_FILE_ID = UUID("6ad84e45-4450-406c-9d36-fcb5e74aca6b")
ATTACHED_DOCUMENT_ID = "https://docs.google.com/document/d/test-doc-id"
HIERARCHY_NODE_ID = 42
PERSONA_ID = 7
def _get_search_filters(
source_types: list[DocumentSource],
user_file_ids: list[UUID],
attached_document_ids: list[str] | None,
hierarchy_node_ids: list[int] | None,
persona_id_filter: int | None = None,
document_sets: list[str] | None = None,
) -> list[dict[str, Any]]:
return DocumentQuery._get_search_filters(
tenant_state=TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False),
@@ -36,15 +32,14 @@ def _get_search_filters(
access_control_list=["user_email:test@example.com"],
source_types=source_types,
tags=[],
document_sets=[],
project_id=None,
persona_id=None,
document_sets=document_sets or [],
project_id_filter=None,
persona_id_filter=persona_id_filter,
time_cutoff=None,
min_chunk_index=None,
max_chunk_index=None,
max_chunk_size=None,
document_id=None,
user_file_ids=user_file_ids,
attached_document_ids=attached_document_ids,
hierarchy_node_ids=hierarchy_node_ids,
)
@@ -53,137 +48,97 @@ def _get_search_filters(
class TestAssistantKnowledgeFilter:
"""Tests for assistant knowledge filter construction in OpenSearch queries."""
def test_user_file_ids_included_in_assistant_knowledge_filter(self) -> None:
"""
Tests that user_file_ids are included in the assistant knowledge filter
with OR logic when the assistant has both user files and attached documents.
This prevents the regression where user files were ANDed with other
knowledge types, making them unfindable.
"""
# Under test: Call the filter construction method directly
def test_persona_id_filter_added_when_knowledge_scope_exists(self) -> None:
    """When explicit knowledge attachments are present (attached documents,
    hierarchy nodes, document sets), persona_id_filter must appear as one
    of the OR'd clauses inside the knowledge scope filter."""
    clauses = _get_search_filters(
        source_types=[DocumentSource.FILE],
        attached_document_ids=[ATTACHED_DOCUMENT_ID],
        hierarchy_node_ids=[HIERARCHY_NODE_ID],
        persona_id_filter=PERSONA_ID,
    )

    # The knowledge scope filter is the bool clause with minimum_should_match=1.
    scope_filter = next(
        (
            clause
            for clause in clauses
            if "bool" in clause
            and "should" in clause["bool"]
            and clause["bool"].get("minimum_should_match") == 1
        ),
        None,
    )
    assert scope_filter is not None, (
        "Expected to find an assistant knowledge filter with "
        "'minimum_should_match: 1'"
    )

    should_clauses = scope_filter["bool"]["should"]
    has_persona_term = any(
        clause.get("term", {}).get(PERSONAS_FIELD_NAME, {}).get("value")
        == PERSONA_ID
        for clause in should_clauses
    )
    assert has_persona_term, (
        f"Expected persona_id={PERSONA_ID} filter on {PERSONAS_FIELD_NAME} "
        f"in should clauses. Got: {should_clauses}"
    )
def test_persona_id_filter_alone_creates_knowledge_scope(self) -> None:
"""persona_id_filter IS a primary knowledge scope trigger — a persona
with user files is explicit knowledge, so it should restrict
search on its own."""
filter_clauses = _get_search_filters(
source_types=[],
attached_document_ids=None,
hierarchy_node_ids=None,
persona_id_filter=PERSONA_ID,
)
# Postcondition: Find the assistant knowledge filter (bool with should clauses)
knowledge_filter = None
for clause in filter_clauses:
if "bool" in clause and "should" in clause["bool"]:
# Check if this is the knowledge filter (has minimum_should_match=1)
if clause["bool"].get("minimum_should_match") == 1:
knowledge_filter = clause
break
assert (
knowledge_filter is not None
), "Expected to find an assistant knowledge filter with 'minimum_should_match: 1'"
# The knowledge filter should have 3 should clauses (user files, attached docs, hierarchy nodes)
should_clauses = knowledge_filter["bool"]["should"]
assert (
len(should_clauses) == 3
), f"Expected 3 should clauses (user_file, attached_doc, hierarchy_node), got {len(should_clauses)}"
# Verify user_file_id is in one of the should clauses
user_file_filter_found = False
for should_clause in should_clauses:
# The user file filter uses a nested bool with should for each file ID
if "bool" in should_clause and "should" in should_clause["bool"]:
for term_clause in should_clause["bool"]["should"]:
if "term" in term_clause:
term_value = term_clause["term"].get(DOCUMENT_ID_FIELD_NAME, {})
if term_value.get("value") == str(USER_FILE_ID):
user_file_filter_found = True
break
assert user_file_filter_found, (
f"Expected user_file_id {USER_FILE_ID} to be in the assistant knowledge "
f"filter's should clauses. Filter structure: {knowledge_filter}"
), "Expected persona_id_filter alone to create a knowledge scope filter"
persona_found = any(
clause.get("term", {}).get(PERSONAS_FIELD_NAME, {}).get("value")
== PERSONA_ID
for clause in knowledge_filter["bool"]["should"]
)
assert persona_found, (
f"Expected persona_id={PERSONA_ID} filter in knowledge scope. "
f"Got: {knowledge_filter}"
)
def test_user_file_ids_only_creates_knowledge_filter(self) -> None:
"""
Tests that when only user_file_ids are provided (no attached_documents or
hierarchy_nodes), the assistant knowledge filter is still created with the
user file IDs.
"""
# Precondition
def test_knowledge_filter_with_document_sets_and_persona_filter(self) -> None:
"""document_sets and persona_id_filter should be OR'd together in
the knowledge scope filter."""
filter_clauses = _get_search_filters(
source_types=[DocumentSource.USER_FILE],
user_file_ids=[USER_FILE_ID],
source_types=[],
attached_document_ids=None,
hierarchy_node_ids=None,
persona_id_filter=PERSONA_ID,
document_sets=["engineering"],
)
# Postcondition: Find filter that contains our user file ID
user_file_filter_found = False
knowledge_filter = None
for clause in filter_clauses:
clause_str = str(clause)
if str(USER_FILE_ID) in clause_str:
user_file_filter_found = True
break
if "bool" in clause and "should" in clause["bool"]:
if clause["bool"].get("minimum_should_match") == 1:
knowledge_filter = clause
break
assert (
user_file_filter_found
), f"Expected user_file_id {USER_FILE_ID} to be in the filter clauses. Got: {filter_clauses}"
knowledge_filter is not None
), "Expected knowledge filter when document_sets is provided"
def test_no_separate_user_file_filter_when_assistant_has_knowledge(self) -> None:
"""
Tests that user_file_ids are NOT added as a separate AND filter when the
assistant has other knowledge attached (attached_documents or hierarchy_nodes).
"""
filter_clauses = _get_search_filters(
source_types=[DocumentSource.FILE, DocumentSource.USER_FILE],
user_file_ids=[USER_FILE_ID],
attached_document_ids=[ATTACHED_DOCUMENT_ID],
hierarchy_node_ids=None,
)
# Postcondition: Count how many times user_file_id appears in filter clauses
# It should appear exactly once (in the knowledge filter), not twice
user_file_id_str = str(USER_FILE_ID)
occurrences = 0
for clause in filter_clauses:
if user_file_id_str in str(clause):
occurrences += 1
assert occurrences == 1, (
f"Expected user_file_id to appear exactly once in filter clauses "
f"(inside the assistant knowledge filter), but found {occurrences} "
f"occurrences. This suggests user_file_ids is being added as both a "
f"separate AND filter and inside the knowledge filter. "
f"Filter clauses: {filter_clauses}"
)
def test_multiple_user_files_all_included_in_filter(self) -> None:
"""
Tests that when multiple user files are attached to an assistant,
all of them are included in the filter.
"""
# Precondition
user_file_ids = [
UUID("6ad84e45-4450-406c-9d36-fcb5e74aca6b"),
UUID("7be95f56-5561-517d-ae47-acd6f85bdb7c"),
UUID("8cf06a67-6672-628e-bf58-ade7a96cec8d"),
]
filter_clauses = _get_search_filters(
source_types=[DocumentSource.USER_FILE],
user_file_ids=user_file_ids,
attached_document_ids=[ATTACHED_DOCUMENT_ID],
hierarchy_node_ids=None,
)
# Postcondition: All user file IDs should be in the filter
filter_str = str(filter_clauses)
for user_file_id in user_file_ids:
assert (
str(user_file_id) in filter_str
), f"Expected user_file_id {user_file_id} to be in the filter clauses"
filter_str = str(knowledge_filter)
assert (
"engineering" in filter_str
), "Expected document_set 'engineering' in knowledge filter"
assert (
str(PERSONA_ID) in filter_str
), f"Expected persona_id_filter {PERSONA_ID} in knowledge filter"

View File

@@ -368,10 +368,9 @@ class TestMCPPassThroughOAuth:
def mock_call_mcp_tool(
server_url: str, # noqa: ARG001
tool_name: str, # noqa: ARG001
arguments: dict[str, Any], # noqa: ARG001
kwargs: dict[str, Any], # noqa: ARG001
connection_headers: dict[str, str],
transport: MCPTransport, # noqa: ARG001
auth: Any = None, # noqa: ARG001
) -> dict[str, Any]:
captured_headers.update(connection_headers)
return mocked_response

View File

@@ -1,237 +0,0 @@
"""
Integration tests for the "Last Indexed" time displayed on both the
per-connector detail page and the all-connectors listing page.
Expected behavior: "Last Indexed" = time_started of the most recent
successful index attempt for the cc pair, regardless of pagination.
Edge cases:
1. First page of index attempts is entirely errors — last_indexed should
still reflect the older successful attempt beyond page 1.
2. Credential swap — successful attempts, then failures after a
"credential change"; last_indexed should reflect the most recent
successful attempt.
3. Mix of statuses — only the most recent successful attempt matters.
4. COMPLETED_WITH_ERRORS counts as a success for last_indexed purposes.
"""
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from onyx.db.models import IndexingStatus
from onyx.server.documents.models import CCPairFullInfo
from onyx.server.documents.models import ConnectorIndexingStatusLite
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser
def _wait_for_real_success(
    cc_pair: DATestCCPair,
    admin: DATestUser,
) -> None:
    """Block until the cc pair's initial index attempt completes successfully."""
    # A timestamp far in the past so any completed attempt qualifies as "after".
    since_forever = datetime(2000, 1, 1, tzinfo=timezone.utc)
    CCPairManager.wait_for_indexing_completion(
        cc_pair,
        after=since_forever,
        user_performing_action=admin,
        timeout=120,
    )
def _get_detail(cc_pair_id: int, admin: DATestUser) -> CCPairFullInfo:
    """Fetch the per-connector detail view; the cc pair must exist."""
    detail = CCPairManager.get_single(cc_pair_id, admin)
    assert detail is not None
    return detail
def _get_listing(cc_pair_id: int, admin: DATestUser) -> ConnectorIndexingStatusLite:
    """Fetch the all-connectors listing row; the cc pair must exist."""
    listing_row = CCPairManager.get_indexing_status_by_id(cc_pair_id, admin)
    assert listing_row is not None
    return listing_row
def test_last_indexed_first_page_all_errors(reset: None) -> None:  # noqa: ARG001
    """An older successful attempt must still drive "Last Indexed" on both
    the detail page and the listing page even when the first page of index
    attempts consists entirely of errors.

    The detail page UI paginates at 8 attempts; inserting 10 failures newer
    than the initial success guarantees the success falls off page 1.
    """
    admin_user = UserManager.create(name="admin_first_page_errors")
    pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)
    _wait_for_real_success(pair, admin_user)

    # Baseline: the initial successful run must have populated last_success.
    baseline_listing = _get_listing(pair.id, admin_user)
    assert baseline_listing.last_success is not None

    # Ten fresh failures push the successful attempt off page 1.
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=10,
        cc_pair_id=pair.id,
        status=IndexingStatus.FAILED,
        error_msg="simulated failure",
        base_time=datetime.now(tz=timezone.utc),
    )

    detail_view = _get_detail(pair.id, admin_user)
    listing_view = _get_listing(pair.id, admin_user)

    assert (
        detail_view.last_indexed is not None
    ), "Detail page last_indexed is None even though a successful attempt exists"
    assert (
        listing_view.last_success is not None
    ), "Listing page last_success is None even though a successful attempt exists"
    # The two surfaces must report the same timestamp.
    assert detail_view.last_indexed == listing_view.last_success, (
        f"Detail last_indexed={detail_view.last_indexed} != "
        f"listing last_success={listing_view.last_success}"
    )
def test_last_indexed_credential_swap_scenario(reset: None) -> None:  # noqa: ARG001
    """Simulate a credential swap: connector + cred1 (cc_pair_1) succeeds,
    then a second credential on the same connector (cc_pair_2) succeeds and
    subsequently fails repeatedly.

    cc_pair_2's last_indexed must come from its OWN successful attempt —
    never from cc_pair_1's older one — and the detail and listing pages
    must agree on the value.
    """
    admin_user = UserManager.create(name="admin_cred_swap")
    shared_connector = ConnectorManager.create(user_performing_action=admin_user)

    original_cred = CredentialManager.create(user_performing_action=admin_user)
    first_pair = CCPairManager.create(
        connector_id=shared_connector.id,
        credential_id=original_cred.id,
        user_performing_action=admin_user,
    )
    _wait_for_real_success(first_pair, admin_user)

    # "Swap" credentials: attach a new credential to the same connector.
    swapped_cred = CredentialManager.create(
        user_performing_action=admin_user, name="swapped-cred"
    )
    second_pair = CCPairManager.create(
        connector_id=shared_connector.id,
        credential_id=swapped_cred.id,
        user_performing_action=admin_user,
    )
    _wait_for_real_success(second_pair, admin_user)

    post_swap_listing = _get_listing(second_pair.id, admin_user)
    assert post_swap_listing.last_success is not None

    # Failures on the new pair after its success.
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=10,
        cc_pair_id=second_pair.id,
        status=IndexingStatus.FAILED,
        error_msg="credential expired",
        base_time=datetime.now(tz=timezone.utc),
    )

    detail_view = _get_detail(second_pair.id, admin_user)
    listing_view = _get_listing(second_pair.id, admin_user)
    assert detail_view.last_indexed is not None
    assert listing_view.last_success is not None
    assert detail_view.last_indexed == listing_view.last_success, (
        f"Detail last_indexed={detail_view.last_indexed} != "
        f"listing last_success={listing_view.last_success}"
    )
def test_last_indexed_mixed_statuses(reset: None) -> None:  # noqa: ARG001
    """With a mix of in-progress, failed, and successful attempts, only the
    most recent SUCCESSFUL attempt's time should surface, and the detail
    and listing pages must agree."""
    admin_user = UserManager.create(name="admin_mixed")
    pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)
    _wait_for_real_success(pair, admin_user)

    now = datetime.now(tz=timezone.utc)
    # (num_attempts, status, error_msg or None, hours before now)
    attempt_specs = [
        (1, IndexingStatus.SUCCESS, None, 5),
        (3, IndexingStatus.FAILED, "transient failure", 3),
        (1, IndexingStatus.IN_PROGRESS, None, 1),
    ]
    for count, status, error_msg, hours_ago in attempt_specs:
        call_kwargs: dict = {
            "num_attempts": count,
            "cc_pair_id": pair.id,
            "status": status,
            "base_time": now - timedelta(hours=hours_ago),
        }
        # Only failed attempts carry an error message (matches original calls).
        if error_msg is not None:
            call_kwargs["error_msg"] = error_msg
        IndexAttemptManager.create_test_index_attempts(**call_kwargs)

    detail_view = _get_detail(pair.id, admin_user)
    listing_view = _get_listing(pair.id, admin_user)
    assert detail_view.last_indexed is not None
    assert listing_view.last_success is not None
    assert detail_view.last_indexed == listing_view.last_success, (
        f"Detail last_indexed={detail_view.last_indexed} != "
        f"listing last_success={listing_view.last_success}"
    )
def test_last_indexed_completed_with_errors(reset: None) -> None:  # noqa: ARG001
    """COMPLETED_WITH_ERRORS counts as a success (mirroring
    ``IndexingStatus.is_successful()``). When it is the newest "success" and
    every later attempt failed, both surfaces should reflect its time."""
    admin_user = UserManager.create(name="admin_completed_errors")
    pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)
    _wait_for_real_success(pair, admin_user)

    now = datetime.now(tz=timezone.utc)
    # Partial success two hours ago...
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=1,
        cc_pair_id=pair.id,
        status=IndexingStatus.COMPLETED_WITH_ERRORS,
        base_time=now - timedelta(hours=2),
    )
    # ...then ten failures that push everything else off page 1.
    IndexAttemptManager.create_test_index_attempts(
        num_attempts=10,
        cc_pair_id=pair.id,
        status=IndexingStatus.FAILED,
        error_msg="post-partial failure",
        base_time=now,
    )

    detail_view = _get_detail(pair.id, admin_user)
    listing_view = _get_listing(pair.id, admin_user)
    assert (
        detail_view.last_indexed is not None
    ), "COMPLETED_WITH_ERRORS should count as a success for last_indexed"
    assert (
        listing_view.last_success is not None
    ), "COMPLETED_WITH_ERRORS should count as a success for last_success"
    assert detail_view.last_indexed == listing_view.last_success, (
        f"Detail last_indexed={detail_view.last_indexed} != "
        f"listing last_success={listing_view.last_success}"
    )

View File

@@ -1,7 +1,6 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from uuid import UUID
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import INDEX_SEPARATOR
@@ -11,10 +10,10 @@ from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
build_vespa_filters,
)
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import PERSONAS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import USER_PROJECT
@@ -151,56 +150,30 @@ class TestBuildVespaFilters:
result = build_vespa_filters(filters)
assert f"!({HIDDEN}=true) and " == result
def test_user_file_ids_filter(self) -> None:
"""Test user file IDs filtering."""
id1 = UUID("00000000-0000-0000-0000-000000000123")
id2 = UUID("00000000-0000-0000-0000-000000000456")
# Single user file ID (UUID)
filters = IndexFilters(access_control_list=[], user_file_ids=[id1])
result = build_vespa_filters(filters)
assert (
f'!({HIDDEN}=true) and ({DOCUMENT_ID} contains "{str(id1)}") and ' == result
)
# Multiple user file IDs (UUIDs)
filters = IndexFilters(access_control_list=[], user_file_ids=[id1, id2])
result = build_vespa_filters(filters)
assert (
f'!({HIDDEN}=true) and ({DOCUMENT_ID} contains "{str(id1)}" or {DOCUMENT_ID} contains "{str(id2)}") and '
== result
)
# Empty user file IDs
filters = IndexFilters(access_control_list=[], user_file_ids=[])
result = build_vespa_filters(filters)
assert f"!({HIDDEN}=true) and " == result
def test_user_project_filter(self) -> None:
"""Test user project filtering.
project_id alone does NOT trigger a knowledge scope restriction
project_id_filter alone does NOT trigger a knowledge scope restriction
(an agent with no explicit knowledge should search everything).
It only participates when explicit knowledge filters are present.
"""
# project_id alone → no restriction
filters = IndexFilters(access_control_list=[], project_id=789)
# project_id_filter alone → no restriction
filters = IndexFilters(access_control_list=[], project_id_filter=789)
result = build_vespa_filters(filters)
assert f"!({HIDDEN}=true) and " == result
# project_id with user_file_ids → both OR'd
id1 = UUID("00000000-0000-0000-0000-000000000123")
# project_id_filter with document_set → both OR'd
filters = IndexFilters(
access_control_list=[], project_id=789, user_file_ids=[id1]
access_control_list=[], project_id_filter=789, document_set=["set1"]
)
result = build_vespa_filters(filters)
assert (
f'!({HIDDEN}=true) and (({DOCUMENT_ID} contains "{str(id1)}") or ({USER_PROJECT} contains "789")) and '
f'!({HIDDEN}=true) and (({DOCUMENT_SETS} contains "set1") or ({USER_PROJECT} contains "789")) and '
== result
)
# No project id
filters = IndexFilters(access_control_list=[], project_id=None)
# No project id filter
filters = IndexFilters(access_control_list=[], project_id_filter=None)
result = build_vespa_filters(filters)
assert f"!({HIDDEN}=true) and " == result
@@ -233,17 +206,16 @@ class TestBuildVespaFilters:
def test_combined_filters(self) -> None:
"""Test combining multiple filter types.
Knowledge-scope filters (document_set, user_file_ids, project_id,
persona_id) are OR'd together, while all other filters are AND'd.
Knowledge-scope filters (document_set, project_id_filter, persona_id_filter)
are OR'd together, while all other filters are AND'd.
"""
id1 = UUID("00000000-0000-0000-0000-000000000123")
filters = IndexFilters(
access_control_list=["user1", "group1"],
source_type=[DocumentSource.WEB],
tags=[Tag(tag_key="color", tag_value="red")],
document_set=["set1"],
user_file_ids=[id1],
project_id=789,
project_id_filter=789,
persona_id_filter=42,
time_cutoff=datetime(2023, 1, 1, tzinfo=timezone.utc),
)
@@ -254,9 +226,10 @@ class TestBuildVespaFilters:
expected += f'({SOURCE_TYPE} contains "web") and '
expected += f'({METADATA_LIST} contains "color{INDEX_SEPARATOR}red") and '
# Knowledge scope filters are OR'd together
# (persona_id_filter is primary, project_id_filter is additive — order reflects this)
expected += (
f'(({DOCUMENT_SETS} contains "set1")'
f' or ({DOCUMENT_ID} contains "{str(id1)}")'
f' or ({PERSONAS} contains "42")'
f' or ({USER_PROJECT} contains "789")'
f") and "
)
@@ -276,18 +249,37 @@ class TestBuildVespaFilters:
result = build_vespa_filters(filters)
assert f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains "set1") and ' == result
def test_knowledge_scope_document_set_and_user_files_ored(self) -> None:
"""Document set filter and user file IDs must be OR'd so that
connector documents (in the set) and user files (with specific
IDs) can both be found."""
id1 = UUID("00000000-0000-0000-0000-000000000123")
def test_persona_id_filter_is_primary_knowledge_scope(self) -> None:
    """A persona filter on its own must restrict the search scope — a
    persona with user files counts as explicit knowledge."""
    index_filters = IndexFilters(access_control_list=[], persona_id_filter=42)
    built = build_vespa_filters(index_filters)
    expected = f'!({HIDDEN}=true) and ({PERSONAS} contains "42") and '
    assert built == expected
def test_persona_id_filter_with_project_id_filter(self) -> None:
    """Once persona_id_filter opens the knowledge scope, project_id_filter
    must be OR'd into it additively."""
    index_filters = IndexFilters(
        access_control_list=[], persona_id_filter=42, project_id_filter=789
    )
    built = build_vespa_filters(index_filters)
    # Persona clause first, project clause OR'd in after it.
    want = (
        f"!({HIDDEN}=true) and "
        f'(({PERSONAS} contains "42") or ({USER_PROJECT} contains "789")) and '
    )
    assert built == want
def test_knowledge_scope_document_set_and_persona_filter_ored(self) -> None:
    """Document set filter and persona_id_filter must be OR'd so that
    connector documents (in the set) and persona user files can
    both be found.

    Fixes leftover diff residue: the old test's `user_file_ids=[id1]` line
    referenced an undefined `id1`, and a stale duplicate `expected =`
    assignment shadowed the correct one.
    """
    filters = IndexFilters(
        access_control_list=[],
        document_set=["engineering"],
        persona_id_filter=42,
    )
    result = build_vespa_filters(filters)
    expected = f'!({HIDDEN}=true) and (({DOCUMENT_SETS} contains "engineering") or ({PERSONAS} contains "42")) and '
    assert expected == result
def test_acl_large_list_uses_weighted_set(self) -> None:

View File

@@ -1,5 +1,6 @@
"use client";
import { cn } from "@opal/utils";
import { useTableSize } from "@opal/components/table/TableSizeContext";
interface ActionsContainerProps {
@@ -24,7 +25,14 @@ export default function ActionsContainer({
data-size={size}
onClick={onClick}
>
<div className="flex h-full items-center justify-end">{children}</div>
<div
className={cn(
"flex h-full items-center",
type === "cell" ? "justify-end" : "justify-center"
)}
>
{children}
</div>
</Tag>
);
}

View File

@@ -61,7 +61,6 @@ function DragOverlayRowInner<TData>({
imageSrc={qualifierColumn.getImageSrc?.(row.original)}
imageAlt={qualifierColumn.getImageAlt?.(row.original)}
background={qualifierColumn.background}
iconSize={qualifierColumn.iconSize}
selectable={isSelectable}
selected={isSelectable && row.getIsSelected()}
/>

View File

@@ -47,7 +47,7 @@ function Table({
<table
ref={ref}
className={cn("border-separate border-spacing-0", !width && "min-w-full")}
style={{ width }}
style={{ tableLayout: "fixed", width }}
data-size={size}
data-variant={variant}
data-selection={selectionBehavior}

View File

@@ -92,7 +92,9 @@ export default function TableHead({
data-size={resolvedSize}
data-bottom-border={bottomBorder || undefined}
>
<div className="flex items-center gap-1">
<div
className={cn("flex items-center gap-1", alignmentFlexClass[alignment])}
>
<div className="table-head-label">
<Text
mainUiAction={!isSmall}

View File

@@ -26,13 +26,11 @@ interface TableQualifierProps {
imageAlt?: string;
/** Show a tinted background container behind the content. */
background?: boolean;
/** Icon size preset. `"lg"` = 28/24, `"md"` = 20/16. @default "md" */
iconSize?: "lg" | "md";
}
const iconSizesMap = {
lg: { lg: 28, md: 24 },
md: { lg: 20, md: 16 },
const iconSizes = {
lg: 28,
md: 24,
} as const;
function getOverlayStyles(selected: boolean, disabled: boolean) {
@@ -55,10 +53,9 @@ function TableQualifier({
imageSrc,
imageAlt = "",
background = false,
iconSize: iconSizePreset = "md",
}: TableQualifierProps) {
const resolvedSize = useTableSize();
const iconSize = iconSizesMap[iconSizePreset][resolvedSize];
const iconSize = iconSizes[resolvedSize];
const overlayStyles = getOverlayStyles(selected, disabled);
function renderContent() {

View File

@@ -33,8 +33,6 @@ interface QualifierConfig<TData> {
getImageAlt?: (row: TData) => string;
/** Show a tinted background container behind the content. @default false */
background?: boolean;
/** Icon size preset. `"lg"` = 28/24, `"md"` = 20/16. @default "md" */
iconSize?: "lg" | "md";
}
// ---------------------------------------------------------------------------
@@ -162,7 +160,6 @@ export function createTableColumns<TData>(): TableColumnsBuilder<TData> {
getImageSrc: config?.getImageSrc,
getImageAlt: config?.getImageAlt,
background: config?.background,
iconSize: config?.iconSize,
};
},

View File

@@ -544,7 +544,6 @@ export function Table<TData>(props: DataTableProps<TData>) {
imageSrc={qDef.getImageSrc?.(row.original)}
imageAlt={qDef.getImageAlt?.(row.original)}
background={qDef.background}
iconSize={qDef.iconSize}
selectable={showQualifierCheckbox}
selected={
showQualifierCheckbox && row.getIsSelected()

View File

@@ -59,8 +59,6 @@ export interface OnyxQualifierColumn<TData> extends OnyxColumnBase<TData> {
getImageAlt?: (row: TData) => string;
/** Show a tinted background container behind the content. @default false */
background?: boolean;
/** Icon size preset. Use `"lg"` for avatars, `"md"` for regular icons. @default "md" */
iconSize?: "lg" | "md";
}
/** Data column — accessor-based column with sorting/resizing. */

View File

@@ -159,7 +159,6 @@ export { default as SvgSort } from "@opal/icons/sort";
export { default as SvgSortOrder } from "@opal/icons/sort-order";
export { default as SvgSparkle } from "@opal/icons/sparkle";
export { default as SvgStar } from "@opal/icons/star";
export { default as SvgStarOff } from "@opal/icons/star-off";
export { default as SvgStep1 } from "@opal/icons/step1";
export { default as SvgStep2 } from "@opal/icons/step2";
export { default as SvgStep3 } from "@opal/icons/step3";

View File

@@ -1,22 +0,0 @@
import type { IconProps } from "@opal/types";

/** Crossed-out ("star off") icon, 16x16 viewBox, stroked with currentColor. */
function SvgStarOff({ size, ...props }: IconProps) {
  return (
    <svg
      width={size}
      height={size}
      viewBox="0 0 16 16"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
      stroke="currentColor"
      {...props}
    >
      <path
        d="M1 1L5.56196 5.56196M15 15L5.56196 5.56196M5.56196 5.56196L1.33333 6.18004L4.66666 9.42671L3.88 14.0134L8 11.8467L12.12 14.0134L11.7267 11.72M12.1405 8.64051L14.6667 6.18004L10.06 5.50671L8 1.33337L6.95349 3.45349"
        strokeWidth={1.5}
        strokeLinecap="round"
        strokeLinejoin="round"
      />
    </svg>
  );
}

export default SvgStarOff;

View File

@@ -626,7 +626,10 @@ function Main({ ccPairId }: { ccPairId: number }) {
<div className="w-[200px]">
<div className="text-sm font-medium mb-1">Last Indexed</div>
<div className="text-sm text-text-default">
{timeAgo(ccPair?.last_indexed) ?? "-"}
{timeAgo(
indexAttempts?.find((attempt) => attempt.status === "success")
?.time_started
) ?? "-"}
</div>
</div>

View File

@@ -123,9 +123,6 @@ export interface LLMProviderFormProps {
open?: boolean;
onOpenChange?: (open: boolean) => void;
/** The current default model name for this provider (from the global default). */
defaultModelName?: string;
// Onboarding-specific (only when variant === "onboarding")
onboardingState?: OnboardingState;
onboardingActions?: OnboardingActions;

View File

@@ -148,14 +148,12 @@ interface ExistingProviderCardProps {
provider: LLMProviderView;
isDefault: boolean;
isLastProvider: boolean;
defaultModelName?: string;
}
function ExistingProviderCard({
provider,
isDefault,
isLastProvider,
defaultModelName,
}: ExistingProviderCardProps) {
const { mutate } = useSWRConfig();
const [isOpen, setIsOpen] = useState(false);
@@ -232,12 +230,7 @@ function ExistingProviderCard({
</Section>
}
/>
{getModalForExistingProvider(
provider,
isOpen,
setIsOpen,
defaultModelName
)}
{getModalForExistingProvider(provider, isOpen, setIsOpen)}
</Card>
</Hoverable.Root>
</>
@@ -453,11 +446,6 @@ export default function LLMConfigurationPage() {
provider={provider}
isDefault={defaultText?.provider_id === provider.id}
isLastProvider={sortedProviders.length === 1}
defaultModelName={
defaultText?.provider_id === provider.id
? defaultText.model_name
: undefined
}
/>
))}
</div>

View File

@@ -77,7 +77,6 @@ function buildColumns(onMutate: () => void) {
return [
tc.qualifier({
content: "icon",
iconSize: "lg",
getContent: (row) => {
const user = {
email: row.email,

View File

@@ -35,7 +35,6 @@ export default function AnthropicModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -65,15 +64,10 @@ export default function AnthropicModal({
default_model_name: DEFAULT_DEFAULT_MODEL_NAME,
}
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
api_key: existingLlmProvider?.api_key ?? "",
api_base: existingLlmProvider?.api_base ?? undefined,
default_model_name:
defaultModelName ??
wellKnownLLMProvider?.recommended_default_model?.name ??
DEFAULT_DEFAULT_MODEL_NAME,
is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,

View File

@@ -81,7 +81,6 @@ export default function AzureModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -110,11 +109,7 @@ export default function AzureModal({
default_model_name: "",
} as AzureModalValues)
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
api_key: existingLlmProvider?.api_key ?? "",
target_uri: buildTargetUri(existingLlmProvider),
};

View File

@@ -315,7 +315,6 @@ export default function BedrockModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -352,11 +351,7 @@ export default function BedrockModal({
},
} as BedrockModalValues)
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
custom_config: {
AWS_REGION_NAME:
(existingLlmProvider?.custom_config?.AWS_REGION_NAME as string) ??

View File

@@ -197,7 +197,6 @@ export default function CustomModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
}: LLMProviderFormProps) {
@@ -210,11 +209,7 @@ export default function CustomModal({
const onClose = () => onOpenChange?.(false);
const initialValues = {
...buildDefaultInitialValues(
existingLlmProvider,
undefined,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider),
...(isOnboarding ? buildOnboardingInitialValues() : {}),
provider: existingLlmProvider?.provider ?? "",
model_configurations: existingLlmProvider?.model_configurations.map(

View File

@@ -192,7 +192,6 @@ export default function LMStudioForm({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -226,11 +225,7 @@ export default function LMStudioForm({
},
} as LMStudioFormValues)
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
custom_config: {
LM_STUDIO_API_KEY:

View File

@@ -159,7 +159,6 @@ export default function LiteLLMProxyModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -191,11 +190,7 @@ export default function LiteLLMProxyModal({
default_model_name: "",
} as LiteLLMProxyModalValues)
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
api_key: existingLlmProvider?.api_key ?? "",
api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
};

View File

@@ -212,7 +212,6 @@ export default function OllamaModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -245,11 +244,7 @@ export default function OllamaModal({
},
} as OllamaModalValues)
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
custom_config: {
OLLAMA_API_KEY:

View File

@@ -35,7 +35,6 @@ export default function OpenAIModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -64,14 +63,9 @@ export default function OpenAIModal({
default_model_name: DEFAULT_DEFAULT_MODEL_NAME,
}
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
api_key: existingLlmProvider?.api_key ?? "",
default_model_name:
defaultModelName ??
wellKnownLLMProvider?.recommended_default_model?.name ??
DEFAULT_DEFAULT_MODEL_NAME,
is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,

View File

@@ -158,7 +158,6 @@ export default function OpenRouterModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -190,11 +189,7 @@ export default function OpenRouterModal({
default_model_name: "",
} as OpenRouterModalValues)
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
api_key: existingLlmProvider?.api_key ?? "",
api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,
};

View File

@@ -48,7 +48,6 @@ export default function VertexAIModal({
shouldMarkAsDefault,
open,
onOpenChange,
defaultModelName,
onboardingState,
onboardingActions,
llmDescriptor,
@@ -81,13 +80,8 @@ export default function VertexAIModal({
},
} as VertexAIModalValues)
: {
...buildDefaultInitialValues(
existingLlmProvider,
modelConfigurations,
defaultModelName
),
...buildDefaultInitialValues(existingLlmProvider, modelConfigurations),
default_model_name:
defaultModelName ??
wellKnownLLMProvider?.recommended_default_model?.name ??
VERTEXAI_DEFAULT_MODEL,
is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,

View File

@@ -22,15 +22,9 @@ function detectIfRealOpenAIProvider(provider: LLMProviderView) {
export function getModalForExistingProvider(
provider: LLMProviderView,
open?: boolean,
onOpenChange?: (open: boolean) => void,
defaultModelName?: string
onOpenChange?: (open: boolean) => void
) {
const props = {
existingLlmProvider: provider,
open,
onOpenChange,
defaultModelName,
};
const props = { existingLlmProvider: provider, open, onOpenChange };
switch (provider.provider) {
case LLMProviderName.OPENAI:

View File

@@ -12,11 +12,9 @@ export const LLM_FORM_CLASS_NAME = "flex flex-col gap-y-4 items-stretch mt-6";
export const buildDefaultInitialValues = (
existingLlmProvider?: LLMProviderView,
modelConfigurations?: ModelConfiguration[],
currentDefaultModelName?: string
modelConfigurations?: ModelConfiguration[]
) => {
const defaultModelName =
currentDefaultModelName ??
existingLlmProvider?.model_configurations?.[0]?.name ??
modelConfigurations?.[0]?.name ??
"";