mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-18 16:25:45 +00:00
Compare commits
1 Commits
migration_
...
updates
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
171deb495e |
@@ -39,12 +39,6 @@ env:
|
||||
AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
|
||||
AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
|
||||
AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
|
||||
# Sharepoint
|
||||
SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
|
||||
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
|
||||
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
|
||||
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
|
||||
|
||||
jobs:
|
||||
connectors-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
"""foreign key input prompts
|
||||
|
||||
Revision ID: 33ea50e88f24
|
||||
Revises: a6df6b88ef81
|
||||
Create Date: 2025-01-29 10:54:22.141765
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "33ea50e88f24"
|
||||
down_revision = "a6df6b88ef81"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Safely drop constraints if exists
|
||||
op.execute(
|
||||
"""
|
||||
ALTER TABLE inputprompt__user
|
||||
DROP CONSTRAINT IF EXISTS inputprompt__user_input_prompt_id_fkey
|
||||
"""
|
||||
)
|
||||
op.execute(
|
||||
"""
|
||||
ALTER TABLE inputprompt__user
|
||||
DROP CONSTRAINT IF EXISTS inputprompt__user_user_id_fkey
|
||||
"""
|
||||
)
|
||||
|
||||
# Recreate with ON DELETE CASCADE
|
||||
op.create_foreign_key(
|
||||
"inputprompt__user_input_prompt_id_fkey",
|
||||
"inputprompt__user",
|
||||
"inputprompt",
|
||||
["input_prompt_id"],
|
||||
["id"],
|
||||
ondelete="CASCADE",
|
||||
)
|
||||
|
||||
op.create_foreign_key(
|
||||
"inputprompt__user_user_id_fkey",
|
||||
"inputprompt__user",
|
||||
"user",
|
||||
["user_id"],
|
||||
["id"],
|
||||
ondelete="CASCADE",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Drop the new FKs with ondelete
|
||||
op.drop_constraint(
|
||||
"inputprompt__user_input_prompt_id_fkey",
|
||||
"inputprompt__user",
|
||||
type_="foreignkey",
|
||||
)
|
||||
op.drop_constraint(
|
||||
"inputprompt__user_user_id_fkey",
|
||||
"inputprompt__user",
|
||||
type_="foreignkey",
|
||||
)
|
||||
|
||||
# Recreate them without cascading
|
||||
op.create_foreign_key(
|
||||
"inputprompt__user_input_prompt_id_fkey",
|
||||
"inputprompt__user",
|
||||
"inputprompt",
|
||||
["input_prompt_id"],
|
||||
["id"],
|
||||
)
|
||||
op.create_foreign_key(
|
||||
"inputprompt__user_user_id_fkey",
|
||||
"inputprompt__user",
|
||||
"user",
|
||||
["user_id"],
|
||||
["id"],
|
||||
)
|
||||
@@ -1,37 +0,0 @@
|
||||
"""lowercase_user_emails
|
||||
|
||||
Revision ID: 4d58345da04a
|
||||
Revises: f1ca58b2f2ec
|
||||
Create Date: 2025-01-29 07:48:46.784041
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
from sqlalchemy.sql import text
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "4d58345da04a"
|
||||
down_revision = "f1ca58b2f2ec"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Get database connection
|
||||
connection = op.get_bind()
|
||||
|
||||
# Update all user emails to lowercase
|
||||
connection.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE "user"
|
||||
SET email = LOWER(email)
|
||||
WHERE email != LOWER(email)
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Cannot restore original case of emails
|
||||
pass
|
||||
@@ -1,29 +0,0 @@
|
||||
"""remove recent assistants
|
||||
|
||||
Revision ID: a6df6b88ef81
|
||||
Revises: 4d58345da04a
|
||||
Create Date: 2025-01-29 10:25:52.790407
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "a6df6b88ef81"
|
||||
down_revision = "4d58345da04a"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.drop_column("user", "recent_assistants")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"recent_assistants", postgresql.JSONB(), server_default="[]", nullable=False
|
||||
),
|
||||
)
|
||||
@@ -14,8 +14,6 @@ def _build_group_member_email_map(
|
||||
) -> dict[str, set[str]]:
|
||||
group_member_emails: dict[str, set[str]] = {}
|
||||
for user_result in confluence_client.paginated_cql_user_retrieval():
|
||||
logger.debug(f"Processing groups for user: {user_result}")
|
||||
|
||||
user = user_result.get("user", {})
|
||||
if not user:
|
||||
logger.warning(f"user result missing user field: {user_result}")
|
||||
@@ -35,17 +33,10 @@ def _build_group_member_email_map(
|
||||
logger.warning(f"user result missing email field: {user_result}")
|
||||
continue
|
||||
|
||||
all_users_groups: set[str] = set()
|
||||
for group in confluence_client.paginated_groups_by_user_retrieval(user):
|
||||
# group name uniqueness is enforced by Confluence, so we can use it as a group ID
|
||||
group_id = group["name"]
|
||||
group_member_emails.setdefault(group_id, set()).add(email)
|
||||
all_users_groups.add(group_id)
|
||||
|
||||
if not group_member_emails:
|
||||
logger.warning(f"No groups found for user with email: {email}")
|
||||
else:
|
||||
logger.debug(f"Found groups {all_users_groups} for user with email {email}")
|
||||
|
||||
return group_member_emails
|
||||
|
||||
|
||||
@@ -111,7 +111,6 @@ async def login_as_anonymous_user(
|
||||
token = generate_anonymous_user_jwt_token(tenant_id)
|
||||
|
||||
response = Response()
|
||||
response.delete_cookie("fastapiusersauth")
|
||||
response.set_cookie(
|
||||
key=ANONYMOUS_USER_COOKIE_NAME,
|
||||
value=token,
|
||||
|
||||
@@ -58,7 +58,6 @@ class UserGroup(BaseModel):
|
||||
credential=CredentialSnapshot.from_credential_db_model(
|
||||
cc_pair_relationship.cc_pair.credential
|
||||
),
|
||||
access_type=cc_pair_relationship.cc_pair.access_type,
|
||||
)
|
||||
for cc_pair_relationship in user_group_model.cc_pair_relationships
|
||||
if cc_pair_relationship.is_current
|
||||
|
||||
@@ -42,10 +42,6 @@ class UserCreate(schemas.BaseUserCreate):
|
||||
tenant_id: str | None = None
|
||||
|
||||
|
||||
class UserUpdateWithRole(schemas.BaseUserUpdate):
|
||||
role: UserRole
|
||||
|
||||
|
||||
class UserUpdate(schemas.BaseUserUpdate):
|
||||
"""
|
||||
Role updates are not allowed through the user update endpoint for security reasons
|
||||
|
||||
@@ -57,7 +57,7 @@ from onyx.auth.invited_users import get_invited_users
|
||||
from onyx.auth.schemas import AuthBackend
|
||||
from onyx.auth.schemas import UserCreate
|
||||
from onyx.auth.schemas import UserRole
|
||||
from onyx.auth.schemas import UserUpdateWithRole
|
||||
from onyx.auth.schemas import UserUpdate
|
||||
from onyx.configs.app_configs import AUTH_BACKEND
|
||||
from onyx.configs.app_configs import AUTH_COOKIE_EXPIRE_TIME_SECONDS
|
||||
from onyx.configs.app_configs import AUTH_TYPE
|
||||
@@ -216,6 +216,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
reset_password_token_secret = USER_AUTH_SECRET
|
||||
verification_token_secret = USER_AUTH_SECRET
|
||||
verification_token_lifetime_seconds = AUTH_COOKIE_EXPIRE_TIME_SECONDS
|
||||
|
||||
user_db: SQLAlchemyUserDatabase[User, uuid.UUID]
|
||||
|
||||
async def create(
|
||||
@@ -245,8 +246,10 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
referral_source=referral_source,
|
||||
request=request,
|
||||
)
|
||||
|
||||
async with get_async_session_with_tenant(tenant_id) as db_session:
|
||||
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
|
||||
|
||||
verify_email_is_invited(user_create.email)
|
||||
verify_email_domain(user_create.email)
|
||||
if MULTI_TENANT:
|
||||
@@ -265,16 +268,16 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
user_create.role = UserRole.ADMIN
|
||||
else:
|
||||
user_create.role = UserRole.BASIC
|
||||
|
||||
try:
|
||||
user = await super().create(user_create, safe=safe, request=request) # type: ignore
|
||||
except exceptions.UserAlreadyExists:
|
||||
user = await self.get_by_email(user_create.email)
|
||||
# Handle case where user has used product outside of web and is now creating an account through web
|
||||
if not user.role.is_web_login() and user_create.role.is_web_login():
|
||||
user_update = UserUpdateWithRole(
|
||||
user_update = UserUpdate(
|
||||
password=user_create.password,
|
||||
is_verified=user_create.is_verified,
|
||||
role=user_create.role,
|
||||
)
|
||||
user = await self.update(user_update, user)
|
||||
else:
|
||||
@@ -282,6 +285,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
|
||||
finally:
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
|
||||
|
||||
return user
|
||||
|
||||
async def validate_password(self, password: str, _: schemas.UC | models.UP) -> None:
|
||||
|
||||
@@ -24,7 +24,6 @@ from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.db.engine import get_sqlalchemy_engine
|
||||
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.redis.redis_connector import RedisConnector
|
||||
from onyx.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
|
||||
from onyx.redis.redis_connector_delete import RedisConnectorDelete
|
||||
@@ -317,8 +316,6 @@ def on_worker_ready(sender: Any, **kwargs: Any) -> None:
|
||||
|
||||
|
||||
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
|
||||
HttpxPool.close_all()
|
||||
|
||||
if not celery_is_worker_primary(sender):
|
||||
return
|
||||
|
||||
|
||||
@@ -10,10 +10,6 @@ from celery.signals import worker_ready
|
||||
from celery.signals import worker_shutdown
|
||||
|
||||
import onyx.background.celery.apps.app_base as app_base
|
||||
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
|
||||
from onyx.configs.app_configs import MANAGED_VESPA
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
|
||||
from onyx.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME
|
||||
from onyx.db.engine import SqlEngine
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -58,23 +54,12 @@ def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
|
||||
|
||||
@worker_init.connect
|
||||
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
EXTRA_CONCURRENCY = 8 # small extra fudge factor for connection limits
|
||||
|
||||
logger.info("worker_init signal received.")
|
||||
|
||||
logger.info(f"Concurrency: {sender.concurrency}") # type: ignore
|
||||
|
||||
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME)
|
||||
SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=EXTRA_CONCURRENCY) # type: ignore
|
||||
|
||||
if MANAGED_VESPA:
|
||||
httpx_init_vespa_pool(
|
||||
sender.concurrency + EXTRA_CONCURRENCY, # type: ignore
|
||||
ssl_cert=VESPA_CLOUD_CERT_PATH,
|
||||
ssl_key=VESPA_CLOUD_KEY_PATH,
|
||||
)
|
||||
else:
|
||||
httpx_init_vespa_pool(sender.concurrency + EXTRA_CONCURRENCY) # type: ignore
|
||||
SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=8) # type: ignore
|
||||
|
||||
app_base.wait_for_redis(sender, **kwargs)
|
||||
app_base.wait_for_db(sender, **kwargs)
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
|
||||
import httpx
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
|
||||
from onyx.configs.app_configs import VESPA_REQUEST_TIMEOUT
|
||||
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
|
||||
rate_limit_builder,
|
||||
)
|
||||
@@ -20,7 +17,6 @@ from onyx.db.connector_credential_pair import get_connector_credential_pair
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.enums import TaskStatus
|
||||
from onyx.db.models import TaskQueueState
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.redis.redis_connector import RedisConnector
|
||||
from onyx.server.documents.models import DeletionAttemptSnapshot
|
||||
@@ -158,25 +154,3 @@ def celery_is_worker_primary(worker: Any) -> bool:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def httpx_init_vespa_pool(
|
||||
max_keepalive_connections: int,
|
||||
timeout: int = VESPA_REQUEST_TIMEOUT,
|
||||
ssl_cert: str | None = None,
|
||||
ssl_key: str | None = None,
|
||||
) -> None:
|
||||
httpx_cert = None
|
||||
httpx_verify = False
|
||||
if ssl_cert and ssl_key:
|
||||
httpx_cert = cast(tuple[str, str], (ssl_cert, ssl_key))
|
||||
httpx_verify = True
|
||||
|
||||
HttpxPool.init_client(
|
||||
name="vespa",
|
||||
cert=httpx_cert,
|
||||
verify=httpx_verify,
|
||||
timeout=timeout,
|
||||
http2=False,
|
||||
limits=httpx.Limits(max_keepalive_connections=max_keepalive_connections),
|
||||
)
|
||||
|
||||
@@ -11,7 +11,6 @@ from celery import Task
|
||||
from celery.exceptions import SoftTimeLimitExceeded
|
||||
from redis import Redis
|
||||
from redis.lock import Lock as RedisLock
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
|
||||
from ee.onyx.db.document import upsert_document_external_perms
|
||||
@@ -32,17 +31,12 @@ from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.db.connector import mark_cc_pair_as_permissions_synced
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.document import upsert_document_by_connector_credential_pair
|
||||
from onyx.db.engine import get_session_with_tenant
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.enums import SyncStatus
|
||||
from onyx.db.enums import SyncType
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.db.sync_record import insert_sync_record
|
||||
from onyx.db.sync_record import update_sync_record_status
|
||||
from onyx.db.users import batch_add_ext_perm_user_if_not_exists
|
||||
from onyx.redis.redis_connector import RedisConnector
|
||||
from onyx.redis.redis_connector_doc_perm_sync import (
|
||||
@@ -63,9 +57,6 @@ LIGHT_SOFT_TIME_LIMIT = 105
|
||||
LIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15
|
||||
|
||||
|
||||
"""Jobs / utils for kicking off doc permissions sync tasks."""
|
||||
|
||||
|
||||
def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
|
||||
"""Returns boolean indicating if external doc permissions sync is due."""
|
||||
|
||||
@@ -183,19 +174,6 @@ def try_creating_permissions_sync_task(
|
||||
|
||||
custom_task_id = f"{redis_connector.permissions.generator_task_key}_{uuid4()}"
|
||||
|
||||
# create before setting fence to avoid race condition where the monitoring
|
||||
# task updates the sync record before it is created
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
insert_sync_record(
|
||||
db_session=db_session,
|
||||
entity_id=cc_pair_id,
|
||||
sync_type=SyncType.EXTERNAL_PERMISSIONS,
|
||||
)
|
||||
|
||||
# set a basic fence to start
|
||||
payload = RedisConnectorPermissionSyncPayload(started=None, celery_task_id=None)
|
||||
redis_connector.permissions.set_fence(payload)
|
||||
|
||||
result = app.send_task(
|
||||
OnyxCeleryTask.CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK,
|
||||
kwargs=dict(
|
||||
@@ -207,8 +185,11 @@ def try_creating_permissions_sync_task(
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
)
|
||||
|
||||
# fill in the celery task id
|
||||
payload.celery_task_id = result.id
|
||||
# set a basic fence to start
|
||||
payload = RedisConnectorPermissionSyncPayload(
|
||||
started=None, celery_task_id=result.id
|
||||
)
|
||||
|
||||
redis_connector.permissions.set_fence(payload)
|
||||
except Exception:
|
||||
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair_id}")
|
||||
@@ -418,53 +399,3 @@ def update_external_document_permissions_task(
|
||||
f"Error Syncing Document Permissions: connector_id={connector_id} doc_id={doc_id}"
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
"""Monitoring CCPair permissions utils, called in monitor_vespa_sync"""
|
||||
|
||||
|
||||
def monitor_ccpair_permissions_taskset(
|
||||
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
|
||||
) -> None:
|
||||
fence_key = key_bytes.decode("utf-8")
|
||||
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
|
||||
if cc_pair_id_str is None:
|
||||
task_logger.warning(
|
||||
f"monitor_ccpair_permissions_taskset: could not parse cc_pair_id from {fence_key}"
|
||||
)
|
||||
return
|
||||
|
||||
cc_pair_id = int(cc_pair_id_str)
|
||||
|
||||
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
||||
if not redis_connector.permissions.fenced:
|
||||
return
|
||||
|
||||
initial = redis_connector.permissions.generator_complete
|
||||
if initial is None:
|
||||
return
|
||||
|
||||
remaining = redis_connector.permissions.get_remaining()
|
||||
task_logger.info(
|
||||
f"Permissions sync progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
|
||||
)
|
||||
if remaining > 0:
|
||||
return
|
||||
|
||||
payload: RedisConnectorPermissionSyncPayload | None = (
|
||||
redis_connector.permissions.payload
|
||||
)
|
||||
start_time: datetime | None = payload.started if payload else None
|
||||
|
||||
mark_cc_pair_as_permissions_synced(db_session, int(cc_pair_id), start_time)
|
||||
task_logger.info(f"Successfully synced permissions for cc_pair={cc_pair_id}")
|
||||
|
||||
update_sync_record_status(
|
||||
db_session=db_session,
|
||||
entity_id=cc_pair_id,
|
||||
sync_type=SyncType.EXTERNAL_PERMISSIONS,
|
||||
sync_status=SyncStatus.SUCCESS,
|
||||
num_docs_synced=initial,
|
||||
)
|
||||
|
||||
redis_connector.permissions.reset()
|
||||
|
||||
@@ -33,11 +33,7 @@ from onyx.db.connector_credential_pair import get_connector_credential_pair_from
|
||||
from onyx.db.engine import get_session_with_tenant
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.enums import SyncStatus
|
||||
from onyx.db.enums import SyncType
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.db.sync_record import insert_sync_record
|
||||
from onyx.db.sync_record import update_sync_record_status
|
||||
from onyx.redis.redis_connector import RedisConnector
|
||||
from onyx.redis.redis_connector_ext_group_sync import (
|
||||
RedisConnectorExternalGroupSyncPayload,
|
||||
@@ -204,15 +200,6 @@ def try_creating_external_group_sync_task(
|
||||
celery_task_id=result.id,
|
||||
)
|
||||
|
||||
# create before setting fence to avoid race condition where the monitoring
|
||||
# task updates the sync record before it is created
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
insert_sync_record(
|
||||
db_session=db_session,
|
||||
entity_id=cc_pair_id,
|
||||
sync_type=SyncType.EXTERNAL_GROUP,
|
||||
)
|
||||
|
||||
redis_connector.external_group_sync.set_fence(payload)
|
||||
|
||||
except Exception:
|
||||
@@ -302,26 +289,11 @@ def connector_external_group_sync_generator_task(
|
||||
)
|
||||
|
||||
mark_cc_pair_as_external_group_synced(db_session, cc_pair.id)
|
||||
|
||||
update_sync_record_status(
|
||||
db_session=db_session,
|
||||
entity_id=cc_pair_id,
|
||||
sync_type=SyncType.EXTERNAL_GROUP,
|
||||
sync_status=SyncStatus.SUCCESS,
|
||||
)
|
||||
except Exception as e:
|
||||
task_logger.exception(
|
||||
f"Failed to run external group sync: cc_pair={cc_pair_id}"
|
||||
)
|
||||
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
update_sync_record_status(
|
||||
db_session=db_session,
|
||||
entity_id=cc_pair_id,
|
||||
sync_type=SyncType.EXTERNAL_GROUP,
|
||||
sync_status=SyncStatus.FAILED,
|
||||
)
|
||||
|
||||
redis_connector.external_group_sync.generator_clear()
|
||||
redis_connector.external_group_sync.taskset_clear()
|
||||
raise e
|
||||
|
||||
@@ -15,7 +15,6 @@ from redis import Redis
|
||||
from redis.lock import Lock as RedisLock
|
||||
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
|
||||
from onyx.background.celery.tasks.indexing.utils import _should_index
|
||||
from onyx.background.celery.tasks.indexing.utils import get_unfenced_index_attempt_ids
|
||||
from onyx.background.celery.tasks.indexing.utils import IndexingCallback
|
||||
@@ -23,9 +22,6 @@ from onyx.background.celery.tasks.indexing.utils import try_creating_indexing_ta
|
||||
from onyx.background.celery.tasks.indexing.utils import validate_indexing_fences
|
||||
from onyx.background.indexing.job_client import SimpleJobClient
|
||||
from onyx.background.indexing.run_indexing import run_indexing_entrypoint
|
||||
from onyx.configs.app_configs import MANAGED_VESPA
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
|
||||
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
|
||||
@@ -41,7 +37,8 @@ from onyx.db.index_attempt import get_index_attempt
|
||||
from onyx.db.index_attempt import get_last_attempt_for_cc_pair
|
||||
from onyx.db.index_attempt import mark_attempt_canceled
|
||||
from onyx.db.index_attempt import mark_attempt_failed
|
||||
from onyx.db.search_settings import get_active_search_settings_list
|
||||
from onyx.db.models import SearchSettings
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.swap_index import check_index_swap
|
||||
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
|
||||
@@ -124,7 +121,9 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
|
||||
|
||||
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
search_settings_list = get_active_search_settings_list(db_session)
|
||||
search_settings_list: list[SearchSettings] = get_active_search_settings(
|
||||
db_session
|
||||
)
|
||||
for search_settings_instance in search_settings_list:
|
||||
redis_connector_index = redis_connector.new_index(
|
||||
search_settings_instance.id
|
||||
@@ -304,14 +303,6 @@ def connector_indexing_task(
|
||||
attempt_found = False
|
||||
n_final_progress: int | None = None
|
||||
|
||||
# 20 is the documented default for httpx max_keepalive_connections
|
||||
if MANAGED_VESPA:
|
||||
httpx_init_vespa_pool(
|
||||
20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH
|
||||
)
|
||||
else:
|
||||
httpx_init_vespa_pool(20)
|
||||
|
||||
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
||||
redis_connector_index = redis_connector.new_index(search_settings_id)
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ from onyx.db.models import DocumentSet
|
||||
from onyx.db.models import IndexAttempt
|
||||
from onyx.db.models import SyncRecord
|
||||
from onyx.db.models import UserGroup
|
||||
from onyx.db.search_settings import get_active_search_settings_list
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.redis.redis_pool import redis_lock_dump
|
||||
from onyx.utils.telemetry import optional_telemetry
|
||||
@@ -58,11 +58,6 @@ _SYNC_START_LATENCY_KEY_FMT = (
|
||||
"sync_start_latency:{sync_type}:{entity_id}:{sync_record_id}"
|
||||
)
|
||||
|
||||
_CONNECTOR_START_TIME_KEY_FMT = "connector_start_time:{cc_pair_id}:{index_attempt_id}"
|
||||
_CONNECTOR_END_TIME_KEY_FMT = "connector_end_time:{cc_pair_id}:{index_attempt_id}"
|
||||
_SYNC_START_TIME_KEY_FMT = "sync_start_time:{sync_type}:{entity_id}:{sync_record_id}"
|
||||
_SYNC_END_TIME_KEY_FMT = "sync_end_time:{sync_type}:{entity_id}:{sync_record_id}"
|
||||
|
||||
|
||||
def _mark_metric_as_emitted(redis_std: Redis, key: str) -> None:
|
||||
"""Mark a metric as having been emitted by setting a Redis key with expiration"""
|
||||
@@ -308,6 +303,8 @@ def _build_connector_final_metrics(
|
||||
)
|
||||
)
|
||||
|
||||
_mark_metric_as_emitted(redis_std, metric_key)
|
||||
|
||||
return metrics
|
||||
|
||||
|
||||
@@ -318,13 +315,13 @@ def _collect_connector_metrics(db_session: Session, redis_std: Redis) -> list[Me
|
||||
# Get all connector credential pairs
|
||||
cc_pairs = db_session.scalars(select(ConnectorCredentialPair)).all()
|
||||
# Might be more than one search setting, or just one
|
||||
active_search_settings_list = get_active_search_settings_list(db_session)
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
|
||||
metrics = []
|
||||
|
||||
# If you want to process each cc_pair against each search setting:
|
||||
for cc_pair in cc_pairs:
|
||||
for search_settings in active_search_settings_list:
|
||||
for search_settings in active_search_settings:
|
||||
recent_attempts = (
|
||||
db_session.query(IndexAttempt)
|
||||
.filter(
|
||||
@@ -347,52 +344,6 @@ def _collect_connector_metrics(db_session: Session, redis_std: Redis) -> list[Me
|
||||
if one_hour_ago > most_recent_attempt.time_created:
|
||||
continue
|
||||
|
||||
# Build a job_id for correlation
|
||||
job_id = build_job_id(
|
||||
"connector", str(cc_pair.id), str(most_recent_attempt.id)
|
||||
)
|
||||
|
||||
# Add raw start time metric if available
|
||||
if most_recent_attempt.time_started:
|
||||
start_time_key = _CONNECTOR_START_TIME_KEY_FMT.format(
|
||||
cc_pair_id=cc_pair.id,
|
||||
index_attempt_id=most_recent_attempt.id,
|
||||
)
|
||||
metrics.append(
|
||||
Metric(
|
||||
key=start_time_key,
|
||||
name="connector_start_time",
|
||||
value=most_recent_attempt.time_started.timestamp(),
|
||||
tags={
|
||||
"job_id": job_id,
|
||||
"connector_id": str(cc_pair.connector.id),
|
||||
"source": str(cc_pair.connector.source),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Add raw end time metric if available and in terminal state
|
||||
if (
|
||||
most_recent_attempt.status.is_terminal()
|
||||
and most_recent_attempt.time_updated
|
||||
):
|
||||
end_time_key = _CONNECTOR_END_TIME_KEY_FMT.format(
|
||||
cc_pair_id=cc_pair.id,
|
||||
index_attempt_id=most_recent_attempt.id,
|
||||
)
|
||||
metrics.append(
|
||||
Metric(
|
||||
key=end_time_key,
|
||||
name="connector_end_time",
|
||||
value=most_recent_attempt.time_updated.timestamp(),
|
||||
tags={
|
||||
"job_id": job_id,
|
||||
"connector_id": str(cc_pair.connector.id),
|
||||
"source": str(cc_pair.connector.source),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Connector start latency
|
||||
start_latency_metric = _build_connector_start_latency_metric(
|
||||
cc_pair, most_recent_attempt, second_most_recent_attempt, redis_std
|
||||
@@ -414,10 +365,9 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
|
||||
"""
|
||||
Collect metrics for document set and group syncing:
|
||||
- Success/failure status
|
||||
- Start latency (for doc sets / user groups)
|
||||
- Start latency (always)
|
||||
- Duration & doc count (only if success)
|
||||
- Throughput (docs/min) (only if success)
|
||||
- Raw start/end times for each sync
|
||||
"""
|
||||
one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)
|
||||
|
||||
@@ -439,43 +389,6 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
|
||||
# Build a job_id for correlation
|
||||
job_id = build_job_id("sync_record", str(sync_record.id))
|
||||
|
||||
# Add raw start time metric
|
||||
start_time_key = _SYNC_START_TIME_KEY_FMT.format(
|
||||
sync_type=sync_record.sync_type,
|
||||
entity_id=sync_record.entity_id,
|
||||
sync_record_id=sync_record.id,
|
||||
)
|
||||
metrics.append(
|
||||
Metric(
|
||||
key=start_time_key,
|
||||
name="sync_start_time",
|
||||
value=sync_record.sync_start_time.timestamp(),
|
||||
tags={
|
||||
"job_id": job_id,
|
||||
"sync_type": str(sync_record.sync_type),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Add raw end time metric if available
|
||||
if sync_record.sync_end_time:
|
||||
end_time_key = _SYNC_END_TIME_KEY_FMT.format(
|
||||
sync_type=sync_record.sync_type,
|
||||
entity_id=sync_record.entity_id,
|
||||
sync_record_id=sync_record.id,
|
||||
)
|
||||
metrics.append(
|
||||
Metric(
|
||||
key=end_time_key,
|
||||
name="sync_end_time",
|
||||
value=sync_record.sync_end_time.timestamp(),
|
||||
tags={
|
||||
"job_id": job_id,
|
||||
"sync_type": str(sync_record.sync_type),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Emit a SUCCESS/FAIL boolean metric
|
||||
# Use a single Redis key to avoid re-emitting final metrics
|
||||
final_metric_key = _FINAL_METRIC_KEY_FMT.format(
|
||||
@@ -526,7 +439,7 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
|
||||
if duration_seconds is not None:
|
||||
metrics.append(
|
||||
Metric(
|
||||
key=final_metric_key,
|
||||
key=None,
|
||||
name="sync_duration_seconds",
|
||||
value=duration_seconds,
|
||||
tags={
|
||||
@@ -542,7 +455,7 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
|
||||
|
||||
metrics.append(
|
||||
Metric(
|
||||
key=final_metric_key,
|
||||
key=None,
|
||||
name="sync_doc_count",
|
||||
value=doc_count,
|
||||
tags={
|
||||
@@ -555,7 +468,7 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
|
||||
if sync_speed is not None:
|
||||
metrics.append(
|
||||
Metric(
|
||||
key=final_metric_key,
|
||||
key=None,
|
||||
name="sync_speed_docs_per_min",
|
||||
value=sync_speed,
|
||||
tags={
|
||||
@@ -569,6 +482,9 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
|
||||
f"Invalid sync record {sync_record.id} with no duration"
|
||||
)
|
||||
|
||||
# Mark final metrics as emitted so we don't re-emit
|
||||
_mark_metric_as_emitted(redis_std, final_metric_key)
|
||||
|
||||
# Emit start latency
|
||||
start_latency_key = _SYNC_START_LATENCY_KEY_FMT.format(
|
||||
sync_type=sync_record.sync_type,
|
||||
@@ -586,20 +502,22 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
|
||||
entity = db_session.scalar(
|
||||
select(UserGroup).where(UserGroup.id == sync_record.entity_id)
|
||||
)
|
||||
else:
|
||||
task_logger.info(
|
||||
f"Skipping sync record {sync_record.id} of type {sync_record.sync_type}."
|
||||
)
|
||||
continue
|
||||
|
||||
if entity is None:
|
||||
task_logger.error(
|
||||
f"Sync record of type {sync_record.sync_type} doesn't have an entity "
|
||||
f"associated with it (id={sync_record.entity_id}). Skipping start latency metric."
|
||||
f"Could not find entity for sync record {sync_record.id} "
|
||||
f"(type={sync_record.sync_type}, id={sync_record.entity_id})."
|
||||
)
|
||||
continue
|
||||
|
||||
# Calculate start latency in seconds:
|
||||
# (actual sync start) - (last modified time)
|
||||
if (
|
||||
entity is not None
|
||||
and entity.time_last_modified_by_user
|
||||
and sync_record.sync_start_time
|
||||
):
|
||||
if entity.time_last_modified_by_user and sync_record.sync_start_time:
|
||||
start_latency = (
|
||||
sync_record.sync_start_time - entity.time_last_modified_by_user
|
||||
).total_seconds()
|
||||
@@ -623,6 +541,8 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
|
||||
)
|
||||
)
|
||||
|
||||
_mark_metric_as_emitted(redis_std, start_latency_key)
|
||||
|
||||
return metrics
|
||||
|
||||
|
||||
@@ -687,12 +607,9 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
|
||||
for metric_fn in metric_functions:
|
||||
metrics = metric_fn()
|
||||
for metric in metrics:
|
||||
# double check to make sure we aren't double-emitting metrics
|
||||
if metric.key is not None and not _has_metric_been_emitted(
|
||||
redis_std, metric.key
|
||||
):
|
||||
metric.log()
|
||||
metric.emit(tenant_id)
|
||||
metric.log()
|
||||
metric.emit(tenant_id)
|
||||
if metric.key:
|
||||
_mark_metric_as_emitted(redis_std, metric.key)
|
||||
|
||||
task_logger.info("Successfully collected background metrics")
|
||||
|
||||
@@ -25,18 +25,13 @@ from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.connectors.factory import instantiate_connector
|
||||
from onyx.connectors.models import InputType
|
||||
from onyx.db.connector import mark_ccpair_as_pruned
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pairs
|
||||
from onyx.db.document import get_documents_for_connector_credential_pair
|
||||
from onyx.db.engine import get_session_with_tenant
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.enums import SyncStatus
|
||||
from onyx.db.enums import SyncType
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.db.sync_record import insert_sync_record
|
||||
from onyx.db.sync_record import update_sync_record_status
|
||||
from onyx.redis.redis_connector import RedisConnector
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.utils.logger import pruning_ctx
|
||||
@@ -45,9 +40,6 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
"""Jobs / utils for kicking off pruning tasks."""
|
||||
|
||||
|
||||
def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
|
||||
"""Returns boolean indicating if pruning is due.
|
||||
|
||||
@@ -212,14 +204,6 @@ def try_creating_prune_generator_task(
|
||||
priority=OnyxCeleryPriority.LOW,
|
||||
)
|
||||
|
||||
# create before setting fence to avoid race condition where the monitoring
|
||||
# task updates the sync record before it is created
|
||||
insert_sync_record(
|
||||
db_session=db_session,
|
||||
entity_id=cc_pair.id,
|
||||
sync_type=SyncType.PRUNING,
|
||||
)
|
||||
|
||||
# set this only after all tasks have been added
|
||||
redis_connector.prune.set_fence(True)
|
||||
except Exception:
|
||||
@@ -364,52 +348,3 @@ def connector_pruning_generator_task(
|
||||
lock.release()
|
||||
|
||||
task_logger.info(f"Pruning generator finished: cc_pair={cc_pair_id}")
|
||||
|
||||
|
||||
"""Monitoring pruning utils, called in monitor_vespa_sync"""
|
||||
|
||||
|
||||
def monitor_ccpair_pruning_taskset(
|
||||
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
|
||||
) -> None:
|
||||
fence_key = key_bytes.decode("utf-8")
|
||||
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
|
||||
if cc_pair_id_str is None:
|
||||
task_logger.warning(
|
||||
f"monitor_ccpair_pruning_taskset: could not parse cc_pair_id from {fence_key}"
|
||||
)
|
||||
return
|
||||
|
||||
cc_pair_id = int(cc_pair_id_str)
|
||||
|
||||
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
||||
if not redis_connector.prune.fenced:
|
||||
return
|
||||
|
||||
initial = redis_connector.prune.generator_complete
|
||||
if initial is None:
|
||||
return
|
||||
|
||||
remaining = redis_connector.prune.get_remaining()
|
||||
task_logger.info(
|
||||
f"Connector pruning progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
|
||||
)
|
||||
if remaining > 0:
|
||||
return
|
||||
|
||||
mark_ccpair_as_pruned(int(cc_pair_id), db_session)
|
||||
task_logger.info(
|
||||
f"Successfully pruned connector credential pair. cc_pair={cc_pair_id}"
|
||||
)
|
||||
|
||||
update_sync_record_status(
|
||||
db_session=db_session,
|
||||
entity_id=cc_pair_id,
|
||||
sync_type=SyncType.PRUNING,
|
||||
sync_status=SyncStatus.SUCCESS,
|
||||
num_docs_synced=initial,
|
||||
)
|
||||
|
||||
redis_connector.prune.taskset_clear()
|
||||
redis_connector.prune.generator_clear()
|
||||
redis_connector.prune.set_fence(False)
|
||||
|
||||
@@ -27,10 +27,9 @@ from onyx.db.document import mark_document_as_synced
|
||||
from onyx.db.document_set import fetch_document_sets_for_document
|
||||
from onyx.db.engine import get_all_tenant_ids
|
||||
from onyx.db.engine import get_session_with_tenant
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.document_index.document_index_utils import get_both_index_names
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.document_index.interfaces import VespaDocumentFields
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.redis.redis_pool import redis_lock_dump
|
||||
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
|
||||
@@ -80,11 +79,9 @@ def document_by_cc_pair_cleanup_task(
|
||||
action = "skip"
|
||||
chunks_affected = 0
|
||||
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
|
||||
doc_index = get_default_document_index(
|
||||
active_search_settings.primary,
|
||||
active_search_settings.secondary,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
|
||||
)
|
||||
|
||||
retry_index = RetryDocumentIndex(doc_index)
|
||||
|
||||
@@ -24,10 +24,6 @@ from onyx.access.access import get_access_for_document
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.background.celery.celery_redis import celery_get_queue_length
|
||||
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
|
||||
from onyx.background.celery.tasks.doc_permission_syncing.tasks import (
|
||||
monitor_ccpair_permissions_taskset,
|
||||
)
|
||||
from onyx.background.celery.tasks.pruning.tasks import monitor_ccpair_pruning_taskset
|
||||
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
|
||||
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
|
||||
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
|
||||
@@ -38,6 +34,8 @@ from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.db.connector import fetch_connector_by_id
|
||||
from onyx.db.connector import mark_cc_pair_as_permissions_synced
|
||||
from onyx.db.connector import mark_ccpair_as_pruned
|
||||
from onyx.db.connector_credential_pair import add_deletion_failure_message
|
||||
from onyx.db.connector_credential_pair import (
|
||||
delete_connector_credential_pair__no_commit,
|
||||
@@ -63,17 +61,19 @@ from onyx.db.index_attempt import get_index_attempt
|
||||
from onyx.db.index_attempt import mark_attempt_failed
|
||||
from onyx.db.models import DocumentSet
|
||||
from onyx.db.models import UserGroup
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.db.sync_record import cleanup_sync_records
|
||||
from onyx.db.sync_record import insert_sync_record
|
||||
from onyx.db.sync_record import update_sync_record_status
|
||||
from onyx.document_index.document_index_utils import get_both_index_names
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.document_index.interfaces import VespaDocumentFields
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.redis.redis_connector import RedisConnector
|
||||
from onyx.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
|
||||
from onyx.redis.redis_connector_delete import RedisConnectorDelete
|
||||
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
|
||||
from onyx.redis.redis_connector_doc_perm_sync import (
|
||||
RedisConnectorPermissionSyncPayload,
|
||||
)
|
||||
from onyx.redis.redis_connector_index import RedisConnectorIndex
|
||||
from onyx.redis.redis_connector_prune import RedisConnectorPrune
|
||||
from onyx.redis.redis_document_set import RedisDocumentSet
|
||||
@@ -652,6 +652,83 @@ def monitor_connector_deletion_taskset(
|
||||
redis_connector.delete.reset()
|
||||
|
||||
|
||||
def monitor_ccpair_pruning_taskset(
|
||||
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
|
||||
) -> None:
|
||||
fence_key = key_bytes.decode("utf-8")
|
||||
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
|
||||
if cc_pair_id_str is None:
|
||||
task_logger.warning(
|
||||
f"monitor_ccpair_pruning_taskset: could not parse cc_pair_id from {fence_key}"
|
||||
)
|
||||
return
|
||||
|
||||
cc_pair_id = int(cc_pair_id_str)
|
||||
|
||||
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
||||
if not redis_connector.prune.fenced:
|
||||
return
|
||||
|
||||
initial = redis_connector.prune.generator_complete
|
||||
if initial is None:
|
||||
return
|
||||
|
||||
remaining = redis_connector.prune.get_remaining()
|
||||
task_logger.info(
|
||||
f"Connector pruning progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
|
||||
)
|
||||
if remaining > 0:
|
||||
return
|
||||
|
||||
mark_ccpair_as_pruned(int(cc_pair_id), db_session)
|
||||
task_logger.info(
|
||||
f"Successfully pruned connector credential pair. cc_pair={cc_pair_id}"
|
||||
)
|
||||
|
||||
redis_connector.prune.taskset_clear()
|
||||
redis_connector.prune.generator_clear()
|
||||
redis_connector.prune.set_fence(False)
|
||||
|
||||
|
||||
def monitor_ccpair_permissions_taskset(
|
||||
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
|
||||
) -> None:
|
||||
fence_key = key_bytes.decode("utf-8")
|
||||
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
|
||||
if cc_pair_id_str is None:
|
||||
task_logger.warning(
|
||||
f"monitor_ccpair_permissions_taskset: could not parse cc_pair_id from {fence_key}"
|
||||
)
|
||||
return
|
||||
|
||||
cc_pair_id = int(cc_pair_id_str)
|
||||
|
||||
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
||||
if not redis_connector.permissions.fenced:
|
||||
return
|
||||
|
||||
initial = redis_connector.permissions.generator_complete
|
||||
if initial is None:
|
||||
return
|
||||
|
||||
remaining = redis_connector.permissions.get_remaining()
|
||||
task_logger.info(
|
||||
f"Permissions sync progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
|
||||
)
|
||||
if remaining > 0:
|
||||
return
|
||||
|
||||
payload: RedisConnectorPermissionSyncPayload | None = (
|
||||
redis_connector.permissions.payload
|
||||
)
|
||||
start_time: datetime | None = payload.started if payload else None
|
||||
|
||||
mark_cc_pair_as_permissions_synced(db_session, int(cc_pair_id), start_time)
|
||||
task_logger.info(f"Successfully synced permissions for cc_pair={cc_pair_id}")
|
||||
|
||||
redis_connector.permissions.reset()
|
||||
|
||||
|
||||
def monitor_ccpair_indexing_taskset(
|
||||
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
|
||||
) -> None:
|
||||
@@ -1019,11 +1096,9 @@ def vespa_metadata_sync_task(
|
||||
|
||||
try:
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
|
||||
doc_index = get_default_document_index(
|
||||
search_settings=active_search_settings.primary,
|
||||
secondary_search_settings=active_search_settings.secondary,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
|
||||
)
|
||||
|
||||
retry_index = RetryDocumentIndex(doc_index)
|
||||
|
||||
@@ -35,7 +35,6 @@ from onyx.db.models import IndexAttempt
|
||||
from onyx.db.models import IndexingStatus
|
||||
from onyx.db.models import IndexModelStatus
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.indexing.embedder import DefaultIndexingEmbedder
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.indexing.indexing_pipeline import build_indexing_pipeline
|
||||
@@ -220,10 +219,9 @@ def _run_indexing(
|
||||
callback=callback,
|
||||
)
|
||||
|
||||
# Indexing is only done into one index at a time
|
||||
document_index = get_default_document_index(
|
||||
index_attempt_start.search_settings,
|
||||
None,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
primary_index_name=ctx.index_name, secondary_index_name=None
|
||||
)
|
||||
|
||||
indexing_pipeline = build_indexing_pipeline(
|
||||
|
||||
@@ -254,7 +254,6 @@ def _get_force_search_settings(
|
||||
and new_msg_req.retrieval_options.run_search
|
||||
== OptionalSearchSetting.ALWAYS,
|
||||
new_msg_req.search_doc_ids,
|
||||
new_msg_req.query_override is not None,
|
||||
DISABLE_LLM_CHOOSE_SEARCH,
|
||||
]
|
||||
)
|
||||
@@ -426,7 +425,9 @@ def stream_chat_message_objects(
|
||||
)
|
||||
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
document_index = get_default_document_index(search_settings, None)
|
||||
document_index = get_default_document_index(
|
||||
primary_index_name=search_settings.index_name, secondary_index_name=None
|
||||
)
|
||||
|
||||
# Every chat Session begins with an empty root message
|
||||
root_message = get_or_create_root_message(
|
||||
@@ -498,6 +499,14 @@ def stream_chat_message_objects(
|
||||
f"existing assistant message id: {existing_assistant_message_id}"
|
||||
)
|
||||
|
||||
# Disable Query Rephrasing for the first message
|
||||
# This leads to a better first response since the LLM rephrasing the question
|
||||
# leads to worst search quality
|
||||
if not history_msgs:
|
||||
new_msg_req.query_override = (
|
||||
new_msg_req.query_override or new_msg_req.message
|
||||
)
|
||||
|
||||
# load all files needed for this chat chain in memory
|
||||
files = load_all_chat_files(
|
||||
history_msgs, new_msg_req.file_descriptors, db_session
|
||||
|
||||
@@ -478,12 +478,6 @@ INDEXING_SIZE_WARNING_THRESHOLD = int(
|
||||
# 0 disables this behavior and is the default.
|
||||
INDEXING_TRACER_INTERVAL = int(os.environ.get("INDEXING_TRACER_INTERVAL") or 0)
|
||||
|
||||
# Enable multi-threaded embedding model calls for parallel processing
|
||||
# Note: only applies for API-based embedding models
|
||||
INDEXING_EMBEDDING_MODEL_NUM_THREADS = int(
|
||||
os.environ.get("INDEXING_EMBEDDING_MODEL_NUM_THREADS") or 1
|
||||
)
|
||||
|
||||
# During an indexing attempt, specifies the number of batches which are allowed to
|
||||
# exception without aborting the attempt.
|
||||
INDEXING_EXCEPTION_LIMIT = int(os.environ.get("INDEXING_EXCEPTION_LIMIT") or 0)
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
from concurrent.futures import as_completed
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from io import BytesIO
|
||||
from typing import Any
|
||||
|
||||
@@ -22,9 +20,9 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
# NOTE: all are made lowercase to avoid case sensitivity issues
|
||||
# These field types are considered metadata by default when
|
||||
# treat_all_non_attachment_fields_as_metadata is False
|
||||
DEFAULT_METADATA_FIELD_TYPES = {
|
||||
# these are the field types that are considered metadata rather
|
||||
# than sections
|
||||
_METADATA_FIELD_TYPES = {
|
||||
"singlecollaborator",
|
||||
"collaborator",
|
||||
"createdby",
|
||||
@@ -62,16 +60,12 @@ class AirtableConnector(LoadConnector):
|
||||
self,
|
||||
base_id: str,
|
||||
table_name_or_id: str,
|
||||
treat_all_non_attachment_fields_as_metadata: bool = False,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
) -> None:
|
||||
self.base_id = base_id
|
||||
self.table_name_or_id = table_name_or_id
|
||||
self.batch_size = batch_size
|
||||
self.airtable_client: AirtableApi | None = None
|
||||
self.treat_all_non_attachment_fields_as_metadata = (
|
||||
treat_all_non_attachment_fields_as_metadata
|
||||
)
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
self.airtable_client = AirtableApi(credentials["airtable_access_token"])
|
||||
@@ -172,14 +166,8 @@ class AirtableConnector(LoadConnector):
|
||||
return [(str(field_info), default_link)]
|
||||
|
||||
def _should_be_metadata(self, field_type: str) -> bool:
|
||||
"""Determine if a field type should be treated as metadata.
|
||||
|
||||
When treat_all_non_attachment_fields_as_metadata is True, all fields except
|
||||
attachments are treated as metadata. Otherwise, only fields with types listed
|
||||
in DEFAULT_METADATA_FIELD_TYPES are treated as metadata."""
|
||||
if self.treat_all_non_attachment_fields_as_metadata:
|
||||
return field_type.lower() != "multipleattachments"
|
||||
return field_type.lower() in DEFAULT_METADATA_FIELD_TYPES
|
||||
"""Determine if a field type should be treated as metadata."""
|
||||
return field_type.lower() in _METADATA_FIELD_TYPES
|
||||
|
||||
def _process_field(
|
||||
self,
|
||||
@@ -245,7 +233,7 @@ class AirtableConnector(LoadConnector):
|
||||
record: RecordDict,
|
||||
table_schema: TableSchema,
|
||||
primary_field_name: str | None,
|
||||
) -> Document | None:
|
||||
) -> Document:
|
||||
"""Process a single Airtable record into a Document.
|
||||
|
||||
Args:
|
||||
@@ -276,11 +264,6 @@ class AirtableConnector(LoadConnector):
|
||||
field_val = fields.get(field_name)
|
||||
field_type = field_schema.type
|
||||
|
||||
logger.debug(
|
||||
f"Processing field '{field_name}' of type '{field_type}' "
|
||||
f"for record '{record_id}'."
|
||||
)
|
||||
|
||||
field_sections, field_metadata = self._process_field(
|
||||
field_id=field_schema.id,
|
||||
field_name=field_name,
|
||||
@@ -294,10 +277,6 @@ class AirtableConnector(LoadConnector):
|
||||
sections.extend(field_sections)
|
||||
metadata.update(field_metadata)
|
||||
|
||||
if not sections:
|
||||
logger.warning(f"No sections found for record {record_id}")
|
||||
return None
|
||||
|
||||
semantic_id = (
|
||||
f"{table_name}: {primary_field_value}"
|
||||
if primary_field_value
|
||||
@@ -334,45 +313,18 @@ class AirtableConnector(LoadConnector):
|
||||
primary_field_name = field.name
|
||||
break
|
||||
|
||||
logger.info(f"Starting to process Airtable records for {table.name}.")
|
||||
record_documents: list[Document] = []
|
||||
for record in records:
|
||||
document = self._process_record(
|
||||
record=record,
|
||||
table_schema=table_schema,
|
||||
primary_field_name=primary_field_name,
|
||||
)
|
||||
record_documents.append(document)
|
||||
|
||||
# Process records in parallel batches using ThreadPoolExecutor
|
||||
PARALLEL_BATCH_SIZE = 16
|
||||
max_workers = min(PARALLEL_BATCH_SIZE, len(records))
|
||||
|
||||
# Process records in batches
|
||||
for i in range(0, len(records), PARALLEL_BATCH_SIZE):
|
||||
batch_records = records[i : i + PARALLEL_BATCH_SIZE]
|
||||
record_documents: list[Document] = []
|
||||
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
# Submit batch tasks
|
||||
future_to_record = {
|
||||
executor.submit(
|
||||
self._process_record,
|
||||
record=record,
|
||||
table_schema=table_schema,
|
||||
primary_field_name=primary_field_name,
|
||||
): record
|
||||
for record in batch_records
|
||||
}
|
||||
|
||||
# Wait for all tasks in this batch to complete
|
||||
for future in as_completed(future_to_record):
|
||||
record = future_to_record[future]
|
||||
try:
|
||||
document = future.result()
|
||||
if document:
|
||||
record_documents.append(document)
|
||||
except Exception as e:
|
||||
logger.exception(f"Failed to process record {record['id']}")
|
||||
raise e
|
||||
|
||||
# After batch is complete, yield if we've hit the batch size
|
||||
if len(record_documents) >= self.batch_size:
|
||||
yield record_documents
|
||||
record_documents = []
|
||||
|
||||
# Yield any remaining records
|
||||
if record_documents:
|
||||
yield record_documents
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from onyx.connectors.interfaces import BaseConnector
|
||||
@@ -46,17 +45,7 @@ class ConnectorRunner:
|
||||
def run(self) -> GenerateDocumentsOutput:
|
||||
"""Adds additional exception logging to the connector."""
|
||||
try:
|
||||
start = time.monotonic()
|
||||
for batch in self.doc_batch_generator:
|
||||
# to know how long connector is taking
|
||||
logger.debug(
|
||||
f"Connector took {time.monotonic() - start} seconds to build a batch."
|
||||
)
|
||||
|
||||
yield batch
|
||||
|
||||
start = time.monotonic()
|
||||
|
||||
yield from self.doc_batch_generator
|
||||
except Exception:
|
||||
exc_type, _, exc_traceback = sys.exc_info()
|
||||
|
||||
|
||||
@@ -50,9 +50,6 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
|
||||
current_link = ""
|
||||
current_text = ""
|
||||
|
||||
if transcript["sentences"] is None:
|
||||
return None
|
||||
|
||||
for sentence in transcript["sentences"]:
|
||||
if sentence["speaker_name"] != current_speaker_name:
|
||||
if current_speaker_name is not None:
|
||||
|
||||
@@ -150,16 +150,6 @@ class Document(DocumentBase):
|
||||
id: str # This must be unique or during indexing/reindexing, chunks will be overwritten
|
||||
source: DocumentSource
|
||||
|
||||
def get_total_char_length(self) -> int:
|
||||
"""Calculate the total character length of the document including sections, metadata, and identifiers."""
|
||||
section_length = sum(len(section.text) for section in self.sections)
|
||||
identifier_length = len(self.semantic_identifier) + len(self.title or "")
|
||||
metadata_length = sum(
|
||||
len(k) + len(v) if isinstance(v, str) else len(k) + sum(len(x) for x in v)
|
||||
for k, v in self.metadata.items()
|
||||
)
|
||||
return section_length + identifier_length + metadata_length
|
||||
|
||||
def to_short_descriptor(self) -> str:
|
||||
"""Used when logging the identity of a document"""
|
||||
return f"ID: '{self.id}'; Semantic ID: '{self.semantic_identifier}'"
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
import io
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
from typing import Optional
|
||||
from urllib.parse import unquote
|
||||
|
||||
import msal # type: ignore
|
||||
from office365.graph_client import GraphClient # type: ignore
|
||||
from office365.onedrive.driveitems.driveItem import DriveItem # type: ignore
|
||||
from pydantic import BaseModel
|
||||
from office365.onedrive.sites.site import Site # type: ignore
|
||||
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
@@ -27,25 +30,16 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class SiteDescriptor(BaseModel):
|
||||
"""Data class for storing SharePoint site information.
|
||||
|
||||
Args:
|
||||
url: The base site URL (e.g. https://danswerai.sharepoint.com/sites/sharepoint-tests)
|
||||
drive_name: The name of the drive to access (e.g. "Shared Documents", "Other Library")
|
||||
If None, all drives will be accessed.
|
||||
folder_path: The folder path within the drive to access (e.g. "test/nested with spaces")
|
||||
If None, all folders will be accessed.
|
||||
"""
|
||||
|
||||
url: str
|
||||
drive_name: str | None
|
||||
folder_path: str | None
|
||||
@dataclass
|
||||
class SiteData:
|
||||
url: str | None
|
||||
folder: Optional[str]
|
||||
sites: list = field(default_factory=list)
|
||||
driveitems: list = field(default_factory=list)
|
||||
|
||||
|
||||
def _convert_driveitem_to_document(
|
||||
driveitem: DriveItem,
|
||||
drive_name: str,
|
||||
) -> Document:
|
||||
file_text = extract_file_text(
|
||||
file=io.BytesIO(driveitem.get_content().execute_query().value),
|
||||
@@ -65,7 +59,7 @@ def _convert_driveitem_to_document(
|
||||
email=driveitem.last_modified_by.user.email,
|
||||
)
|
||||
],
|
||||
metadata={"drive": drive_name},
|
||||
metadata={},
|
||||
)
|
||||
return doc
|
||||
|
||||
@@ -77,179 +71,106 @@ class SharepointConnector(LoadConnector, PollConnector):
|
||||
sites: list[str] = [],
|
||||
) -> None:
|
||||
self.batch_size = batch_size
|
||||
self._graph_client: GraphClient | None = None
|
||||
self.site_descriptors: list[SiteDescriptor] = self._extract_site_and_drive_info(
|
||||
sites
|
||||
)
|
||||
self.msal_app: msal.ConfidentialClientApplication | None = None
|
||||
|
||||
@property
|
||||
def graph_client(self) -> GraphClient:
|
||||
if self._graph_client is None:
|
||||
raise ConnectorMissingCredentialError("Sharepoint")
|
||||
|
||||
return self._graph_client
|
||||
self.graph_client: GraphClient | None = None
|
||||
self.site_data: list[SiteData] = self._extract_site_and_folder(sites)
|
||||
|
||||
@staticmethod
|
||||
def _extract_site_and_drive_info(site_urls: list[str]) -> list[SiteDescriptor]:
|
||||
def _extract_site_and_folder(site_urls: list[str]) -> list[SiteData]:
|
||||
site_data_list = []
|
||||
for url in site_urls:
|
||||
parts = url.strip().split("/")
|
||||
if "sites" in parts:
|
||||
sites_index = parts.index("sites")
|
||||
site_url = "/".join(parts[: sites_index + 2])
|
||||
remaining_parts = parts[sites_index + 2 :]
|
||||
|
||||
# Extract drive name and folder path
|
||||
if remaining_parts:
|
||||
drive_name = unquote(remaining_parts[0])
|
||||
folder_path = (
|
||||
"/".join(unquote(part) for part in remaining_parts[1:])
|
||||
if len(remaining_parts) > 1
|
||||
else None
|
||||
)
|
||||
else:
|
||||
drive_name = None
|
||||
folder_path = None
|
||||
|
||||
folder = (
|
||||
"/".join(unquote(part) for part in parts[sites_index + 2 :])
|
||||
if len(parts) > sites_index + 2
|
||||
else None
|
||||
)
|
||||
# Handling for new URL structure
|
||||
if folder and folder.startswith("Shared Documents/"):
|
||||
folder = folder[len("Shared Documents/") :]
|
||||
site_data_list.append(
|
||||
SiteDescriptor(
|
||||
url=site_url,
|
||||
drive_name=drive_name,
|
||||
folder_path=folder_path,
|
||||
)
|
||||
SiteData(url=site_url, folder=folder, sites=[], driveitems=[])
|
||||
)
|
||||
return site_data_list
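As a sanity check on the URL handling above, here is a hedged sketch of how a site URL could be split into the pieces the new `SiteDescriptor` stores; the example URL comes from the docstring in this diff, but the helper below is a simplified re-statement, not the connector's exact code:

```python
from urllib.parse import unquote


def parse_sharepoint_url(url: str) -> tuple[str, str | None, str | None]:
    """Return (site_url, drive_name, folder_path) for a SharePoint site URL."""
    parts = url.strip().split("/")
    sites_index = parts.index("sites")
    site_url = "/".join(parts[: sites_index + 2])
    remaining = parts[sites_index + 2 :]
    drive_name = unquote(remaining[0]) if remaining else None
    folder_path = "/".join(unquote(p) for p in remaining[1:]) if len(remaining) > 1 else None
    return site_url, drive_name, folder_path


print(
    parse_sharepoint_url(
        "https://danswerai.sharepoint.com/sites/sharepoint-tests/Shared%20Documents/test/nested%20with%20spaces"
    )
)
# ('https://danswerai.sharepoint.com/sites/sharepoint-tests', 'Shared Documents', 'test/nested with spaces')
```
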
def _fetch_driveitems(
|
||||
def _populate_sitedata_driveitems(
|
||||
self,
|
||||
site_descriptor: SiteDescriptor,
|
||||
start: datetime | None = None,
|
||||
end: datetime | None = None,
|
||||
) -> list[tuple[DriveItem, str]]:
|
||||
final_driveitems: list[tuple[DriveItem, str]] = []
|
||||
try:
|
||||
site = self.graph_client.sites.get_by_url(site_descriptor.url)
|
||||
) -> None:
|
||||
filter_str = ""
|
||||
if start is not None and end is not None:
|
||||
filter_str = f"last_modified_datetime ge {start.isoformat()} and last_modified_datetime le {end.isoformat()}"
|
||||
|
||||
# Get all drives in the site
|
||||
drives = site.drives.get().execute_query()
|
||||
logger.debug(f"Found drives: {[drive.name for drive in drives]}")
|
||||
for element in self.site_data:
|
||||
sites: list[Site] = []
|
||||
for site in element.sites:
|
||||
site_sublist = site.lists.get().execute_query()
|
||||
sites.extend(site_sublist)
|
||||
|
||||
# Filter drives based on the requested drive name
|
||||
if site_descriptor.drive_name:
|
||||
drives = [
|
||||
drive
|
||||
for drive in drives
|
||||
if drive.name == site_descriptor.drive_name
|
||||
or (
|
||||
drive.name == "Documents"
|
||||
and site_descriptor.drive_name == "Shared Documents"
|
||||
)
|
||||
]
|
||||
if not drives:
|
||||
logger.warning(f"Drive '{site_descriptor.drive_name}' not found")
|
||||
return []
|
||||
|
||||
# Process each matching drive
|
||||
for drive in drives:
|
||||
for site in sites:
|
||||
try:
|
||||
root_folder = drive.root
|
||||
if site_descriptor.folder_path:
|
||||
# If a specific folder is requested, navigate to it
|
||||
for folder_part in site_descriptor.folder_path.split("/"):
|
||||
root_folder = root_folder.get_by_path(folder_part)
|
||||
|
||||
# Get all items recursively
|
||||
query = root_folder.get_files(
|
||||
recursive=True,
|
||||
page_size=1000,
|
||||
)
|
||||
query = site.drive.root.get_files(True, 1000)
|
||||
if filter_str:
|
||||
query = query.filter(filter_str)
|
||||
driveitems = query.execute_query()
|
||||
logger.debug(
|
||||
f"Found {len(driveitems)} items in drive '{drive.name}'"
|
||||
)
|
||||
|
||||
# Use "Shared Documents" as the library name for the default "Documents" drive
|
||||
drive_name = (
|
||||
"Shared Documents" if drive.name == "Documents" else drive.name
|
||||
)
|
||||
|
||||
# Filter items based on folder path if specified
|
||||
if site_descriptor.folder_path:
|
||||
# Filter items to ensure they're in the specified folder or its subfolders
|
||||
# The path will be in format: /drives/{drive_id}/root:/folder/path
|
||||
driveitems = [
|
||||
if element.folder:
|
||||
expected_path = f"/root:/{element.folder}"
|
||||
filtered_driveitems = [
|
||||
item
|
||||
for item in driveitems
|
||||
if any(
|
||||
path_part == site_descriptor.folder_path
|
||||
or path_part.startswith(
|
||||
site_descriptor.folder_path + "/"
|
||||
)
|
||||
for path_part in item.parent_reference.path.split(
|
||||
"root:/"
|
||||
)[1].split("/")
|
||||
)
|
||||
if item.parent_reference.path.endswith(expected_path)
|
||||
]
|
||||
if len(driveitems) == 0:
|
||||
if len(filtered_driveitems) == 0:
|
||||
all_paths = [
|
||||
item.parent_reference.path for item in driveitems
|
||||
]
|
||||
logger.warning(
|
||||
f"Nothing found for folder '{site_descriptor.folder_path}' "
|
||||
f"in; any of valid paths: {all_paths}"
|
||||
f"Nothing found for folder '{expected_path}' in any of valid paths: {all_paths}"
|
||||
)
|
||||
element.driveitems.extend(filtered_driveitems)
|
||||
else:
|
||||
element.driveitems.extend(driveitems)
|
||||
|
||||
# Filter items based on time window if specified
|
||||
if start is not None and end is not None:
|
||||
driveitems = [
|
||||
item
|
||||
for item in driveitems
|
||||
if start
|
||||
<= item.last_modified_datetime.replace(tzinfo=timezone.utc)
|
||||
<= end
|
||||
]
|
||||
logger.debug(
|
||||
f"Found {len(driveitems)} items within time window in drive '{drive.name}'"
|
||||
)
|
||||
except Exception:
|
||||
# Sites include things that do not contain .drive.root so this fails
# but this is fine, as there are no actual documents in those
|
||||
pass
|
||||
|
||||
for item in driveitems:
|
||||
final_driveitems.append((item, drive_name))
|
||||
def _populate_sitedata_sites(self) -> None:
|
||||
if self.graph_client is None:
|
||||
raise ConnectorMissingCredentialError("Sharepoint")
|
||||
|
||||
except Exception as e:
|
||||
# Some drives might not be accessible
|
||||
logger.warning(f"Failed to process drive: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
# Sites include things that do not contain drives so this fails
|
||||
# but this is fine, as there are no actual documents in those
|
||||
logger.warning(f"Failed to process site: {str(e)}")
|
||||
|
||||
return final_driveitems
|
||||
|
||||
def _fetch_sites(self) -> list[SiteDescriptor]:
|
||||
sites = self.graph_client.sites.get_all().execute_query()
|
||||
site_descriptors = [
|
||||
SiteDescriptor(
|
||||
url=sites.resource_url,
|
||||
drive_name=None,
|
||||
folder_path=None,
|
||||
)
|
||||
]
|
||||
return site_descriptors
|
||||
if self.site_data:
|
||||
for element in self.site_data:
|
||||
element.sites = [
|
||||
self.graph_client.sites.get_by_url(element.url)
|
||||
.get()
|
||||
.execute_query()
|
||||
]
|
||||
else:
|
||||
sites = self.graph_client.sites.get_all().execute_query()
|
||||
self.site_data = [
|
||||
SiteData(url=None, folder=None, sites=sites, driveitems=[])
|
||||
]
|
||||
|
||||
def _fetch_from_sharepoint(
|
||||
self, start: datetime | None = None, end: datetime | None = None
|
||||
) -> GenerateDocumentsOutput:
|
||||
site_descriptors = self.site_descriptors or self._fetch_sites()
|
||||
if self.graph_client is None:
|
||||
raise ConnectorMissingCredentialError("Sharepoint")
|
||||
|
||||
self._populate_sitedata_sites()
|
||||
self._populate_sitedata_driveitems(start=start, end=end)
|
||||
|
||||
# goes over all urls, converts them into Document objects and then yields them in batches
|
||||
doc_batch: list[Document] = []
|
||||
for site_descriptor in site_descriptors:
|
||||
driveitems = self._fetch_driveitems(site_descriptor, start=start, end=end)
|
||||
for driveitem, drive_name in driveitems:
|
||||
for element in self.site_data:
|
||||
for driveitem in element.driveitems:
|
||||
logger.debug(f"Processing: {driveitem.web_url}")
|
||||
doc_batch.append(_convert_driveitem_to_document(driveitem, drive_name))
|
||||
doc_batch.append(_convert_driveitem_to_document(driveitem))
|
||||
|
||||
if len(doc_batch) >= self.batch_size:
|
||||
yield doc_batch
|
||||
@@ -261,26 +182,22 @@ class SharepointConnector(LoadConnector, PollConnector):
|
||||
sp_client_secret = credentials["sp_client_secret"]
|
||||
sp_directory_id = credentials["sp_directory_id"]
|
||||
|
||||
authority_url = f"https://login.microsoftonline.com/{sp_directory_id}"
|
||||
self.msal_app = msal.ConfidentialClientApplication(
|
||||
authority=authority_url,
|
||||
client_id=sp_client_id,
|
||||
client_credential=sp_client_secret,
|
||||
)
|
||||
|
||||
def _acquire_token_func() -> dict[str, Any]:
|
||||
"""
|
||||
Acquire token via MSAL
|
||||
"""
|
||||
if self.msal_app is None:
|
||||
raise RuntimeError("MSAL app is not initialized")
|
||||
|
||||
token = self.msal_app.acquire_token_for_client(
|
||||
authority_url = f"https://login.microsoftonline.com/{sp_directory_id}"
|
||||
app = msal.ConfidentialClientApplication(
|
||||
authority=authority_url,
|
||||
client_id=sp_client_id,
|
||||
client_credential=sp_client_secret,
|
||||
)
|
||||
token = app.acquire_token_for_client(
|
||||
scopes=["https://graph.microsoft.com/.default"]
|
||||
)
|
||||
return token
|
||||
|
||||
self._graph_client = GraphClient(_acquire_token_func)
|
||||
self.graph_client = GraphClient(_acquire_token_func)
|
||||
return None
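For reference, a standalone sketch of the MSAL-plus-GraphClient wiring this hunk reworks; the authority URL and Graph scope are taken from the diff, while the function wrapper and omitted error handling are assumptions for illustration:

```python
from typing import Any

import msal  # type: ignore
from office365.graph_client import GraphClient  # type: ignore


def build_graph_client(client_id: str, client_secret: str, directory_id: str) -> GraphClient:
    authority_url = f"https://login.microsoftonline.com/{directory_id}"
    app = msal.ConfidentialClientApplication(
        authority=authority_url,
        client_id=client_id,
        client_credential=client_secret,
    )

    def _acquire_token() -> dict[str, Any]:
        # Client-credentials flow against the default Graph scope.
        return app.acquire_token_for_client(
            scopes=["https://graph.microsoft.com/.default"]
        )

    return GraphClient(_acquire_token)
```
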
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||
@@ -289,19 +206,19 @@ class SharepointConnector(LoadConnector, PollConnector):
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
start_datetime = datetime.fromtimestamp(start, timezone.utc)
end_datetime = datetime.fromtimestamp(end, timezone.utc)
start_datetime = datetime.utcfromtimestamp(start)
end_datetime = datetime.utcfromtimestamp(end)
return self._fetch_from_sharepoint(start=start_datetime, end=end_datetime)
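The timestamp conversion is the behavioral nuance in this hunk: `datetime.utcfromtimestamp` returns a naive datetime (and is deprecated as of Python 3.12), while `datetime.fromtimestamp(ts, timezone.utc)` returns a timezone-aware one. A small illustration:

```python
from datetime import datetime, timezone

ts = 1_700_000_000
aware = datetime.fromtimestamp(ts, timezone.utc)  # tzinfo is UTC
naive = datetime.utcfromtimestamp(ts)             # tzinfo is None, deprecated in 3.12+

print(aware.tzinfo)  # UTC
print(naive.tzinfo)  # None
```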


if __name__ == "__main__":
connector = SharepointConnector(sites=os.environ["SHAREPOINT_SITES"].split(","))
connector = SharepointConnector(sites=os.environ["SITES"].split(","))

connector.load_credentials(
{
"sp_client_id": os.environ["SHAREPOINT_CLIENT_ID"],
"sp_client_secret": os.environ["SHAREPOINT_CLIENT_SECRET"],
"sp_directory_id": os.environ["SHAREPOINT_CLIENT_DIRECTORY_ID"],
"sp_client_id": os.environ["SP_CLIENT_ID"],
"sp_client_secret": os.environ["SP_CLIENT_SECRET"],
"sp_directory_id": os.environ["SP_CLIENT_DIRECTORY_ID"],
}
)
document_batches = connector.load_from_state()

@@ -104,11 +104,8 @@ def make_slack_api_rate_limited(
|
||||
f"Slack call rate limited, retrying after {retry_after} seconds. Exception: {e}"
|
||||
)
|
||||
time.sleep(retry_after)
|
||||
elif error in ["already_reacted", "no_reaction", "internal_error"]:
|
||||
# Log internal_error and return the response instead of failing
|
||||
logger.warning(
|
||||
f"Slack call encountered '{error}', skipping and continuing..."
|
||||
)
|
||||
elif error in ["already_reacted", "no_reaction"]:
|
||||
# The response isn't used for reactions, this is basically just a pass
|
||||
return e.response
|
||||
else:
|
||||
# Raise the error for non-transient errors
|
||||
|
||||
@@ -180,28 +180,23 @@ class TeamsConnector(LoadConnector, PollConnector):
|
||||
self.batch_size = batch_size
|
||||
self.graph_client: GraphClient | None = None
|
||||
self.requested_team_list: list[str] = teams
|
||||
self.msal_app: msal.ConfidentialClientApplication | None = None
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
teams_client_id = credentials["teams_client_id"]
|
||||
teams_client_secret = credentials["teams_client_secret"]
|
||||
teams_directory_id = credentials["teams_directory_id"]
|
||||
|
||||
authority_url = f"https://login.microsoftonline.com/{teams_directory_id}"
|
||||
self.msal_app = msal.ConfidentialClientApplication(
|
||||
authority=authority_url,
|
||||
client_id=teams_client_id,
|
||||
client_credential=teams_client_secret,
|
||||
)
|
||||
|
||||
def _acquire_token_func() -> dict[str, Any]:
|
||||
"""
|
||||
Acquire token via MSAL
|
||||
"""
|
||||
if self.msal_app is None:
|
||||
raise RuntimeError("MSAL app is not initialized")
|
||||
|
||||
token = self.msal_app.acquire_token_for_client(
|
||||
authority_url = f"https://login.microsoftonline.com/{teams_directory_id}"
|
||||
app = msal.ConfidentialClientApplication(
|
||||
authority=authority_url,
|
||||
client_id=teams_client_id,
|
||||
client_credential=teams_client_secret,
|
||||
)
|
||||
token = app.acquire_token_for_client(
|
||||
scopes=["https://graph.microsoft.com/.default"]
|
||||
)
|
||||
return token
|
||||
|
||||
@@ -67,7 +67,10 @@ class SearchPipeline:
self.rerank_metrics_callback = rerank_metrics_callback

self.search_settings = get_current_search_settings(db_session)
self.document_index = get_default_document_index(self.search_settings, None)
self.document_index = get_default_document_index(
primary_index_name=self.search_settings.index_name,
secondary_index_name=None,
)
self.prompt_config: PromptConfig | None = prompt_config

# Preprocessing steps generate this

@@ -28,9 +28,6 @@ class SyncType(str, PyEnum):
|
||||
DOCUMENT_SET = "document_set"
|
||||
USER_GROUP = "user_group"
|
||||
CONNECTOR_DELETION = "connector_deletion"
|
||||
PRUNING = "pruning" # not really a sync, but close enough
|
||||
EXTERNAL_PERMISSIONS = "external_permissions"
|
||||
EXTERNAL_GROUP = "external_group"
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.value
|
||||
|
||||
@@ -3,8 +3,6 @@ from sqlalchemy import or_
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import AUTH_TYPE
|
||||
from onyx.configs.constants import AuthType
|
||||
from onyx.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel
|
||||
from onyx.db.models import DocumentSet
|
||||
from onyx.db.models import LLMProvider as LLMProviderModel
|
||||
@@ -126,29 +124,10 @@ def fetch_existing_tools(db_session: Session, tool_ids: list[int]) -> list[ToolM
|
||||
|
||||
def fetch_existing_llm_providers(
|
||||
db_session: Session,
|
||||
) -> list[LLMProviderModel]:
|
||||
stmt = select(LLMProviderModel)
|
||||
return list(db_session.scalars(stmt).all())
|
||||
|
||||
|
||||
def fetch_existing_llm_providers_for_user(
|
||||
db_session: Session,
|
||||
user: User | None = None,
|
||||
) -> list[LLMProviderModel]:
|
||||
if not user:
|
||||
if AUTH_TYPE != AuthType.DISABLED:
|
||||
# User is anonymous
|
||||
return list(
|
||||
db_session.scalars(
|
||||
select(LLMProviderModel).where(
|
||||
LLMProviderModel.is_public == True # noqa: E712
|
||||
)
|
||||
).all()
|
||||
)
|
||||
else:
|
||||
# If auth is disabled, user has access to all providers
|
||||
return fetch_existing_llm_providers(db_session)
|
||||
|
||||
return list(db_session.scalars(select(LLMProviderModel)).all())
|
||||
stmt = select(LLMProviderModel).distinct()
|
||||
user_groups_select = select(User__UserGroup.user_group_id).where(
|
||||
User__UserGroup.user_id == user.id
|
||||
|
||||
@@ -161,7 +161,9 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
|
||||
hidden_assistants: Mapped[list[int]] = mapped_column(
|
||||
postgresql.JSONB(), nullable=False, default=[]
|
||||
)
|
||||
|
||||
recent_assistants: Mapped[list[dict]] = mapped_column(
|
||||
postgresql.JSONB(), nullable=False, default=list, server_default="[]"
|
||||
)
|
||||
pinned_assistants: Mapped[list[int] | None] = mapped_column(
|
||||
postgresql.JSONB(), nullable=True, default=None
|
||||
)
|
||||
@@ -745,34 +747,6 @@ class SearchSettings(Base):
def api_key(self) -> str | None:
return self.cloud_provider.api_key if self.cloud_provider is not None else None

@property
def large_chunks_enabled(self) -> bool:
"""
Given multipass usage and an embedder, decides whether large chunks are allowed
based on model/provider constraints.
"""
# Only local models that support a larger context are from Nomic
# Cohere does not support larger contexts (they recommend not going above ~512 tokens)
return SearchSettings.can_use_large_chunks(
self.multipass_indexing, self.model_name, self.provider_type
)

@staticmethod
def can_use_large_chunks(
multipass: bool, model_name: str, provider_type: EmbeddingProvider | None
) -> bool:
"""
Given multipass usage and an embedder, decides whether large chunks are allowed
based on model/provider constraints.
"""
# Only local models that support a larger context are from Nomic
# Cohere does not support larger contexts (they recommend not going above ~512 tokens)
return (
multipass
and model_name.startswith("nomic-ai")
and provider_type != EmbeddingProvider.COHERE
)

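A quick illustration of the large-chunk rule above: multipass must be enabled, the model must be a Nomic local model, and the provider must not be Cohere. The helper below restates the predicate with a plain boolean in place of the `EmbeddingProvider` enum check so it runs standalone; the model names are examples, not values from this diff:

```python
def can_use_large_chunks(multipass: bool, model_name: str, provider_is_cohere: bool) -> bool:
    # Same predicate as the staticmethod above, with the enum comparison simplified.
    return multipass and model_name.startswith("nomic-ai") and not provider_is_cohere


assert can_use_large_chunks(True, "nomic-ai/nomic-embed-text-v1", provider_is_cohere=False)
assert not can_use_large_chunks(False, "nomic-ai/nomic-embed-text-v1", provider_is_cohere=False)
assert not can_use_large_chunks(True, "text-embedding-3-small", provider_is_cohere=False)
assert not can_use_large_chunks(True, "nomic-ai/nomic-embed-text-v1", provider_is_cohere=True)
```
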
class IndexAttempt(Base):
|
||||
"""
|
||||
|
||||
@@ -11,7 +11,7 @@ from sqlalchemy import Select
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import update
|
||||
from sqlalchemy.orm import aliased
|
||||
from sqlalchemy.orm import selectinload
|
||||
from sqlalchemy.orm import joinedload
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.schemas import UserRole
|
||||
@@ -291,9 +291,8 @@ def get_personas_for_user(
|
||||
include_deleted: bool = False,
|
||||
joinedload_all: bool = False,
|
||||
) -> Sequence[Persona]:
|
||||
stmt = select(Persona)
|
||||
stmt = _add_user_filters(stmt, user, get_editable)
|
||||
|
||||
stmt = select(Persona).distinct()
|
||||
stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable)
|
||||
if not include_default:
|
||||
stmt = stmt.where(Persona.builtin_persona.is_(False))
|
||||
if not include_slack_bot_personas:
|
||||
@@ -303,16 +302,14 @@ def get_personas_for_user(
|
||||
|
||||
if joinedload_all:
|
||||
stmt = stmt.options(
|
||||
selectinload(Persona.prompts),
|
||||
selectinload(Persona.tools),
|
||||
selectinload(Persona.document_sets),
|
||||
selectinload(Persona.groups),
|
||||
selectinload(Persona.users),
|
||||
selectinload(Persona.labels),
|
||||
joinedload(Persona.prompts),
|
||||
joinedload(Persona.tools),
|
||||
joinedload(Persona.document_sets),
|
||||
joinedload(Persona.groups),
|
||||
joinedload(Persona.users),
|
||||
)
|
||||
|
||||
results = db_session.execute(stmt).scalars().all()
|
||||
return results
|
||||
return db_session.execute(stmt).unique().scalars().all()
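The `joinedload` versus `selectinload` switch in this hunk is worth a note: joined eager loading of collections duplicates parent rows until `.unique()` is applied to the result (hence the `.unique()` call in one version of this function), while `selectinload` issues a separate `SELECT ... IN` per relationship and returns distinct parents. A hedged, minimal sketch with toy models (not Onyx's):

```python
from sqlalchemy import ForeignKey, create_engine, select
from sqlalchemy.orm import (
    DeclarativeBase, Mapped, Session, mapped_column, relationship, selectinload
)


class Base(DeclarativeBase):
    pass


class Persona(Base):
    __tablename__ = "persona"
    id: Mapped[int] = mapped_column(primary_key=True)
    tools: Mapped[list["Tool"]] = relationship(back_populates="persona")


class Tool(Base):
    __tablename__ = "tool"
    id: Mapped[int] = mapped_column(primary_key=True)
    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"))
    persona: Mapped[Persona] = relationship(back_populates="tools")


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    # selectinload issues one extra SELECT for tools and does not duplicate
    # Persona rows, so no .unique() is needed on the result.
    personas = session.scalars(
        select(Persona).options(selectinload(Persona.tools))
    ).all()
```
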
def get_personas(db_session: Session) -> Sequence[Persona]:
|
||||
|
||||
@@ -29,21 +29,9 @@ from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import PRESERVED_SEARCH_FIELDS
|
||||
from shared_configs.enums import EmbeddingProvider
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class ActiveSearchSettings:
|
||||
primary: SearchSettings
|
||||
secondary: SearchSettings | None
|
||||
|
||||
def __init__(
|
||||
self, primary: SearchSettings, secondary: SearchSettings | None
|
||||
) -> None:
|
||||
self.primary = primary
|
||||
self.secondary = secondary
|
||||
|
||||
|
||||
def create_search_settings(
|
||||
search_settings: SavedSearchSettings,
|
||||
db_session: Session,
|
||||
@@ -155,27 +143,21 @@ def get_secondary_search_settings(db_session: Session) -> SearchSettings | None:
|
||||
return latest_settings
|
||||
|
||||
|
||||
def get_active_search_settings(db_session: Session) -> ActiveSearchSettings:
|
||||
"""Returns active search settings. Secondary search settings may be None."""
|
||||
|
||||
# Get the primary and secondary search settings
|
||||
primary_search_settings = get_current_search_settings(db_session)
|
||||
secondary_search_settings = get_secondary_search_settings(db_session)
|
||||
return ActiveSearchSettings(
|
||||
primary=primary_search_settings, secondary=secondary_search_settings
|
||||
)
|
||||
|
||||
|
||||
def get_active_search_settings_list(db_session: Session) -> list[SearchSettings]:
|
||||
"""Returns active search settings as a list. Primary settings are the first element,
|
||||
and if secondary search settings exist, they will be the second element."""
|
||||
|
||||
def get_active_search_settings(db_session: Session) -> list[SearchSettings]:
|
||||
"""Returns active search settings. The first entry will always be the current search
|
||||
settings. If there are new search settings that are being migrated to, those will be
|
||||
the second entry."""
|
||||
search_settings_list: list[SearchSettings] = []
|
||||
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
search_settings_list.append(active_search_settings.primary)
|
||||
if active_search_settings.secondary:
|
||||
search_settings_list.append(active_search_settings.secondary)
|
||||
# Get the primary search settings
|
||||
primary_search_settings = get_current_search_settings(db_session)
|
||||
search_settings_list.append(primary_search_settings)
|
||||
|
||||
# Check for secondary search settings
|
||||
secondary_search_settings = get_secondary_search_settings(db_session)
|
||||
if secondary_search_settings is not None:
|
||||
# If secondary settings exist, add them to the list
|
||||
search_settings_list.append(secondary_search_settings)
|
||||
|
||||
return search_settings_list
|
||||
|
||||
|
||||
@@ -4,63 +4,24 @@ from uuid import UUID
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import ENABLE_MULTIPASS_INDEXING
|
||||
from onyx.db.models import SearchSettings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.search_settings import get_secondary_search_settings
|
||||
from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
|
||||
from onyx.indexing.models import DocMetadataAwareIndexChunk
|
||||
from onyx.indexing.models import MultipassConfig
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
DEFAULT_BATCH_SIZE = 30
|
||||
DEFAULT_INDEX_NAME = "danswer_chunk"
|
||||
|
||||
|
||||
def should_use_multipass(search_settings: SearchSettings | None) -> bool:
|
||||
"""
|
||||
Determines whether multipass should be used based on the search settings
|
||||
or the default config if settings are unavailable.
|
||||
"""
|
||||
if search_settings is not None:
|
||||
return search_settings.multipass_indexing
|
||||
return ENABLE_MULTIPASS_INDEXING
|
||||
|
||||
|
||||
def get_multipass_config(search_settings: SearchSettings) -> MultipassConfig:
|
||||
"""
|
||||
Determines whether to enable multipass and large chunks by examining
|
||||
the current search settings and the embedder configuration.
|
||||
"""
|
||||
if not search_settings:
|
||||
return MultipassConfig(multipass_indexing=False, enable_large_chunks=False)
|
||||
|
||||
multipass = should_use_multipass(search_settings)
|
||||
enable_large_chunks = SearchSettings.can_use_large_chunks(
|
||||
multipass, search_settings.model_name, search_settings.provider_type
|
||||
)
|
||||
return MultipassConfig(
|
||||
multipass_indexing=multipass, enable_large_chunks=enable_large_chunks
|
||||
)
|
||||
|
||||
|
||||
def get_both_index_properties(
|
||||
db_session: Session,
|
||||
) -> tuple[str, str | None, bool, bool | None]:
|
||||
def get_both_index_names(db_session: Session) -> tuple[str, str | None]:
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
config_1 = get_multipass_config(search_settings)
|
||||
|
||||
search_settings_new = get_secondary_search_settings(db_session)
|
||||
if not search_settings_new:
|
||||
return search_settings.index_name, None, config_1.enable_large_chunks, None
|
||||
return search_settings.index_name, None
|
||||
|
||||
config_2 = get_multipass_config(search_settings)
|
||||
return (
|
||||
search_settings.index_name,
|
||||
search_settings_new.index_name,
|
||||
config_1.enable_large_chunks,
|
||||
config_2.enable_large_chunks,
|
||||
)
|
||||
return search_settings.index_name, search_settings_new.index_name
|
||||
|
||||
|
||||
def translate_boost_count_to_multiplier(boost: int) -> float:
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
import httpx
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.models import SearchSettings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.interfaces import DocumentIndex
|
||||
from onyx.document_index.vespa.index import VespaIndex
|
||||
@@ -9,28 +7,17 @@ from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
|
||||
def get_default_document_index(
|
||||
search_settings: SearchSettings,
|
||||
secondary_search_settings: SearchSettings | None,
|
||||
httpx_client: httpx.Client | None = None,
|
||||
primary_index_name: str,
|
||||
secondary_index_name: str | None,
|
||||
) -> DocumentIndex:
|
||||
"""Primary index is the index that is used for querying/updating etc.
|
||||
Secondary index is for when both the currently used index and the upcoming
|
||||
index both need to be updated, updates are applied to both indices"""
|
||||
|
||||
secondary_index_name: str | None = None
|
||||
secondary_large_chunks_enabled: bool | None = None
|
||||
if secondary_search_settings:
|
||||
secondary_index_name = secondary_search_settings.index_name
|
||||
secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled
|
||||
|
||||
# Currently only supporting Vespa
|
||||
return VespaIndex(
|
||||
index_name=search_settings.index_name,
|
||||
index_name=primary_index_name,
|
||||
secondary_index_name=secondary_index_name,
|
||||
large_chunks_enabled=search_settings.large_chunks_enabled,
|
||||
secondary_large_chunks_enabled=secondary_large_chunks_enabled,
|
||||
multitenant=MULTI_TENANT,
|
||||
httpx_client=httpx_client,
|
||||
)
|
||||
|
||||
|
||||
@@ -40,6 +27,6 @@ def get_current_primary_default_document_index(db_session: Session) -> DocumentI
|
||||
"""
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
return get_default_document_index(
|
||||
search_settings,
|
||||
None,
|
||||
primary_index_name=search_settings.index_name,
|
||||
secondary_index_name=None,
|
||||
)
|
||||
|
||||
@@ -231,22 +231,21 @@ def _get_chunks_via_visit_api(
|
||||
return document_chunks
|
||||
|
||||
|
||||
# TODO(rkuo): candidate for removal if not being used
|
||||
# @retry(tries=10, delay=1, backoff=2)
|
||||
# def get_all_vespa_ids_for_document_id(
|
||||
# document_id: str,
|
||||
# index_name: str,
|
||||
# filters: IndexFilters | None = None,
|
||||
# get_large_chunks: bool = False,
|
||||
# ) -> list[str]:
|
||||
# document_chunks = _get_chunks_via_visit_api(
|
||||
# chunk_request=VespaChunkRequest(document_id=document_id),
|
||||
# index_name=index_name,
|
||||
# filters=filters or IndexFilters(access_control_list=None),
|
||||
# field_names=[DOCUMENT_ID],
|
||||
# get_large_chunks=get_large_chunks,
|
||||
# )
|
||||
# return [chunk["id"].split("::", 1)[-1] for chunk in document_chunks]
|
||||
@retry(tries=10, delay=1, backoff=2)
|
||||
def get_all_vespa_ids_for_document_id(
|
||||
document_id: str,
|
||||
index_name: str,
|
||||
filters: IndexFilters | None = None,
|
||||
get_large_chunks: bool = False,
|
||||
) -> list[str]:
|
||||
document_chunks = _get_chunks_via_visit_api(
|
||||
chunk_request=VespaChunkRequest(document_id=document_id),
|
||||
index_name=index_name,
|
||||
filters=filters or IndexFilters(access_control_list=None),
|
||||
field_names=[DOCUMENT_ID],
|
||||
get_large_chunks=get_large_chunks,
|
||||
)
|
||||
return [chunk["id"].split("::", 1)[-1] for chunk in document_chunks]
|
||||
|
||||
|
||||
def parallel_visit_api_retrieval(
|
||||
|
||||
@@ -25,6 +25,7 @@ from onyx.configs.chat_configs import VESPA_SEARCHER_THREADS
|
||||
from onyx.configs.constants import KV_REINDEX_KEY
|
||||
from onyx.context.search.models import IndexFilters
|
||||
from onyx.context.search.models import InferenceChunkUncleaned
|
||||
from onyx.db.engine import get_session_with_tenant
|
||||
from onyx.document_index.document_index_utils import get_document_chunk_ids
|
||||
from onyx.document_index.interfaces import DocumentIndex
|
||||
from onyx.document_index.interfaces import DocumentInsertionRecord
|
||||
@@ -40,12 +41,12 @@ from onyx.document_index.vespa.chunk_retrieval import (
|
||||
)
|
||||
from onyx.document_index.vespa.chunk_retrieval import query_vespa
|
||||
from onyx.document_index.vespa.deletion import delete_vespa_chunks
|
||||
from onyx.document_index.vespa.indexing_utils import BaseHTTPXClientContext
|
||||
from onyx.document_index.vespa.indexing_utils import batch_index_vespa_chunks
|
||||
from onyx.document_index.vespa.indexing_utils import check_for_final_chunk_existence
|
||||
from onyx.document_index.vespa.indexing_utils import clean_chunk_id_copy
|
||||
from onyx.document_index.vespa.indexing_utils import GlobalHTTPXClientContext
|
||||
from onyx.document_index.vespa.indexing_utils import TemporaryHTTPXClientContext
|
||||
from onyx.document_index.vespa.indexing_utils import (
|
||||
get_multipass_config,
|
||||
)
|
||||
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
|
||||
from onyx.document_index.vespa.shared_utils.utils import (
|
||||
replace_invalid_doc_id_characters,
|
||||
@@ -131,34 +132,12 @@ class VespaIndex(DocumentIndex):
|
||||
self,
|
||||
index_name: str,
|
||||
secondary_index_name: str | None,
|
||||
large_chunks_enabled: bool,
|
||||
secondary_large_chunks_enabled: bool | None,
|
||||
multitenant: bool = False,
|
||||
httpx_client: httpx.Client | None = None,
|
||||
) -> None:
|
||||
self.index_name = index_name
|
||||
self.secondary_index_name = secondary_index_name
|
||||
|
||||
self.large_chunks_enabled = large_chunks_enabled
|
||||
self.secondary_large_chunks_enabled = secondary_large_chunks_enabled
|
||||
|
||||
self.multitenant = multitenant
|
||||
|
||||
self.httpx_client_context: BaseHTTPXClientContext
|
||||
|
||||
if httpx_client:
|
||||
self.httpx_client_context = GlobalHTTPXClientContext(httpx_client)
|
||||
else:
|
||||
self.httpx_client_context = TemporaryHTTPXClientContext(
|
||||
get_vespa_http_client
|
||||
)
|
||||
|
||||
self.index_to_large_chunks_enabled: dict[str, bool] = {}
|
||||
self.index_to_large_chunks_enabled[index_name] = large_chunks_enabled
|
||||
if secondary_index_name and secondary_large_chunks_enabled:
|
||||
self.index_to_large_chunks_enabled[
|
||||
secondary_index_name
|
||||
] = secondary_large_chunks_enabled
|
||||
self.http_client = get_vespa_http_client()
|
||||
|
||||
def ensure_indices_exist(
|
||||
self,
|
||||
@@ -352,7 +331,7 @@ class VespaIndex(DocumentIndex):
|
||||
# indexing / updates / deletes since we have to make a large volume of requests.
|
||||
with (
|
||||
concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
|
||||
self.httpx_client_context as http_client,
|
||||
get_vespa_http_client() as http_client,
|
||||
):
|
||||
# We require the start and end index for each document in order to
|
||||
# know precisely which chunks to delete. This information exists for
|
||||
@@ -411,11 +390,9 @@ class VespaIndex(DocumentIndex):
|
||||
for doc_id in all_doc_ids
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@staticmethod
|
||||
def _apply_updates_batched(
|
||||
cls,
|
||||
updates: list[_VespaUpdateRequest],
|
||||
httpx_client: httpx.Client,
|
||||
batch_size: int = BATCH_SIZE,
|
||||
) -> None:
|
||||
"""Runs a batch of updates in parallel via the ThreadPoolExecutor."""
|
||||
@@ -437,7 +414,7 @@ class VespaIndex(DocumentIndex):
|
||||
|
||||
with (
|
||||
concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
|
||||
httpx_client as http_client,
|
||||
get_vespa_http_client() as http_client,
|
||||
):
|
||||
for update_batch in batch_generator(updates, batch_size):
|
||||
future_to_document_id = {
|
||||
@@ -478,7 +455,7 @@ class VespaIndex(DocumentIndex):
|
||||
index_names.append(self.secondary_index_name)
|
||||
|
||||
chunk_id_start_time = time.monotonic()
|
||||
with self.httpx_client_context as http_client:
|
||||
with get_vespa_http_client() as http_client:
|
||||
for update_request in update_requests:
|
||||
for doc_info in update_request.minimal_document_indexing_info:
|
||||
for index_name in index_names:
|
||||
@@ -534,8 +511,7 @@ class VespaIndex(DocumentIndex):
|
||||
)
|
||||
)
|
||||
|
||||
with self.httpx_client_context as httpx_client:
|
||||
self._apply_updates_batched(processed_updates_requests, httpx_client)
|
||||
self._apply_updates_batched(processed_updates_requests)
|
||||
logger.debug(
|
||||
"Finished updating Vespa documents in %.2f seconds",
|
||||
time.monotonic() - update_start,
|
||||
@@ -547,7 +523,6 @@ class VespaIndex(DocumentIndex):
|
||||
index_name: str,
|
||||
fields: VespaDocumentFields,
|
||||
doc_id: str,
|
||||
http_client: httpx.Client,
|
||||
) -> None:
|
||||
"""
|
||||
Update a single "chunk" (document) in Vespa using its chunk ID.
|
||||
@@ -579,17 +554,18 @@ class VespaIndex(DocumentIndex):
|
||||
|
||||
vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}?create=true"
|
||||
|
||||
try:
|
||||
resp = http_client.put(
|
||||
vespa_url,
|
||||
headers={"Content-Type": "application/json"},
|
||||
json=update_dict,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
error_message = f"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). Details: {e.response.text}"
|
||||
logger.error(error_message)
|
||||
raise
|
||||
with get_vespa_http_client(http2=False) as http_client:
|
||||
try:
|
||||
resp = http_client.put(
|
||||
vespa_url,
|
||||
headers={"Content-Type": "application/json"},
|
||||
json=update_dict,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
error_message = f"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). Details: {e.response.text}"
|
||||
logger.error(error_message)
|
||||
raise
|
||||
|
||||
def update_single(
|
||||
self,
|
||||
@@ -603,16 +579,24 @@ class VespaIndex(DocumentIndex):
|
||||
function will complete with no errors or exceptions.
|
||||
Handle other exceptions if you wish to implement retry behavior
|
||||
"""
|
||||
|
||||
doc_chunk_count = 0
|
||||
|
||||
with self.httpx_client_context as httpx_client:
|
||||
for (
|
||||
index_name,
|
||||
large_chunks_enabled,
|
||||
) in self.index_to_large_chunks_enabled.items():
|
||||
index_names = [self.index_name]
|
||||
if self.secondary_index_name:
|
||||
index_names.append(self.secondary_index_name)
|
||||
|
||||
with get_vespa_http_client(http2=False) as http_client:
|
||||
for index_name in index_names:
|
||||
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
|
||||
multipass_config = get_multipass_config(
|
||||
db_session=db_session,
|
||||
primary_index=index_name == self.index_name,
|
||||
)
|
||||
large_chunks_enabled = multipass_config.enable_large_chunks
|
||||
enriched_doc_infos = VespaIndex.enrich_basic_chunk_info(
|
||||
index_name=index_name,
|
||||
http_client=httpx_client,
|
||||
http_client=http_client,
|
||||
document_id=doc_id,
|
||||
previous_chunk_count=chunk_count,
|
||||
new_chunk_count=0,
|
||||
@@ -628,7 +612,10 @@ class VespaIndex(DocumentIndex):
|
||||
|
||||
for doc_chunk_id in doc_chunk_ids:
|
||||
self.update_single_chunk(
|
||||
doc_chunk_id, index_name, fields, doc_id, httpx_client
|
||||
doc_chunk_id=doc_chunk_id,
|
||||
index_name=index_name,
|
||||
fields=fields,
|
||||
doc_id=doc_id,
|
||||
)
|
||||
|
||||
return doc_chunk_count
|
||||
@@ -650,13 +637,19 @@ class VespaIndex(DocumentIndex):
|
||||
if self.secondary_index_name:
|
||||
index_names.append(self.secondary_index_name)
|
||||
|
||||
with self.httpx_client_context as http_client, concurrent.futures.ThreadPoolExecutor(
|
||||
with get_vespa_http_client(
|
||||
http2=False
|
||||
) as http_client, concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=NUM_THREADS
|
||||
) as executor:
|
||||
for (
|
||||
index_name,
|
||||
large_chunks_enabled,
|
||||
) in self.index_to_large_chunks_enabled.items():
|
||||
for index_name in index_names:
|
||||
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
|
||||
multipass_config = get_multipass_config(
|
||||
db_session=db_session,
|
||||
primary_index=index_name == self.index_name,
|
||||
)
|
||||
large_chunks_enabled = multipass_config.enable_large_chunks
|
||||
|
||||
enriched_doc_infos = VespaIndex.enrich_basic_chunk_info(
|
||||
index_name=index_name,
|
||||
http_client=http_client,
|
||||
@@ -825,9 +818,6 @@ class VespaIndex(DocumentIndex):
|
||||
"""
|
||||
Deletes all entries in the specified index with the given tenant_id.
|
||||
|
||||
Currently unused, but we anticipate this being useful. The entire flow does not
|
||||
use the httpx connection pool of an instance.
|
||||
|
||||
Parameters:
|
||||
tenant_id (str): The tenant ID whose documents are to be deleted.
|
||||
index_name (str): The name of the index from which to delete documents.
|
||||
@@ -860,8 +850,6 @@ class VespaIndex(DocumentIndex):
|
||||
"""
|
||||
Retrieves all document IDs with the specified tenant_id, handling pagination.
|
||||
|
||||
Internal helper function for delete_entries_by_tenant_id.
|
||||
|
||||
Parameters:
|
||||
tenant_id (str): The tenant ID to search for.
|
||||
index_name (str): The name of the index to search in.
|
||||
@@ -894,8 +882,8 @@ class VespaIndex(DocumentIndex):
|
||||
f"Querying for document IDs with tenant_id: {tenant_id}, offset: {offset}"
|
||||
)
|
||||
|
||||
with get_vespa_http_client() as http_client:
|
||||
response = http_client.get(url, params=query_params, timeout=None)
|
||||
with get_vespa_http_client(no_timeout=True) as http_client:
|
||||
response = http_client.get(url, params=query_params)
|
||||
response.raise_for_status()
|
||||
|
||||
search_result = response.json()
|
||||
@@ -925,11 +913,6 @@ class VespaIndex(DocumentIndex):
|
||||
"""
|
||||
Deletes documents in batches using multiple threads.
|
||||
|
||||
Internal helper function for delete_entries_by_tenant_id.
|
||||
|
||||
This is a class method and does not use the httpx pool of the instance.
|
||||
This is OK because we don't use this method often.
|
||||
|
||||
Parameters:
|
||||
delete_requests (List[_VespaDeleteRequest]): The list of delete requests.
|
||||
batch_size (int): The number of documents to delete in each batch.
|
||||
@@ -942,14 +925,13 @@ class VespaIndex(DocumentIndex):
|
||||
response = http_client.delete(
|
||||
delete_request.url,
|
||||
headers={"Content-Type": "application/json"},
|
||||
timeout=None,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
logger.debug(f"Starting batch deletion for {len(delete_requests)} documents")
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
|
||||
with get_vespa_http_client() as http_client:
|
||||
with get_vespa_http_client(no_timeout=True) as http_client:
|
||||
for batch_start in range(0, len(delete_requests), batch_size):
|
||||
batch = delete_requests[batch_start : batch_start + batch_size]
|
||||
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
import concurrent.futures
|
||||
import json
|
||||
import uuid
|
||||
from abc import ABC
|
||||
from abc import abstractmethod
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from http import HTTPStatus
|
||||
|
||||
import httpx
|
||||
from retry import retry
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import ENABLE_MULTIPASS_INDEXING
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||
get_experts_stores_representations,
|
||||
)
|
||||
from onyx.db.models import SearchSettings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.search_settings import get_secondary_search_settings
|
||||
from onyx.document_index.document_index_utils import get_uuid_from_chunk
|
||||
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old
|
||||
from onyx.document_index.interfaces import MinimalDocumentIndexingInfo
|
||||
@@ -48,9 +50,10 @@ from onyx.document_index.vespa_constants import TENANT_ID
|
||||
from onyx.document_index.vespa_constants import TITLE
|
||||
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
|
||||
from onyx.indexing.models import DocMetadataAwareIndexChunk
|
||||
from onyx.indexing.models import EmbeddingProvider
|
||||
from onyx.indexing.models import MultipassConfig
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
@@ -272,42 +275,46 @@ def check_for_final_chunk_existence(
|
||||
index += 1
|
||||
|
||||
|
||||
class BaseHTTPXClientContext(ABC):
|
||||
"""Abstract base class for an HTTPX client context manager."""
|
||||
|
||||
@abstractmethod
|
||||
def __enter__(self) -> httpx.Client:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def __exit__(self, exc_type, exc_value, traceback): # type: ignore
|
||||
pass
|
||||
def should_use_multipass(search_settings: SearchSettings | None) -> bool:
|
||||
"""
|
||||
Determines whether multipass should be used based on the search settings
|
||||
or the default config if settings are unavailable.
|
||||
"""
|
||||
if search_settings is not None:
|
||||
return search_settings.multipass_indexing
|
||||
return ENABLE_MULTIPASS_INDEXING
|
||||
|
||||
|
||||
class GlobalHTTPXClientContext(BaseHTTPXClientContext):
|
||||
"""Context manager for a global HTTPX client that does not close it."""
|
||||
|
||||
def __init__(self, client: httpx.Client):
|
||||
self._client = client
|
||||
|
||||
def __enter__(self) -> httpx.Client:
|
||||
return self._client # Reuse the global client
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback): # type: ignore
|
||||
pass # Do nothing; don't close the global client
|
||||
def can_use_large_chunks(multipass: bool, search_settings: SearchSettings) -> bool:
|
||||
"""
|
||||
Given multipass usage and an embedder, decides whether large chunks are allowed
|
||||
based on model/provider constraints.
|
||||
"""
|
||||
# Only local models that support a larger context are from Nomic
|
||||
# Cohere does not support larger contexts (they recommend not going above ~512 tokens)
|
||||
return (
|
||||
multipass
|
||||
and search_settings.model_name.startswith("nomic-ai")
|
||||
and search_settings.provider_type != EmbeddingProvider.COHERE
|
||||
)
|
||||
|
||||
|
||||
class TemporaryHTTPXClientContext(BaseHTTPXClientContext):
|
||||
"""Context manager for a temporary HTTPX client that closes it after use."""
|
||||
|
||||
def __init__(self, client_factory: Callable[[], httpx.Client]):
|
||||
self._client_factory = client_factory
|
||||
self._client: httpx.Client | None = None # Client will be created in __enter__
|
||||
|
||||
def __enter__(self) -> httpx.Client:
|
||||
self._client = self._client_factory() # Create a new client
|
||||
return self._client
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback): # type: ignore
|
||||
if self._client:
|
||||
self._client.close()
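A compact usage sketch of the two context-manager flavours defined above: the global variant reuses one `httpx.Client` without closing it, the temporary variant builds a fresh client per `with` block and closes it on exit. This assumes the classes above are in scope; the URL is a placeholder:

```python
import httpx

# Reuse a process-wide client without closing it when the block exits:
global_client = httpx.Client(http2=False)
with GlobalHTTPXClientContext(global_client) as client:
    client.get("http://localhost:8080/health")  # placeholder URL

# Build a short-lived client that is closed when the block exits:
with TemporaryHTTPXClientContext(lambda: httpx.Client(http2=False)) as client:
    client.get("http://localhost:8080/health")  # placeholder URL
```
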
def get_multipass_config(
|
||||
db_session: Session, primary_index: bool = True
|
||||
) -> MultipassConfig:
|
||||
"""
|
||||
Determines whether to enable multipass and large chunks by examining
|
||||
the current search settings and the embedder configuration.
|
||||
"""
|
||||
search_settings = (
|
||||
get_current_search_settings(db_session)
|
||||
if primary_index
|
||||
else get_secondary_search_settings(db_session)
|
||||
)
|
||||
multipass = should_use_multipass(search_settings)
|
||||
if not search_settings:
|
||||
return MultipassConfig(multipass_indexing=False, enable_large_chunks=False)
|
||||
enable_large_chunks = can_use_large_chunks(multipass, search_settings)
|
||||
return MultipassConfig(
|
||||
multipass_indexing=multipass, enable_large_chunks=enable_large_chunks
|
||||
)
|
||||
|
||||
@@ -55,7 +55,7 @@ def remove_invalid_unicode_chars(text: str) -> str:
|
||||
"""Vespa does not take in unicode chars that aren't valid for XML.
|
||||
This removes them."""
|
||||
_illegal_xml_chars_RE: re.Pattern = re.compile(
|
||||
"[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF]"
|
||||
"[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]"
|
||||
)
|
||||
return _illegal_xml_chars_RE.sub("", text)
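A tiny check of the stripping behaviour; which of the two character classes in this hunk is current depends on the diff direction, so this only demonstrates the mechanics with the narrower pattern:

```python
import re

_illegal_xml_chars_RE = re.compile(
    "[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]"
)

print(_illegal_xml_chars_RE.sub("", "ok\x00text\x0b"))  # -> "oktext"
```
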
@@ -358,13 +358,7 @@ def extract_file_text(
|
||||
|
||||
try:
|
||||
if get_unstructured_api_key():
|
||||
try:
|
||||
return unstructured_to_text(file, file_name)
|
||||
except Exception as unstructured_error:
|
||||
logger.error(
|
||||
f"Failed to process with Unstructured: {str(unstructured_error)}. Falling back to normal processing."
|
||||
)
|
||||
# Fall through to normal processing
|
||||
return unstructured_to_text(file, file_name)
|
||||
|
||||
if file_name or extension:
|
||||
if extension is not None:
|
||||
|
||||
@@ -52,7 +52,7 @@ def _sdk_partition_request(
|
||||
|
||||
def unstructured_to_text(file: IO[Any], file_name: str) -> str:
|
||||
logger.debug(f"Starting to read file: {file_name}")
|
||||
req = _sdk_partition_request(file, file_name, strategy="fast")
|
||||
req = _sdk_partition_request(file, file_name, strategy="auto")
|
||||
|
||||
unstructured_client = UnstructuredClient(api_key_auth=get_unstructured_api_key())
|
||||
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
class HttpxPool:
|
||||
"""Class to manage a global httpx Client instance"""
|
||||
|
||||
_clients: dict[str, httpx.Client] = {}
|
||||
_lock: threading.Lock = threading.Lock()
|
||||
|
||||
# Default parameters for creation
|
||||
DEFAULT_KWARGS = {
|
||||
"http2": True,
|
||||
"limits": lambda: httpx.Limits(),
|
||||
}
|
||||
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def _init_client(cls, **kwargs: Any) -> httpx.Client:
|
||||
"""Private helper method to create and return an httpx.Client."""
|
||||
merged_kwargs = {**cls.DEFAULT_KWARGS, **kwargs}
|
||||
return httpx.Client(**merged_kwargs)
|
||||
|
||||
@classmethod
|
||||
def init_client(cls, name: str, **kwargs: Any) -> None:
|
||||
"""Allow the caller to init the client with extra params."""
|
||||
with cls._lock:
|
||||
if name not in cls._clients:
|
||||
cls._clients[name] = cls._init_client(**kwargs)
|
||||
|
||||
@classmethod
|
||||
def close_client(cls, name: str) -> None:
|
||||
"""Allow the caller to close the client."""
|
||||
with cls._lock:
|
||||
client = cls._clients.pop(name, None)
|
||||
if client:
|
||||
client.close()
|
||||
|
||||
@classmethod
|
||||
def close_all(cls) -> None:
|
||||
"""Close all registered clients."""
|
||||
with cls._lock:
|
||||
for client in cls._clients.values():
|
||||
client.close()
|
||||
cls._clients.clear()
|
||||
|
||||
@classmethod
|
||||
def get(cls, name: str) -> httpx.Client:
|
||||
"""Gets the httpx.Client. Will init to default settings if not init'd."""
|
||||
with cls._lock:
|
||||
if name not in cls._clients:
|
||||
cls._clients[name] = cls._init_client()
|
||||
return cls._clients[name]
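A short usage sketch of the pooled-client API defined above; the pool name and URL are placeholders, and it assumes the pool's default kwargs construct a valid `httpx.Client`:

```python
import httpx

# Register a named client once, with any overrides needed:
HttpxPool.init_client("vespa", http2=True)

# Anywhere else in the process, fetch the same client by name:
client: httpx.Client = HttpxPool.get("vespa")
response = client.get("http://localhost:8081/health")  # placeholder URL

# On shutdown, close everything that was registered:
HttpxPool.close_all()
```
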
@@ -31,15 +31,14 @@ from onyx.db.document import upsert_documents
|
||||
from onyx.db.document_set import fetch_document_sets_for_documents
|
||||
from onyx.db.index_attempt import create_index_attempt_error
|
||||
from onyx.db.models import Document as DBDocument
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.tag import create_or_add_document_tag
|
||||
from onyx.db.tag import create_or_add_document_tag_list
|
||||
from onyx.document_index.document_index_utils import (
|
||||
get_multipass_config,
|
||||
)
|
||||
from onyx.document_index.interfaces import DocumentIndex
|
||||
from onyx.document_index.interfaces import DocumentMetadata
|
||||
from onyx.document_index.interfaces import IndexBatchParams
|
||||
from onyx.document_index.vespa.indexing_utils import (
|
||||
get_multipass_config,
|
||||
)
|
||||
from onyx.indexing.chunker import Chunker
|
||||
from onyx.indexing.embedder import IndexingEmbedder
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
@@ -358,6 +357,7 @@ def index_doc_batch(
|
||||
is_public=False,
|
||||
)
|
||||
|
||||
logger.debug("Filtering Documents")
|
||||
filtered_documents = filter_fnc(document_batch)
|
||||
|
||||
ctx = index_doc_batch_prepare(
|
||||
@@ -380,15 +380,6 @@ def index_doc_batch(
|
||||
new_docs=0, total_docs=len(filtered_documents), total_chunks=0
|
||||
)
|
||||
|
||||
doc_descriptors = [
|
||||
{
|
||||
"doc_id": doc.id,
|
||||
"doc_length": doc.get_total_char_length(),
|
||||
}
|
||||
for doc in ctx.updatable_docs
|
||||
]
|
||||
logger.debug(f"Starting indexing process for documents: {doc_descriptors}")
|
||||
|
||||
logger.debug("Starting chunking")
|
||||
chunks: list[DocAwareChunk] = chunker.chunk(ctx.updatable_docs)
|
||||
|
||||
@@ -536,8 +527,7 @@ def build_indexing_pipeline(
|
||||
callback: IndexingHeartbeatInterface | None = None,
|
||||
) -> IndexingPipelineProtocol:
|
||||
"""Builds a pipeline which takes in a list (batch) of docs and indexes them."""
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
multipass_config = get_multipass_config(search_settings)
|
||||
multipass_config = get_multipass_config(db_session, primary_index=True)
|
||||
|
||||
chunker = chunker or Chunker(
|
||||
tokenizer=embedder.embedding_model.tokenizer,
|
||||
|
||||
@@ -55,7 +55,9 @@ class DocAwareChunk(BaseChunk):
|
||||
|
||||
def to_short_descriptor(self) -> str:
|
||||
"""Used when logging the identity of a chunk"""
|
||||
return f"{self.source_document.to_short_descriptor()} Chunk ID: {self.chunk_id}"
|
||||
return (
|
||||
f"Chunk ID: '{self.chunk_id}'; {self.source_document.to_short_descriptor()}"
|
||||
)
|
||||
|
||||
|
||||
class IndexChunk(DocAwareChunk):
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from concurrent.futures import as_completed
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from functools import wraps
|
||||
from typing import Any
|
||||
|
||||
@@ -13,7 +11,6 @@ from requests import RequestException
|
||||
from requests import Response
|
||||
from retry import retry
|
||||
|
||||
from onyx.configs.app_configs import INDEXING_EMBEDDING_MODEL_NUM_THREADS
|
||||
from onyx.configs.app_configs import LARGE_CHUNK_RATIO
|
||||
from onyx.configs.app_configs import SKIP_WARM_UP
|
||||
from onyx.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS
|
||||
@@ -158,7 +155,6 @@ class EmbeddingModel:
|
||||
text_type: EmbedTextType,
|
||||
batch_size: int,
|
||||
max_seq_length: int,
|
||||
num_threads: int = INDEXING_EMBEDDING_MODEL_NUM_THREADS,
|
||||
) -> list[Embedding]:
|
||||
text_batches = batch_list(texts, batch_size)
|
||||
|
||||
@@ -167,15 +163,12 @@ class EmbeddingModel:
|
||||
)
|
||||
|
||||
embeddings: list[Embedding] = []
|
||||
|
||||
def process_batch(
|
||||
batch_idx: int, text_batch: list[str]
|
||||
) -> tuple[int, list[Embedding]]:
|
||||
for idx, text_batch in enumerate(text_batches, start=1):
|
||||
if self.callback:
|
||||
if self.callback.should_stop():
|
||||
raise RuntimeError("_batch_encode_texts detected stop signal")
|
||||
|
||||
logger.debug(f"Encoding batch {batch_idx} of {len(text_batches)}")
|
||||
logger.debug(f"Encoding batch {idx} of {len(text_batches)}")
|
||||
embed_request = EmbedRequest(
|
||||
model_name=self.model_name,
|
||||
texts=text_batch,
|
||||
@@ -192,43 +185,10 @@ class EmbeddingModel:
|
||||
)
|
||||
|
||||
response = self._make_model_server_request(embed_request)
|
||||
return batch_idx, response.embeddings
|
||||
|
||||
# only multi thread if:
|
||||
# 1. num_threads is greater than 1
|
||||
# 2. we are using an API-based embedding model (provider_type is not None)
|
||||
# 3. there are more than 1 batch (no point in threading if only 1)
|
||||
if num_threads >= 1 and self.provider_type and len(text_batches) > 1:
|
||||
with ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
future_to_batch = {
|
||||
executor.submit(process_batch, idx, batch): idx
|
||||
for idx, batch in enumerate(text_batches, start=1)
|
||||
}
|
||||
|
||||
# Collect results in order
|
||||
batch_results: list[tuple[int, list[Embedding]]] = []
|
||||
for future in as_completed(future_to_batch):
|
||||
try:
|
||||
result = future.result()
|
||||
batch_results.append(result)
|
||||
if self.callback:
|
||||
self.callback.progress("_batch_encode_texts", 1)
|
||||
except Exception as e:
|
||||
logger.exception("Embedding model failed to process batch")
|
||||
raise e
|
||||
|
||||
# Sort by batch index and extend embeddings
|
||||
batch_results.sort(key=lambda x: x[0])
|
||||
for _, batch_embeddings in batch_results:
|
||||
embeddings.extend(batch_embeddings)
|
||||
else:
|
||||
# Original sequential processing
|
||||
for idx, text_batch in enumerate(text_batches, start=1):
|
||||
_, batch_embeddings = process_batch(idx, text_batch)
|
||||
embeddings.extend(batch_embeddings)
|
||||
if self.callback:
|
||||
self.callback.progress("_batch_encode_texts", 1)
|
||||
embeddings.extend(response.embeddings)
|
||||
|
||||
if self.callback:
|
||||
self.callback.progress("_batch_encode_texts", 1)
|
||||
return embeddings
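The threading change above boils down to: submit each batch, collect results as they complete, then restore batch order before concatenating. A self-contained sketch of that pattern, with a fake `embed` call standing in for the model-server request:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed


def embed(batch: list[str]) -> list[list[float]]:
    # Stand-in for the real model-server request.
    return [[float(len(text))] for text in batch]


def encode_batches(text_batches: list[list[str]], num_threads: int = 4) -> list[list[float]]:
    embeddings: list[list[float]] = []
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        future_to_idx = {
            executor.submit(embed, batch): idx
            for idx, batch in enumerate(text_batches)
        }
        results: list[tuple[int, list[list[float]]]] = []
        for future in as_completed(future_to_idx):
            results.append((future_to_idx[future], future.result()))

    # Completion order is arbitrary, so sort by batch index before flattening.
    results.sort(key=lambda pair: pair[0])
    for _, batch_embeddings in results:
        embeddings.extend(batch_embeddings)
    return embeddings


print(encode_batches([["a", "bb"], ["ccc"], ["dddd", "e"]]))
# [[1.0], [2.0], [3.0], [4.0], [1.0]]
```
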
def encode(
|
||||
|
||||
@@ -537,36 +537,30 @@ def prefilter_requests(req: SocketModeRequest, client: TenantSocketModeClient) -
|
||||
# Let the tag flow handle this case, don't reply twice
|
||||
return False
|
||||
|
||||
# Check if this is a bot message (either via bot_profile or bot_message subtype)
|
||||
is_bot_message = bool(
|
||||
event.get("bot_profile") or event.get("subtype") == "bot_message"
|
||||
)
|
||||
if is_bot_message:
|
||||
if event.get("bot_profile"):
|
||||
channel_name, _ = get_channel_name_from_id(
|
||||
client=client.web_client, channel_id=channel
|
||||
)
|
||||
|
||||
with get_session_with_tenant(client.tenant_id) as db_session:
|
||||
slack_channel_config = get_slack_channel_config_for_bot_and_channel(
|
||||
db_session=db_session,
|
||||
slack_bot_id=client.slack_bot_id,
|
||||
channel_name=channel_name,
|
||||
)
|
||||
|
||||
# If OnyxBot is not specifically tagged and the channel is not set to respond to bots, ignore the message
|
||||
if (not bot_tag_id or bot_tag_id not in msg) and (
|
||||
not slack_channel_config
|
||||
or not slack_channel_config.channel_config.get("respond_to_bots")
|
||||
):
|
||||
channel_specific_logger.info(
|
||||
"Ignoring message from bot since respond_to_bots is disabled"
|
||||
)
|
||||
channel_specific_logger.info("Ignoring message from bot")
|
||||
return False
|
||||
|
||||
# Ignore things like channel_join, channel_leave, etc.
|
||||
# NOTE: "file_share" is just a message with a file attachment, so we
|
||||
# should not ignore it
|
||||
message_subtype = event.get("subtype")
|
||||
if message_subtype not in [None, "file_share", "bot_message"]:
|
||||
if message_subtype not in [None, "file_share"]:
|
||||
channel_specific_logger.info(
|
||||
f"Ignoring message with subtype '{message_subtype}' since it is a special message type"
|
||||
)
|
||||
|
||||
@@ -92,7 +92,7 @@ class RedisConnectorPrune:
|
||||
if fence_bytes is None:
|
||||
return None
|
||||
|
||||
fence_int = int(cast(bytes, fence_bytes))
|
||||
fence_int = cast(int, fence_bytes)
|
||||
return fence_int
|
||||
|
||||
@generator_complete.setter
|
||||
|
||||
@@ -16,7 +16,7 @@ from onyx.context.search.preprocessing.access_filters import (
from onyx.db.document_set import get_document_sets_by_ids
from onyx.db.models import StarterMessageModel as StarterMessage
from onyx.db.models import User
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.document_index_utils import get_both_index_names
from onyx.document_index.factory import get_default_document_index
from onyx.llm.factory import get_default_llms
from onyx.prompts.starter_messages import format_persona_starter_message_prompt
@@ -34,11 +34,8 @@ def get_random_chunks_from_doc_sets(
    """
    Retrieves random chunks from the specified document sets.
    """
    active_search_settings = get_active_search_settings(db_session)
    document_index = get_default_document_index(
        search_settings=active_search_settings.primary,
        secondary_search_settings=active_search_settings.secondary,
    )
    curr_ind_name, sec_ind_name = get_both_index_names(db_session)
    document_index = get_default_document_index(curr_ind_name, sec_ind_name)

    acl_filters = build_access_filters_for_user(user, db_session)
    filters = IndexFilters(document_set=doc_sets, access_control_list=acl_filters)
@@ -3,7 +3,6 @@ import json
import os
from typing import cast

from sqlalchemy import update
from sqlalchemy.orm import Session

from onyx.access.models import default_public_access
@@ -24,7 +23,6 @@ from onyx.db.document import check_docs_exist
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.index_attempt import mock_successful_index_attempt
from onyx.db.models import Document as DbDocument
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import IndexBatchParams
@@ -61,7 +59,6 @@ def _create_indexable_chunks(
            doc_updated_at=None,
            primary_owners=[],
            secondary_owners=[],
            chunk_count=1,
        )
        if preprocessed_doc["chunk_ind"] == 0:
            ids_to_documents[document.id] = document
@@ -158,7 +155,9 @@ def seed_initial_documents(
        logger.info("Embedding model has been updated, skipping")
        return

    document_index = get_default_document_index(search_settings, None)
    document_index = get_default_document_index(
        primary_index_name=search_settings.index_name, secondary_index_name=None
    )

    # Create a connector so the user can delete it if they want
    # or reindex it with a new search model if they want
@@ -241,12 +240,4 @@ def seed_initial_documents(
        db_session=db_session,
    )

    # Since we bypass the indexing flow, we need to manually update the chunk count
    for doc in docs:
        db_session.execute(
            update(DbDocument)
            .where(DbDocument.id == doc.id)
            .values(chunk_count=doc.chunk_count)
        )

    kv_store.store(KV_DOCUMENTS_SEEDED_KEY, True)
@@ -15,9 +15,6 @@ from onyx.background.celery.celery_utils import get_deletion_attempt_snapshot
from onyx.background.celery.tasks.doc_permission_syncing.tasks import (
    try_creating_permissions_sync_task,
)
from onyx.background.celery.tasks.external_group_syncing.tasks import (
    try_creating_external_group_sync_task,
)
from onyx.background.celery.tasks.pruning.tasks import (
    try_creating_prune_generator_task,
)
@@ -42,7 +39,7 @@ from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
from onyx.db.index_attempt import get_paginated_index_attempts_for_cc_pair_id
from onyx.db.models import SearchSettings
from onyx.db.models import User
from onyx.db.search_settings import get_active_search_settings_list
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_pool import get_redis_client
@@ -192,7 +189,7 @@ def update_cc_pair_status(
    if status_update_request.status == ConnectorCredentialPairStatus.PAUSED:
        redis_connector.stop.set_fence(True)

    search_settings_list: list[SearchSettings] = get_active_search_settings_list(
    search_settings_list: list[SearchSettings] = get_active_search_settings(
        db_session
    )
@@ -446,78 +443,6 @@ def sync_cc_pair(
|
||||
)
|
||||
|
||||
|
||||
@router.get("/admin/cc-pair/{cc_pair_id}/sync-groups")
|
||||
def get_cc_pair_latest_group_sync(
|
||||
cc_pair_id: int,
|
||||
user: User = Depends(current_curator_or_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> datetime | None:
|
||||
cc_pair = get_connector_credential_pair_from_id_for_user(
|
||||
cc_pair_id=cc_pair_id,
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
get_editable=False,
|
||||
)
|
||||
if not cc_pair:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="cc_pair not found for current user's permissions",
|
||||
)
|
||||
|
||||
return cc_pair.last_time_external_group_sync
|
||||
|
||||
|
||||
@router.post("/admin/cc-pair/{cc_pair_id}/sync-groups")
|
||||
def sync_cc_pair_groups(
|
||||
cc_pair_id: int,
|
||||
user: User = Depends(current_curator_or_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
tenant_id: str | None = Depends(get_current_tenant_id),
|
||||
) -> StatusResponse[list[int]]:
|
||||
"""Triggers group sync on a particular cc_pair immediately"""
|
||||
|
||||
cc_pair = get_connector_credential_pair_from_id_for_user(
|
||||
cc_pair_id=cc_pair_id,
|
||||
db_session=db_session,
|
||||
user=user,
|
||||
get_editable=False,
|
||||
)
|
||||
if not cc_pair:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Connection not found for current user's permissions",
|
||||
)
|
||||
|
||||
r = get_redis_client(tenant_id=tenant_id)
|
||||
|
||||
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
||||
if redis_connector.external_group_sync.fenced:
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.CONFLICT,
|
||||
detail="External group sync task already in progress.",
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"External group sync cc_pair={cc_pair_id} "
|
||||
f"connector_id={cc_pair.connector_id} "
|
||||
f"credential_id={cc_pair.credential_id} "
|
||||
f"{cc_pair.connector.name} connector."
|
||||
)
|
||||
tasks_created = try_creating_external_group_sync_task(
|
||||
primary_app, cc_pair_id, r, CURRENT_TENANT_ID_CONTEXTVAR.get()
|
||||
)
|
||||
if not tasks_created:
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
|
||||
detail="External group sync task creation failed.",
|
||||
)
|
||||
|
||||
return StatusResponse(
|
||||
success=True,
|
||||
message="Successfully created the external group sync task.",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/admin/cc-pair/{cc_pair_id}/get-docs-sync-status")
|
||||
def get_docs_sync_status(
|
||||
cc_pair_id: int,
|
||||
|
||||
@@ -32,7 +32,10 @@ def get_document_info(
    db_session: Session = Depends(get_session),
) -> DocumentInfo:
    search_settings = get_current_search_settings(db_session)
    document_index = get_default_document_index(search_settings, None)

    document_index = get_default_document_index(
        primary_index_name=search_settings.index_name, secondary_index_name=None
    )

    user_acl_filters = build_access_filters_for_user(user, db_session)
    inference_chunks = document_index.id_based_retrieval(
@@ -76,7 +79,10 @@ def get_chunk_info(
    db_session: Session = Depends(get_session),
) -> ChunkInfo:
    search_settings = get_current_search_settings(db_session)
    document_index = get_default_document_index(search_settings, None)

    document_index = get_default_document_index(
        primary_index_name=search_settings.index_name, secondary_index_name=None
    )

    user_acl_filters = build_access_filters_for_user(user, db_session)
    chunk_request = VespaChunkRequest(
@@ -357,7 +357,6 @@ class ConnectorCredentialPairDescriptor(BaseModel):
    name: str | None = None
    connector: ConnectorSnapshot
    credential: CredentialSnapshot
    access_type: AccessType


class RunConnectorRequest(BaseModel):

@@ -68,7 +68,6 @@ class DocumentSet(BaseModel):
                credential=CredentialSnapshot.from_credential_db_model(
                    cc_pair.credential
                ),
                access_type=cc_pair.access_type,
            )
            for cc_pair in document_set_model.connector_credential_pairs
        ],
@@ -10,7 +10,6 @@ from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accesssible_user
from onyx.db.engine import get_session
from onyx.db.llm import fetch_existing_llm_providers
from onyx.db.llm import fetch_existing_llm_providers_for_user
from onyx.db.llm import fetch_provider
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import update_default_provider
@@ -196,7 +195,5 @@ def list_llm_provider_basics(
) -> list[LLMProviderDescriptor]:
    return [
        LLMProviderDescriptor.from_model(llm_provider_model)
        for llm_provider_model in fetch_existing_llm_providers_for_user(
            db_session, user
        )
        for llm_provider_model in fetch_existing_llm_providers(db_session, user)
    ]
@@ -44,6 +44,7 @@ class UserPreferences(BaseModel):
    chosen_assistants: list[int] | None = None
    hidden_assistants: list[int] = []
    visible_assistants: list[int] = []
    recent_assistants: list[int] | None = None
    default_model: str | None = None
    auto_scroll: bool | None = None
    pinned_assistants: list[int] | None = None
@@ -22,7 +22,6 @@ from onyx.db.search_settings import get_embedding_provider_from_provider_type
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.document_index_utils import get_multipass_config
from onyx.document_index.factory import get_default_document_index
from onyx.file_processing.unstructured import delete_unstructured_api_key
from onyx.file_processing.unstructured import get_unstructured_api_key
@@ -98,9 +97,10 @@ def set_new_search_settings(
    )

    # Ensure Vespa has the new index immediately
    get_multipass_config(search_settings)
    get_multipass_config(new_search_settings)
    document_index = get_default_document_index(search_settings, new_search_settings)
    document_index = get_default_document_index(
        primary_index_name=search_settings.index_name,
        secondary_index_name=new_search_settings.index_name,
    )

    document_index.ensure_indices_exist(
        index_embedding_dim=search_settings.model_dim,
@@ -572,6 +572,59 @@ class ChosenDefaultModelRequest(BaseModel):
    default_model: str | None = None


class RecentAssistantsRequest(BaseModel):
    current_assistant: int


def update_recent_assistants(
    recent_assistants: list[int] | None, current_assistant: int
) -> list[int]:
    if recent_assistants is None:
        recent_assistants = []
    else:
        recent_assistants = [x for x in recent_assistants if x != current_assistant]

    # Add current assistant to start of list
    recent_assistants.insert(0, current_assistant)

    # Keep only the 5 most recent assistants
    recent_assistants = recent_assistants[:5]
    return recent_assistants
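For illustration only (not part of the commit), the most-recently-used behavior of update_recent_assistants above: the current assistant moves to the front, duplicates are removed, and the list is capped at five entries.

assert update_recent_assistants([3, 1, 2], 1) == [1, 3, 2]
assert update_recent_assistants(None, 7) == [7]
assert update_recent_assistants([1, 2, 3, 4, 5], 6) == [6, 1, 2, 3, 4]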
@router.patch("/user/recent-assistants")
|
||||
def update_user_recent_assistants(
|
||||
request: RecentAssistantsRequest,
|
||||
user: User | None = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> None:
|
||||
if user is None:
|
||||
if AUTH_TYPE == AuthType.DISABLED:
|
||||
store = get_kv_store()
|
||||
no_auth_user = fetch_no_auth_user(store)
|
||||
preferences = no_auth_user.preferences
|
||||
recent_assistants = preferences.recent_assistants
|
||||
updated_preferences = update_recent_assistants(
|
||||
recent_assistants, request.current_assistant
|
||||
)
|
||||
preferences.recent_assistants = updated_preferences
|
||||
set_no_auth_user_preferences(store, preferences)
|
||||
return
|
||||
else:
|
||||
raise RuntimeError("This should never happen")
|
||||
|
||||
recent_assistants = UserInfo.from_model(user).preferences.recent_assistants
|
||||
updated_recent_assistants = update_recent_assistants(
|
||||
recent_assistants, request.current_assistant
|
||||
)
|
||||
db_session.execute(
|
||||
update(User)
|
||||
.where(User.id == user.id) # type: ignore
|
||||
.values(recent_assistants=updated_recent_assistants)
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
|
||||
@router.patch("/shortcut-enabled")
|
||||
def update_user_shortcut_enabled(
|
||||
shortcut_enabled: bool,
|
||||
@@ -678,6 +731,30 @@ class ChosenAssistantsRequest(BaseModel):
|
||||
chosen_assistants: list[int]
|
||||
|
||||
|
||||
@router.patch("/user/assistant-list")
|
||||
def update_user_assistant_list(
|
||||
request: ChosenAssistantsRequest,
|
||||
user: User | None = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> None:
|
||||
if user is None:
|
||||
if AUTH_TYPE == AuthType.DISABLED:
|
||||
store = get_kv_store()
|
||||
no_auth_user = fetch_no_auth_user(store)
|
||||
no_auth_user.preferences.chosen_assistants = request.chosen_assistants
|
||||
set_no_auth_user_preferences(store, no_auth_user.preferences)
|
||||
return
|
||||
else:
|
||||
raise RuntimeError("This should never happen")
|
||||
|
||||
db_session.execute(
|
||||
update(User)
|
||||
.where(User.id == user.id) # type: ignore
|
||||
.values(chosen_assistants=request.chosen_assistants)
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
|
||||
def update_assistant_visibility(
|
||||
preferences: UserPreferences, assistant_id: int, show: bool
|
||||
) -> UserPreferences:
|
||||
|
||||
@@ -14,9 +14,9 @@ from onyx.db.document import get_ingestion_documents
|
||||
from onyx.db.engine import get_current_tenant_id
|
||||
from onyx.db.engine import get_session
|
||||
from onyx.db.models import User
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.search_settings import get_secondary_search_settings
|
||||
from onyx.document_index.document_index_utils import get_both_index_names
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.indexing.embedder import DefaultIndexingEmbedder
|
||||
from onyx.indexing.indexing_pipeline import build_indexing_pipeline
|
||||
@@ -89,10 +89,9 @@ def upsert_ingestion_doc(
|
||||
)
|
||||
|
||||
# Need to index for both the primary and secondary index if possible
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
|
||||
curr_doc_index = get_default_document_index(
|
||||
active_search_settings.primary,
|
||||
None,
|
||||
primary_index_name=curr_ind_name, secondary_index_name=None
|
||||
)
|
||||
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
@@ -118,7 +117,11 @@ def upsert_ingestion_doc(
|
||||
)
|
||||
|
||||
# If there's a secondary index being built, index the doc but don't use it for return here
|
||||
if active_search_settings.secondary:
|
||||
if sec_ind_name:
|
||||
sec_doc_index = get_default_document_index(
|
||||
primary_index_name=curr_ind_name, secondary_index_name=None
|
||||
)
|
||||
|
||||
sec_search_settings = get_secondary_search_settings(db_session)
|
||||
|
||||
if sec_search_settings is None:
|
||||
@@ -131,10 +134,6 @@ def upsert_ingestion_doc(
|
||||
search_settings=sec_search_settings
|
||||
)
|
||||
|
||||
sec_doc_index = get_default_document_index(
|
||||
active_search_settings.secondary, None
|
||||
)
|
||||
|
||||
sec_ind_pipeline = build_indexing_pipeline(
|
||||
embedder=new_index_embedding_model,
|
||||
document_index=sec_doc_index,
|
||||
|
||||
@@ -672,25 +672,23 @@ def upload_files_for_chat(
|
||||
else ChatFileType.PLAIN_TEXT
|
||||
)
|
||||
|
||||
file_content = file.file.read() # Read the file content
|
||||
|
||||
if file_type == ChatFileType.IMAGE:
|
||||
file_content_io = file.file
|
||||
file_content = file.file
|
||||
# NOTE: Image conversion to JPEG used to be enforced here.
|
||||
# This was removed to:
|
||||
# 1. Preserve original file content for downloads
|
||||
# 2. Maintain transparency in formats like PNG
|
||||
# 3. Ameliorate issue with file conversion
|
||||
else:
|
||||
file_content_io = io.BytesIO(file_content)
|
||||
file_content = io.BytesIO(file.file.read())
|
||||
|
||||
new_content_type = file.content_type
|
||||
|
||||
# Store the file normally
|
||||
# store the file (now JPEG for images)
|
||||
file_id = str(uuid.uuid4())
|
||||
file_store.save_file(
|
||||
file_name=file_id,
|
||||
content=file_content_io,
|
||||
content=file_content,
|
||||
display_name=file.filename,
|
||||
file_origin=FileOrigin.CHAT_UPLOAD,
|
||||
file_type=new_content_type or file_type.value,
|
||||
@@ -700,7 +698,7 @@ def upload_files_for_chat(
|
||||
# to re-extract it every time we send a message
|
||||
if file_type == ChatFileType.DOC:
|
||||
extracted_text = extract_file_text(
|
||||
file=io.BytesIO(file_content), # use the bytes we already read
|
||||
file=file.file,
|
||||
file_name=file.filename or "",
|
||||
)
|
||||
text_file_id = str(uuid.uuid4())
|
||||
|
||||
@@ -64,8 +64,9 @@ def admin_search(
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
document_index = get_default_document_index(search_settings, None)
|
||||
|
||||
document_index = get_default_document_index(
|
||||
primary_index_name=search_settings.index_name, secondary_index_name=None
|
||||
)
|
||||
if not isinstance(document_index, VespaIndex):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
|
||||
@@ -25,7 +25,6 @@ from onyx.db.llm import fetch_default_provider
|
||||
from onyx.db.llm import update_default_provider
|
||||
from onyx.db.llm import upsert_llm_provider
|
||||
from onyx.db.persona import delete_old_default_personas
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.search_settings import get_secondary_search_settings
|
||||
from onyx.db.search_settings import update_current_search_settings
|
||||
@@ -71,19 +70,8 @@ def setup_onyx(
|
||||
The Tenant Service calls the tenants/create endpoint which runs this.
|
||||
"""
|
||||
check_index_swap(db_session=db_session)
|
||||
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
search_settings = active_search_settings.primary
|
||||
secondary_search_settings = active_search_settings.secondary
|
||||
|
||||
# search_settings = get_current_search_settings(db_session)
|
||||
# multipass_config_1 = get_multipass_config(search_settings)
|
||||
|
||||
# secondary_large_chunks_enabled: bool | None = None
|
||||
# secondary_search_settings = get_secondary_search_settings(db_session)
|
||||
# if secondary_search_settings:
|
||||
# multipass_config_2 = get_multipass_config(secondary_search_settings)
|
||||
# secondary_large_chunks_enabled = multipass_config_2.enable_large_chunks
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
secondary_search_settings = get_secondary_search_settings(db_session)
|
||||
|
||||
# Break bad state for thrashing indexes
|
||||
if secondary_search_settings and DISABLE_INDEX_UPDATE_ON_SWAP:
|
||||
@@ -134,8 +122,10 @@ def setup_onyx(
|
||||
# takes a bit of time to start up
|
||||
logger.notice("Verifying Document Index(s) is/are available.")
|
||||
document_index = get_default_document_index(
|
||||
search_settings,
|
||||
secondary_search_settings,
|
||||
primary_index_name=search_settings.index_name,
|
||||
secondary_index_name=secondary_search_settings.index_name
|
||||
if secondary_search_settings
|
||||
else None,
|
||||
)
|
||||
|
||||
success = setup_vespa(
|
||||
|
||||
@@ -220,13 +220,6 @@ class InternetSearchTool(Tool):
        )
        results = response.json()

        # If no hits, Bing does not include the webPages key
        search_results = (
            results["webPages"]["value"][: self.num_results]
            if "webPages" in results
            else []
        )

        return InternetSearchResponse(
            revised_query=query,
            internet_results=[
@@ -235,7 +228,7 @@ class InternetSearchTool(Tool):
                    link=result["url"],
                    snippet=result["snippet"],
                )
                for result in search_results
                for result in results["webPages"]["value"][: self.num_results]
            ],
        )
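A small standalone sketch (placeholder names, not from the commit) of the defensive-access pattern shown above for Bing responses that omit the webPages key when there are no hits:

def extract_hits(results: dict, limit: int) -> list[dict]:
    # Fall back to an empty list instead of indexing into a key that may be absent.
    return results["webPages"]["value"][:limit] if "webPages" in results else []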
@@ -81,7 +81,6 @@ hubspot-api-client==8.1.0
asana==5.0.8
dropbox==11.36.2
boto3-stubs[s3]==1.34.133
shapely==2.0.6
stripe==10.12.0
urllib3==2.2.3
mistune==0.8.4
@@ -7,7 +7,6 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.document import delete_documents_complete__no_commit
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
|
||||
# Modify sys.path
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
@@ -39,6 +38,7 @@ from onyx.db.connector_credential_pair import (
|
||||
from onyx.db.engine import get_session_context_manager
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.document_index.document_index_utils import get_both_index_names
|
||||
|
||||
# pylint: enable=E402
|
||||
# flake8: noqa: E402
|
||||
@@ -191,10 +191,9 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
|
||||
)
|
||||
try:
|
||||
logger.notice("Deleting information from Vespa and Postgres")
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
|
||||
document_index = get_default_document_index(
|
||||
active_search_settings.primary,
|
||||
active_search_settings.secondary,
|
||||
primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
|
||||
)
|
||||
|
||||
files_deleted_count = _unsafe_deletion(
|
||||
|
||||
@@ -5,8 +5,6 @@ import sys
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.document_index.document_index_utils import get_multipass_config
|
||||
|
||||
# makes it so `PYTHONPATH=.` is not required when running this script
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.append(parent_dir)
|
||||
@@ -56,14 +54,8 @@ def main() -> None:
|
||||
|
||||
# Setup Vespa index
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
multipass_config = get_multipass_config(search_settings)
|
||||
index_name = search_settings.index_name
|
||||
vespa_index = VespaIndex(
|
||||
index_name=index_name,
|
||||
secondary_index_name=None,
|
||||
large_chunks_enabled=multipass_config.enable_large_chunks,
|
||||
secondary_large_chunks_enabled=None,
|
||||
)
|
||||
vespa_index = VespaIndex(index_name=index_name, secondary_index_name=None)
|
||||
|
||||
# Delete chunks from Vespa first
|
||||
print("Deleting orphaned document chunks from Vespa")
|
||||
|
||||
@@ -16,7 +16,6 @@ from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.db.engine import get_session_context_manager
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.document_index_utils import get_multipass_config
|
||||
from onyx.document_index.vespa.index import VespaIndex
|
||||
from onyx.indexing.indexing_pipeline import IndexBatchParams
|
||||
from onyx.indexing.models import ChunkEmbedding
|
||||
@@ -134,16 +133,10 @@ def seed_dummy_docs(
|
||||
) -> None:
|
||||
with get_session_context_manager() as db_session:
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
multipass_config = get_multipass_config(search_settings)
|
||||
index_name = search_settings.index_name
|
||||
embedding_dim = search_settings.model_dim
|
||||
|
||||
vespa_index = VespaIndex(
|
||||
index_name=index_name,
|
||||
secondary_index_name=None,
|
||||
large_chunks_enabled=multipass_config.enable_large_chunks,
|
||||
secondary_large_chunks_enabled=None,
|
||||
)
|
||||
vespa_index = VespaIndex(index_name=index_name, secondary_index_name=None)
|
||||
print(index_name)
|
||||
|
||||
all_chunks = []
|
||||
|
||||
@@ -9,7 +9,6 @@ from onyx.configs.model_configs import DOC_EMBEDDING_DIM
|
||||
from onyx.context.search.models import IndexFilters
|
||||
from onyx.db.engine import get_session_context_manager
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.document_index_utils import get_multipass_config
|
||||
from onyx.document_index.vespa.index import VespaIndex
|
||||
from scripts.query_time_check.seed_dummy_docs import TOTAL_ACL_ENTRIES_PER_CATEGORY
|
||||
from scripts.query_time_check.seed_dummy_docs import TOTAL_DOC_SETS
|
||||
@@ -63,15 +62,9 @@ def test_hybrid_retrieval_times(
|
||||
) -> None:
|
||||
with get_session_context_manager() as db_session:
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
multipass_config = get_multipass_config(search_settings)
|
||||
index_name = search_settings.index_name
|
||||
|
||||
vespa_index = VespaIndex(
|
||||
index_name=index_name,
|
||||
secondary_index_name=None,
|
||||
large_chunks_enabled=multipass_config.enable_large_chunks,
|
||||
secondary_large_chunks_enabled=None,
|
||||
)
|
||||
vespa_index = VespaIndex(index_name=index_name, secondary_index_name=None)
|
||||
|
||||
# Generate random queries
|
||||
queries = [f"Random Query {i}" for i in range(number_of_queries)]
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import os
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.airtable.airtable_connector import AirtableConnector
|
||||
@@ -10,25 +10,26 @@ from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import Section
|
||||
|
||||
|
||||
class AirtableConfig(BaseModel):
|
||||
base_id: str
|
||||
table_identifier: str
|
||||
access_token: str
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def airtable_config(request: pytest.FixtureRequest) -> AirtableConfig:
|
||||
table_identifier = (
|
||||
os.environ["AIRTABLE_TEST_TABLE_NAME"]
|
||||
if request.param
|
||||
else os.environ["AIRTABLE_TEST_TABLE_ID"]
|
||||
)
|
||||
return AirtableConfig(
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
("table_name", os.environ["AIRTABLE_TEST_TABLE_NAME"]),
|
||||
("table_id", os.environ["AIRTABLE_TEST_TABLE_ID"]),
|
||||
]
|
||||
)
|
||||
def airtable_connector(request: pytest.FixtureRequest) -> AirtableConnector:
|
||||
param_type, table_identifier = request.param
|
||||
connector = AirtableConnector(
|
||||
base_id=os.environ["AIRTABLE_TEST_BASE_ID"],
|
||||
table_identifier=table_identifier,
|
||||
access_token=os.environ["AIRTABLE_ACCESS_TOKEN"],
|
||||
table_name_or_id=table_identifier,
|
||||
)
|
||||
|
||||
connector.load_credentials(
|
||||
{
|
||||
"airtable_access_token": os.environ["AIRTABLE_ACCESS_TOKEN"],
|
||||
}
|
||||
)
|
||||
return connector
|
||||
|
||||
|
||||
def create_test_document(
|
||||
id: str,
|
||||
@@ -45,37 +46,18 @@ def create_test_document(
|
||||
assignee: str,
|
||||
days_since_status_change: int | None,
|
||||
attachments: list[tuple[str, str]] | None = None,
|
||||
all_fields_as_metadata: bool = False,
|
||||
) -> Document:
|
||||
base_id = os.environ.get("AIRTABLE_TEST_BASE_ID")
|
||||
table_id = os.environ.get("AIRTABLE_TEST_TABLE_ID")
|
||||
missing_vars = []
|
||||
if not base_id:
|
||||
missing_vars.append("AIRTABLE_TEST_BASE_ID")
|
||||
if not table_id:
|
||||
missing_vars.append("AIRTABLE_TEST_TABLE_ID")
|
||||
|
||||
if missing_vars:
|
||||
raise RuntimeError(
|
||||
f"Required environment variables not set: {', '.join(missing_vars)}. "
|
||||
"These variables are required to run Airtable connector tests."
|
||||
)
|
||||
link_base = f"https://airtable.com/{base_id}/{table_id}"
|
||||
sections = []
|
||||
|
||||
if not all_fields_as_metadata:
|
||||
sections.extend(
|
||||
[
|
||||
Section(
|
||||
text=f"Title:\n------------------------\n{title}\n------------------------",
|
||||
link=f"{link_base}/{id}",
|
||||
),
|
||||
Section(
|
||||
text=f"Description:\n------------------------\n{description}\n------------------------",
|
||||
link=f"{link_base}/{id}",
|
||||
),
|
||||
]
|
||||
)
|
||||
link_base = f"https://airtable.com/{os.environ['AIRTABLE_TEST_BASE_ID']}/{os.environ['AIRTABLE_TEST_TABLE_ID']}"
|
||||
sections = [
|
||||
Section(
|
||||
text=f"Title:\n------------------------\n{title}\n------------------------",
|
||||
link=f"{link_base}/{id}",
|
||||
),
|
||||
Section(
|
||||
text=f"Description:\n------------------------\n{description}\n------------------------",
|
||||
link=f"{link_base}/{id}",
|
||||
),
|
||||
]
|
||||
|
||||
if attachments:
|
||||
for attachment_text, attachment_link in attachments:
|
||||
@@ -86,36 +68,26 @@ def create_test_document(
|
||||
),
|
||||
)
|
||||
|
||||
metadata: dict[str, str | list[str]] = {
|
||||
# "Category": category,
|
||||
"Assignee": assignee,
|
||||
"Submitted by": submitted_by,
|
||||
"Priority": priority,
|
||||
"Status": status,
|
||||
"Created time": created_time,
|
||||
"ID": ticket_id,
|
||||
"Status last changed": status_last_changed,
|
||||
**(
|
||||
{"Days since status change": str(days_since_status_change)}
|
||||
if days_since_status_change is not None
|
||||
else {}
|
||||
),
|
||||
}
|
||||
|
||||
if all_fields_as_metadata:
|
||||
metadata.update(
|
||||
{
|
||||
"Title": title,
|
||||
"Description": description,
|
||||
}
|
||||
)
|
||||
|
||||
return Document(
|
||||
id=f"airtable__{id}",
|
||||
sections=sections,
|
||||
source=DocumentSource.AIRTABLE,
|
||||
semantic_identifier=f"{os.environ.get('AIRTABLE_TEST_TABLE_NAME', '')}: {title}",
|
||||
metadata=metadata,
|
||||
semantic_identifier=f"{os.environ['AIRTABLE_TEST_TABLE_NAME']}: {title}",
|
||||
metadata={
|
||||
# "Category": category,
|
||||
"Assignee": assignee,
|
||||
"Submitted by": submitted_by,
|
||||
"Priority": priority,
|
||||
"Status": status,
|
||||
"Created time": created_time,
|
||||
"ID": ticket_id,
|
||||
"Status last changed": status_last_changed,
|
||||
**(
|
||||
{"Days since status change": str(days_since_status_change)}
|
||||
if days_since_status_change is not None
|
||||
else {}
|
||||
),
|
||||
},
|
||||
doc_updated_at=None,
|
||||
primary_owners=None,
|
||||
secondary_owners=None,
|
||||
@@ -125,75 +97,15 @@ def create_test_document(
|
||||
)
|
||||
|
||||
|
||||
def compare_documents(
|
||||
actual_docs: list[Document], expected_docs: list[Document]
|
||||
) -> None:
|
||||
"""Utility function to compare actual and expected documents, ignoring order."""
|
||||
actual_docs_dict = {doc.id: doc for doc in actual_docs}
|
||||
expected_docs_dict = {doc.id: doc for doc in expected_docs}
|
||||
|
||||
assert actual_docs_dict.keys() == expected_docs_dict.keys(), "Document ID mismatch"
|
||||
|
||||
for doc_id in actual_docs_dict:
|
||||
actual = actual_docs_dict[doc_id]
|
||||
expected = expected_docs_dict[doc_id]
|
||||
|
||||
assert (
|
||||
actual.source == expected.source
|
||||
), f"Source mismatch for document {doc_id}"
|
||||
assert (
|
||||
actual.semantic_identifier == expected.semantic_identifier
|
||||
), f"Semantic identifier mismatch for document {doc_id}"
|
||||
assert (
|
||||
actual.metadata == expected.metadata
|
||||
), f"Metadata mismatch for document {doc_id}"
|
||||
assert (
|
||||
actual.doc_updated_at == expected.doc_updated_at
|
||||
), f"Updated at mismatch for document {doc_id}"
|
||||
assert (
|
||||
actual.primary_owners == expected.primary_owners
|
||||
), f"Primary owners mismatch for document {doc_id}"
|
||||
assert (
|
||||
actual.secondary_owners == expected.secondary_owners
|
||||
), f"Secondary owners mismatch for document {doc_id}"
|
||||
assert actual.title == expected.title, f"Title mismatch for document {doc_id}"
|
||||
assert (
|
||||
actual.from_ingestion_api == expected.from_ingestion_api
|
||||
), f"Ingestion API flag mismatch for document {doc_id}"
|
||||
assert (
|
||||
actual.additional_info == expected.additional_info
|
||||
), f"Additional info mismatch for document {doc_id}"
|
||||
|
||||
# Compare sections
|
||||
assert len(actual.sections) == len(
|
||||
expected.sections
|
||||
), f"Number of sections mismatch for document {doc_id}"
|
||||
for i, (actual_section, expected_section) in enumerate(
|
||||
zip(actual.sections, expected.sections)
|
||||
):
|
||||
assert (
|
||||
actual_section.text == expected_section.text
|
||||
), f"Section {i} text mismatch for document {doc_id}"
|
||||
assert (
|
||||
actual_section.link == expected_section.link
|
||||
), f"Section {i} link mismatch for document {doc_id}"
|
||||
|
||||
|
||||
@patch(
|
||||
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
|
||||
return_value=None,
|
||||
)
|
||||
def test_airtable_connector_basic(
|
||||
mock_get_unstructured_api_key: MagicMock, airtable_config: AirtableConfig
|
||||
mock_get_api_key: MagicMock, airtable_connector: AirtableConnector
|
||||
) -> None:
|
||||
"""Test behavior when all non-attachment fields are treated as metadata."""
|
||||
connector = AirtableConnector(
|
||||
base_id=airtable_config.base_id,
|
||||
table_name_or_id=airtable_config.table_identifier,
|
||||
treat_all_non_attachment_fields_as_metadata=False,
|
||||
)
|
||||
connector.load_credentials(
|
||||
{
|
||||
"airtable_access_token": airtable_config.access_token,
|
||||
}
|
||||
)
|
||||
doc_batch_generator = connector.load_from_state()
|
||||
doc_batch_generator = airtable_connector.load_from_state()
|
||||
|
||||
doc_batch = next(doc_batch_generator)
|
||||
with pytest.raises(StopIteration):
|
||||
next(doc_batch_generator)
|
||||
@@ -207,62 +119,15 @@ def test_airtable_connector_basic(
|
||||
description="The internet connection is very slow.",
|
||||
priority="Medium",
|
||||
status="In Progress",
|
||||
# Link to another record is skipped for now
|
||||
# category="Data Science",
|
||||
ticket_id="2",
|
||||
created_time="2024-12-24T21:02:49.000Z",
|
||||
status_last_changed="2024-12-24T21:02:49.000Z",
|
||||
days_since_status_change=0,
|
||||
assignee="Chris Weaver (chris@onyx.app)",
|
||||
submitted_by="Chris Weaver (chris@onyx.app)",
|
||||
all_fields_as_metadata=False,
|
||||
),
|
||||
create_test_document(
|
||||
id="reccSlIA4pZEFxPBg",
|
||||
title="Printer Issue",
|
||||
description="The office printer is not working.",
|
||||
priority="High",
|
||||
status="Open",
|
||||
ticket_id="1",
|
||||
created_time="2024-12-24T21:02:49.000Z",
|
||||
status_last_changed="2024-12-24T21:02:49.000Z",
|
||||
days_since_status_change=0,
|
||||
assignee="Chris Weaver (chris@onyx.app)",
|
||||
submitted_by="Chris Weaver (chris@onyx.app)",
|
||||
attachments=[
|
||||
(
|
||||
"Test.pdf:\ntesting!!!",
|
||||
"https://airtable.com/appCXJqDFS4gea8tn/tblRxFQsTlBBZdRY1/viwVUEJjWPd8XYjh8/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide",
|
||||
)
|
||||
],
|
||||
all_fields_as_metadata=False,
|
||||
),
|
||||
]
|
||||
|
||||
# Compare documents using the utility function
|
||||
compare_documents(doc_batch, expected_docs)
|
||||
|
||||
|
||||
def test_airtable_connector_all_metadata(
|
||||
mock_get_unstructured_api_key: MagicMock, airtable_config: AirtableConfig
|
||||
) -> None:
|
||||
connector = AirtableConnector(
|
||||
base_id=airtable_config.base_id,
|
||||
table_name_or_id=airtable_config.table_identifier,
|
||||
treat_all_non_attachment_fields_as_metadata=True,
|
||||
)
|
||||
connector.load_credentials(
|
||||
{
|
||||
"airtable_access_token": airtable_config.access_token,
|
||||
}
|
||||
)
|
||||
doc_batch_generator = connector.load_from_state()
|
||||
doc_batch = next(doc_batch_generator)
|
||||
with pytest.raises(StopIteration):
|
||||
next(doc_batch_generator)
|
||||
|
||||
# NOTE: one of the rows has no attachments -> no content -> no document
|
||||
assert len(doc_batch) == 1
|
||||
|
||||
expected_docs = [
|
||||
create_test_document(
|
||||
id="reccSlIA4pZEFxPBg",
|
||||
title="Printer Issue",
|
||||
@@ -284,9 +149,50 @@ def test_airtable_connector_all_metadata(
|
||||
"https://airtable.com/appCXJqDFS4gea8tn/tblRxFQsTlBBZdRY1/viwVUEJjWPd8XYjh8/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide",
|
||||
)
|
||||
],
|
||||
all_fields_as_metadata=True,
|
||||
),
|
||||
]
|
||||
|
||||
# Compare documents using the utility function
|
||||
compare_documents(doc_batch, expected_docs)
|
||||
# Compare each document field by field
|
||||
for actual, expected in zip(doc_batch, expected_docs):
|
||||
assert actual.id == expected.id, f"ID mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.source == expected.source
|
||||
), f"Source mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.semantic_identifier == expected.semantic_identifier
|
||||
), f"Semantic identifier mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.metadata == expected.metadata
|
||||
), f"Metadata mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.doc_updated_at == expected.doc_updated_at
|
||||
), f"Updated at mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.primary_owners == expected.primary_owners
|
||||
), f"Primary owners mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.secondary_owners == expected.secondary_owners
|
||||
), f"Secondary owners mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.title == expected.title
|
||||
), f"Title mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.from_ingestion_api == expected.from_ingestion_api
|
||||
), f"Ingestion API flag mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual.additional_info == expected.additional_info
|
||||
), f"Additional info mismatch for document {actual.id}"
|
||||
|
||||
# Compare sections
|
||||
assert len(actual.sections) == len(
|
||||
expected.sections
|
||||
), f"Number of sections mismatch for document {actual.id}"
|
||||
for i, (actual_section, expected_section) in enumerate(
|
||||
zip(actual.sections, expected.sections)
|
||||
):
|
||||
assert (
|
||||
actual_section.text == expected_section.text
|
||||
), f"Section {i} text mismatch for document {actual.id}"
|
||||
assert (
|
||||
actual_section.link == expected_section.link
|
||||
), f"Section {i} link mismatch for document {actual.id}"
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
from collections.abc import Generator
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_get_unstructured_api_key() -> Generator[MagicMock, None, None]:
|
||||
with patch(
|
||||
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
|
||||
return_value=None,
|
||||
) as mock:
|
||||
yield mock
|
||||
@@ -1,210 +0,0 @@
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.sharepoint.connector import SharepointConnector
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExpectedDocument:
|
||||
semantic_identifier: str
|
||||
content: str
|
||||
folder_path: str | None = None
|
||||
library: str = "Shared Documents" # Default to main library
|
||||
|
||||
|
||||
EXPECTED_DOCUMENTS = [
|
||||
ExpectedDocument(
|
||||
semantic_identifier="test1.docx",
|
||||
content="test1",
|
||||
folder_path="test",
|
||||
),
|
||||
ExpectedDocument(
|
||||
semantic_identifier="test2.docx",
|
||||
content="test2",
|
||||
folder_path="test/nested with spaces",
|
||||
),
|
||||
ExpectedDocument(
|
||||
semantic_identifier="should-not-index-on-specific-folder.docx",
|
||||
content="should-not-index-on-specific-folder",
|
||||
folder_path=None, # root folder
|
||||
),
|
||||
ExpectedDocument(
|
||||
semantic_identifier="other.docx",
|
||||
content="other",
|
||||
folder_path=None,
|
||||
library="Other Library",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def verify_document_metadata(doc: Document) -> None:
|
||||
"""Verify common metadata that should be present on all documents."""
|
||||
assert isinstance(doc.doc_updated_at, datetime)
|
||||
assert doc.doc_updated_at.tzinfo == timezone.utc
|
||||
assert doc.source == DocumentSource.SHAREPOINT
|
||||
assert doc.primary_owners is not None
|
||||
assert len(doc.primary_owners) == 1
|
||||
owner = doc.primary_owners[0]
|
||||
assert owner.display_name is not None
|
||||
assert owner.email is not None
|
||||
|
||||
|
||||
def verify_document_content(doc: Document, expected: ExpectedDocument) -> None:
|
||||
"""Verify a document matches its expected content."""
|
||||
assert doc.semantic_identifier == expected.semantic_identifier
|
||||
assert len(doc.sections) == 1
|
||||
assert expected.content in doc.sections[0].text
|
||||
verify_document_metadata(doc)
|
||||
|
||||
|
||||
def find_document(documents: list[Document], semantic_identifier: str) -> Document:
|
||||
"""Find a document by its semantic identifier."""
|
||||
matching_docs = [
|
||||
d for d in documents if d.semantic_identifier == semantic_identifier
|
||||
]
|
||||
assert (
|
||||
len(matching_docs) == 1
|
||||
), f"Expected exactly one document with identifier {semantic_identifier}"
|
||||
return matching_docs[0]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sharepoint_credentials() -> dict[str, str]:
|
||||
return {
|
||||
"sp_client_id": os.environ["SHAREPOINT_CLIENT_ID"],
|
||||
"sp_client_secret": os.environ["SHAREPOINT_CLIENT_SECRET"],
|
||||
"sp_directory_id": os.environ["SHAREPOINT_CLIENT_DIRECTORY_ID"],
|
||||
}
|
||||
|
||||
|
||||
def test_sharepoint_connector_specific_folder(
|
||||
mock_get_unstructured_api_key: MagicMock,
|
||||
sharepoint_credentials: dict[str, str],
|
||||
) -> None:
|
||||
# Initialize connector with the test site URL and specific folder
|
||||
connector = SharepointConnector(
|
||||
sites=[os.environ["SHAREPOINT_SITE"] + "/Shared Documents/test"]
|
||||
)
|
||||
|
||||
# Load credentials
|
||||
connector.load_credentials(sharepoint_credentials)
|
||||
|
||||
# Get all documents
|
||||
document_batches = list(connector.load_from_state())
|
||||
found_documents: list[Document] = [
|
||||
doc for batch in document_batches for doc in batch
|
||||
]
|
||||
|
||||
# Should only find documents in the test folder
|
||||
test_folder_docs = [
|
||||
doc
|
||||
for doc in EXPECTED_DOCUMENTS
|
||||
if doc.folder_path and doc.folder_path.startswith("test")
|
||||
]
|
||||
assert len(found_documents) == len(
|
||||
test_folder_docs
|
||||
), "Should only find documents in test folder"
|
||||
|
||||
# Verify each expected document
|
||||
for expected in test_folder_docs:
|
||||
doc = find_document(found_documents, expected.semantic_identifier)
|
||||
verify_document_content(doc, expected)
|
||||
|
||||
|
||||
def test_sharepoint_connector_root_folder(
|
||||
mock_get_unstructured_api_key: MagicMock,
|
||||
sharepoint_credentials: dict[str, str],
|
||||
) -> None:
|
||||
# Initialize connector with the base site URL
|
||||
connector = SharepointConnector(sites=[os.environ["SHAREPOINT_SITE"]])
|
||||
|
||||
# Load credentials
|
||||
connector.load_credentials(sharepoint_credentials)
|
||||
|
||||
# Get all documents
|
||||
document_batches = list(connector.load_from_state())
|
||||
found_documents: list[Document] = [
|
||||
doc for batch in document_batches for doc in batch
|
||||
]
|
||||
|
||||
assert len(found_documents) == len(
|
||||
EXPECTED_DOCUMENTS
|
||||
), "Should find all documents in main library"
|
||||
|
||||
# Verify each expected document
|
||||
for expected in EXPECTED_DOCUMENTS:
|
||||
doc = find_document(found_documents, expected.semantic_identifier)
|
||||
verify_document_content(doc, expected)
|
||||
|
||||
|
||||
def test_sharepoint_connector_other_library(
|
||||
mock_get_unstructured_api_key: MagicMock,
|
||||
sharepoint_credentials: dict[str, str],
|
||||
) -> None:
|
||||
# Initialize connector with the other library
|
||||
connector = SharepointConnector(
|
||||
sites=[
|
||||
os.environ["SHAREPOINT_SITE"] + "/Other Library",
|
||||
]
|
||||
)
|
||||
|
||||
# Load credentials
|
||||
connector.load_credentials(sharepoint_credentials)
|
||||
|
||||
# Get all documents
|
||||
document_batches = list(connector.load_from_state())
|
||||
found_documents: list[Document] = [
|
||||
doc for batch in document_batches for doc in batch
|
||||
]
|
||||
expected_documents: list[ExpectedDocument] = [
|
||||
doc for doc in EXPECTED_DOCUMENTS if doc.library == "Other Library"
|
||||
]
|
||||
|
||||
# Should find all documents in `Other Library`
|
||||
assert len(found_documents) == len(
|
||||
expected_documents
|
||||
), "Should find all documents in `Other Library`"
|
||||
|
||||
# Verify each expected document
|
||||
for expected in expected_documents:
|
||||
doc = find_document(found_documents, expected.semantic_identifier)
|
||||
verify_document_content(doc, expected)
|
||||
|
||||
|
||||
def test_sharepoint_connector_poll(
|
||||
mock_get_unstructured_api_key: MagicMock,
|
||||
sharepoint_credentials: dict[str, str],
|
||||
) -> None:
|
||||
# Initialize connector with the base site URL
|
||||
connector = SharepointConnector(
|
||||
sites=["https://danswerai.sharepoint.com/sites/sharepoint-tests"]
|
||||
)
|
||||
|
||||
# Load credentials
|
||||
connector.load_credentials(sharepoint_credentials)
|
||||
|
||||
# Set time window to only capture test1.docx (modified at 2025-01-28 20:51:42+00:00)
|
||||
start = datetime(2025, 1, 28, 20, 51, 30, tzinfo=timezone.utc) # 12 seconds before
|
||||
end = datetime(2025, 1, 28, 20, 51, 50, tzinfo=timezone.utc) # 8 seconds after
|
||||
|
||||
# Get documents within the time window
|
||||
document_batches = list(connector._fetch_from_sharepoint(start=start, end=end))
|
||||
found_documents: list[Document] = [
|
||||
doc for batch in document_batches for doc in batch
|
||||
]
|
||||
|
||||
# Should only find test1.docx
|
||||
assert len(found_documents) == 1, "Should only find one document in the time window"
|
||||
doc = found_documents[0]
|
||||
assert doc.semantic_identifier == "test1.docx"
|
||||
verify_document_metadata(doc)
|
||||
verify_document_content(
|
||||
doc, [d for d in EXPECTED_DOCUMENTS if d.semantic_identifier == "test1.docx"][0]
|
||||
)
|
||||
@@ -432,61 +432,30 @@ class CCPairManager:
|
||||
if user_performing_action
|
||||
else GENERAL_HEADERS,
|
||||
)
|
||||
#
|
||||
if result.status_code != 409:
|
||||
result.raise_for_status()
|
||||
|
||||
group_sync_result = requests.post(
|
||||
url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-groups",
|
||||
headers=user_performing_action.headers
|
||||
if user_performing_action
|
||||
else GENERAL_HEADERS,
|
||||
)
|
||||
if group_sync_result.status_code != 409:
|
||||
group_sync_result.raise_for_status()
|
||||
|
||||
@staticmethod
|
||||
def get_doc_sync_task(
|
||||
def get_sync_task(
|
||||
cc_pair: DATestCCPair,
|
||||
user_performing_action: DATestUser | None = None,
|
||||
) -> datetime | None:
|
||||
doc_sync_response = requests.get(
|
||||
response = requests.get(
|
||||
url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-permissions",
|
||||
headers=user_performing_action.headers
|
||||
if user_performing_action
|
||||
else GENERAL_HEADERS,
|
||||
)
|
||||
doc_sync_response.raise_for_status()
|
||||
doc_sync_response_str = doc_sync_response.json()
|
||||
response.raise_for_status()
|
||||
response_str = response.json()
|
||||
|
||||
# If the response itself is a datetime string, parse it
|
||||
if not isinstance(doc_sync_response_str, str):
|
||||
if not isinstance(response_str, str):
|
||||
return None
|
||||
|
||||
try:
|
||||
return datetime.fromisoformat(doc_sync_response_str)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def get_group_sync_task(
|
||||
cc_pair: DATestCCPair,
|
||||
user_performing_action: DATestUser | None = None,
|
||||
) -> datetime | None:
|
||||
group_sync_response = requests.get(
|
||||
url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-groups",
|
||||
headers=user_performing_action.headers
|
||||
if user_performing_action
|
||||
else GENERAL_HEADERS,
|
||||
)
|
||||
group_sync_response.raise_for_status()
|
||||
group_sync_response_str = group_sync_response.json()
|
||||
|
||||
# If the response itself is a datetime string, parse it
|
||||
if not isinstance(group_sync_response_str, str):
|
||||
return None
|
||||
|
||||
try:
|
||||
return datetime.fromisoformat(group_sync_response_str)
|
||||
return datetime.fromisoformat(response_str)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
@@ -529,37 +498,15 @@ class CCPairManager:
|
||||
timeout: float = MAX_DELAY,
|
||||
number_of_updated_docs: int = 0,
|
||||
user_performing_action: DATestUser | None = None,
|
||||
# Sometimes waiting for a group sync is not necessary
|
||||
should_wait_for_group_sync: bool = True,
|
||||
# Sometimes waiting for a vespa sync is not necessary
|
||||
should_wait_for_vespa_sync: bool = True,
|
||||
) -> None:
|
||||
"""after: The task register time must be after this time."""
|
||||
doc_synced = False
|
||||
group_synced = False
|
||||
start = time.monotonic()
|
||||
while True:
|
||||
# We are treating both syncs as part of one larger permission sync job
|
||||
doc_last_synced = CCPairManager.get_doc_sync_task(
|
||||
cc_pair, user_performing_action
|
||||
)
|
||||
group_last_synced = CCPairManager.get_group_sync_task(
|
||||
cc_pair, user_performing_action
|
||||
)
|
||||
|
||||
if not doc_synced and doc_last_synced and doc_last_synced > after:
|
||||
print(f"doc_last_synced: {doc_last_synced}")
|
||||
last_synced = CCPairManager.get_sync_task(cc_pair, user_performing_action)
|
||||
if last_synced and last_synced > after:
|
||||
print(f"last_synced: {last_synced}")
|
||||
print(f"sync command start time: {after}")
|
||||
print(f"permission sync complete: cc_pair={cc_pair.id}")
|
||||
doc_synced = True
|
||||
|
||||
if not group_synced and group_last_synced and group_last_synced > after:
|
||||
print(f"group_last_synced: {group_last_synced}")
|
||||
print(f"sync command start time: {after}")
|
||||
print(f"group sync complete: cc_pair={cc_pair.id}")
|
||||
group_synced = True
|
||||
|
||||
if doc_synced and (group_synced or not should_wait_for_group_sync):
|
||||
break
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
@@ -577,9 +524,6 @@ class CCPairManager:
|
||||
# this shouldnt be necessary but something is off with the timing for the sync jobs
|
||||
time.sleep(5)
|
||||
|
||||
if not should_wait_for_vespa_sync:
|
||||
return
|
||||
|
||||
print("waiting for vespa sync")
|
||||
# wait for the vespa sync to complete once the permission sync is complete
|
||||
start = time.monotonic()
|
||||
|
||||
@@ -18,7 +18,6 @@ from onyx.db.engine import get_session_with_tenant
|
||||
from onyx.db.engine import SYNC_DB_API
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.swap_index import check_index_swap
|
||||
from onyx.document_index.document_index_utils import get_multipass_config
|
||||
from onyx.document_index.vespa.index import DOCUMENT_ID_ENDPOINT
|
||||
from onyx.document_index.vespa.index import VespaIndex
|
||||
from onyx.indexing.models import IndexingSetting
|
||||
@@ -174,16 +173,10 @@ def reset_vespa() -> None:
|
||||
check_index_swap(db_session)
|
||||
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
multipass_config = get_multipass_config(search_settings)
|
||||
index_name = search_settings.index_name
|
||||
|
||||
success = setup_vespa(
|
||||
document_index=VespaIndex(
|
||||
index_name=index_name,
|
||||
secondary_index_name=None,
|
||||
large_chunks_enabled=multipass_config.enable_large_chunks,
|
||||
secondary_large_chunks_enabled=None,
|
||||
),
|
||||
document_index=VespaIndex(index_name=index_name, secondary_index_name=None),
|
||||
index_setting=IndexingSetting.from_db_model(search_settings),
|
||||
secondary_index_setting=None,
|
||||
)
|
||||
@@ -257,16 +250,10 @@ def reset_vespa_multitenant() -> None:
|
||||
check_index_swap(db_session)
|
||||
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
multipass_config = get_multipass_config(search_settings)
|
||||
index_name = search_settings.index_name
|
||||
|
||||
success = setup_vespa(
|
||||
document_index=VespaIndex(
|
||||
index_name=index_name,
|
||||
secondary_index_name=None,
|
||||
large_chunks_enabled=multipass_config.enable_large_chunks,
|
||||
secondary_large_chunks_enabled=None,
|
||||
),
|
||||
document_index=VespaIndex(index_name=index_name, secondary_index_name=None),
|
||||
index_setting=IndexingSetting.from_db_model(search_settings),
|
||||
secondary_index_setting=None,
|
||||
)
|
||||
|
||||
@@ -1,186 +0,0 @@
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from google.oauth2.service_account import Credentials
|
||||
|
||||
from onyx.connectors.google_utils.resources import get_drive_service
|
||||
from onyx.connectors.google_utils.resources import get_google_docs_service
|
||||
from onyx.connectors.google_utils.resources import GoogleDocsService
|
||||
from onyx.connectors.google_utils.resources import GoogleDriveService
|
||||
|
||||
|
||||
GOOGLE_SCOPES = {
|
||||
"google_drive": [
|
||||
"https://www.googleapis.com/auth/drive",
|
||||
"https://www.googleapis.com/auth/admin.directory.group",
|
||||
"https://www.googleapis.com/auth/admin.directory.user",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def _create_doc_service(drive_service: GoogleDriveService) -> GoogleDocsService:
|
||||
docs_service = get_google_docs_service(
|
||||
creds=drive_service._http.credentials,
|
||||
user_email=drive_service._http.credentials._subject,
|
||||
)
|
||||
return docs_service
|
||||
|
||||
|
||||
class GoogleDriveManager:
|
||||
@staticmethod
|
||||
def create_impersonated_drive_service(
|
||||
service_account_key: dict, impersonated_user_email: str
|
||||
) -> GoogleDriveService:
|
||||
"""Gets a drive service that impersonates a specific user"""
|
||||
credentials = Credentials.from_service_account_info(
|
||||
service_account_key,
|
||||
scopes=GOOGLE_SCOPES["google_drive"],
|
||||
subject=impersonated_user_email,
|
||||
)
|
||||
|
||||
service = get_drive_service(credentials, impersonated_user_email)
|
||||
|
||||
# Verify impersonation
|
||||
about = service.about().get(fields="user").execute()
|
||||
if about.get("user", {}).get("emailAddress") != impersonated_user_email:
|
||||
raise ValueError(
|
||||
f"Failed to impersonate {impersonated_user_email}. "
|
||||
f"Instead got {about.get('user', {}).get('emailAddress')}"
|
||||
)
|
||||
return service
|
||||
|
||||
@staticmethod
|
||||
def create_shared_drive(
|
||||
drive_service: GoogleDriveService, admin_email: str, test_id: str
|
||||
) -> str:
|
||||
"""
|
||||
Creates a shared drive and returns the drive's ID
|
||||
"""
|
||||
try:
|
||||
about = drive_service.about().get(fields="user").execute()
|
||||
creating_user = about["user"]["emailAddress"]
|
||||
|
||||
# Verify we're still impersonating the admin
|
||||
if creating_user != admin_email:
|
||||
raise ValueError(
|
||||
f"Expected to create drive as {admin_email}, but instead created drive as {creating_user}"
|
||||
)
|
||||
|
||||
drive_metadata = {"name": f"perm_sync_drive_{test_id}"}
|
||||
|
||||
request_id = str(uuid4())
|
||||
drive = (
|
||||
drive_service.drives()
|
||||
.create(
|
||||
body=drive_metadata,
|
||||
requestId=request_id,
|
||||
fields="id,name,capabilities",
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
|
||||
return drive["id"]
|
||||
except Exception as e:
|
||||
print(f"Error creating shared drive: {str(e)}")
|
||||
raise
|
||||
|
||||
@staticmethod
|
||||
def create_empty_doc(
|
||||
drive_service: Any,
|
||||
drive_id: str,
|
||||
) -> str:
|
||||
"""
|
||||
Creates an empty document in the given drive and returns the document's ID
|
||||
"""
|
||||
file_metadata = {
|
||||
"name": f"perm_sync_doc_{drive_id}_{str(uuid4())}",
|
||||
"mimeType": "application/vnd.google-apps.document",
|
||||
"parents": [drive_id],
|
||||
}
|
||||
file = (
|
||||
drive_service.files()
|
||||
.create(body=file_metadata, supportsAllDrives=True)
|
||||
.execute()
|
||||
)
|
||||
|
||||
return file["id"]
|
||||
|
||||
@staticmethod
|
||||
def append_text_to_doc(
|
||||
drive_service: GoogleDriveService, doc_id: str, text: str
|
||||
) -> None:
|
||||
docs_service = _create_doc_service(drive_service)
|
||||
|
||||
docs_service.documents().batchUpdate(
|
||||
documentId=doc_id,
|
||||
body={
|
||||
"requests": [{"insertText": {"location": {"index": 1}, "text": text}}]
|
||||
},
|
||||
).execute()
|
||||
|
||||
@staticmethod
|
||||
def update_file_permissions(
|
||||
drive_service: Any, file_id: str, email: str, role: str = "reader"
|
||||
) -> None:
|
||||
permission = {"type": "user", "role": role, "emailAddress": email}
|
||||
drive_service.permissions().create(
|
||||
fileId=file_id,
|
||||
body=permission,
|
||||
supportsAllDrives=True,
|
||||
sendNotificationEmail=False,
|
||||
).execute()
|
||||
|
||||
@staticmethod
|
||||
def remove_file_permissions(drive_service: Any, file_id: str, email: str) -> None:
|
||||
permissions = (
|
||||
drive_service.permissions()
|
||||
.list(fileId=file_id, supportsAllDrives=True)
|
||||
.execute()
|
||||
)
|
||||
# TODO: This is a hacky way to remove permissions: it deletes the first permission with the reader role.
# We still need a way to map a user's email to a permission id. permissions.get returns a permission id,
# but its email field comes back as None, likely because the permission belongs to a group or to
# domain-wide delegation. (One possible workaround is sketched after this file.)
|
||||
for permission in permissions.get("permissions", []):
|
||||
if permission.get("role") == "reader":
|
||||
drive_service.permissions().delete(
|
||||
fileId=file_id,
|
||||
permissionId=permission["id"],
|
||||
supportsAllDrives=True,
|
||||
).execute()
|
||||
break
|
||||
|
||||
@staticmethod
|
||||
def make_file_public(drive_service: Any, file_id: str) -> None:
|
||||
permission = {"type": "anyone", "role": "reader"}
|
||||
drive_service.permissions().create(
|
||||
fileId=file_id, body=permission, supportsAllDrives=True
|
||||
).execute()
|
||||
|
||||
@staticmethod
|
||||
def cleanup_drive(drive_service: Any, drive_id: str) -> None:
|
||||
try:
|
||||
# Delete up to 2 files that match our pattern
|
||||
file_name_prefix = f"perm_sync_doc_{drive_id}"
|
||||
files = (
|
||||
drive_service.files()
|
||||
.list(
|
||||
q=f"name contains '{file_name_prefix}'",
|
||||
driveId=drive_id,
|
||||
includeItemsFromAllDrives=True,
|
||||
supportsAllDrives=True,
|
||||
corpora="drive",
|
||||
fields="files(id)",
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
|
||||
for file in files.get("files", []):
|
||||
drive_service.files().delete(
|
||||
fileId=file["id"], supportsAllDrives=True
|
||||
).execute()
|
||||
|
||||
# Then delete the drive
|
||||
drive_service.drives().delete(driveId=drive_id).execute()
|
||||
except Exception as e:
|
||||
print(f"Error cleaning up drive {drive_id}: {e}")
|
||||
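
Note on the TODO above: one way to map an email address to a Drive permission id is to ask permissions.list for the emailAddress field explicitly, since it is omitted from the default response. A minimal sketch, assuming the same impersonated drive service as above and that the grant is a direct user permission rather than a group or domain rule:

from typing import Any


def find_permission_id_for_email(drive_service: Any, file_id: str, email: str) -> str | None:
    """Return the permission id for `email` on `file_id`, or None if not found."""
    response = (
        drive_service.permissions()
        .list(
            fileId=file_id,
            supportsAllDrives=True,
            # emailAddress is only returned when requested via `fields`
            fields="permissions(id,emailAddress,role)",
        )
        .execute()
    )
    # Ignores pagination for brevity; fine for the handful of permissions used in these tests.
    for permission in response.get("permissions", []):
        if permission.get("emailAddress", "").lower() == email.lower():
            return permission["id"]
    return None
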
@@ -1,332 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.google_utils.resources import GoogleDriveService
|
||||
from onyx.connectors.google_utils.shared_constants import (
|
||||
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
|
||||
)
|
||||
from onyx.connectors.google_utils.shared_constants import (
|
||||
DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
|
||||
)
|
||||
from onyx.connectors.models import InputType
|
||||
from onyx.db.enums import AccessType
|
||||
from tests.integration.common_utils.managers.cc_pair import CCPairManager
|
||||
from tests.integration.common_utils.managers.connector import ConnectorManager
|
||||
from tests.integration.common_utils.managers.credential import CredentialManager
|
||||
from tests.integration.common_utils.managers.document_search import (
|
||||
DocumentSearchManager,
|
||||
)
|
||||
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
|
||||
from tests.integration.common_utils.managers.user import UserManager
|
||||
from tests.integration.common_utils.test_models import DATestCCPair
|
||||
from tests.integration.common_utils.test_models import DATestConnector
|
||||
from tests.integration.common_utils.test_models import DATestCredential
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
from tests.integration.common_utils.vespa import vespa_fixture
|
||||
from tests.integration.connector_job_tests.google.google_drive_api_utils import (
|
||||
GoogleDriveManager,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def google_drive_test_env_setup() -> (
|
||||
Generator[
|
||||
tuple[
|
||||
GoogleDriveService, str, DATestCCPair, DATestUser, DATestUser, DATestUser
|
||||
],
|
||||
None,
|
||||
None,
|
||||
]
|
||||
):
|
||||
# Creating an admin user (first user created is automatically an admin)
|
||||
admin_user: DATestUser = UserManager.create(email="admin@onyx-test.com")
|
||||
# Creating a non-admin user
|
||||
test_user_1: DATestUser = UserManager.create(email="test_user_1@onyx-test.com")
|
||||
# Creating a non-admin user
|
||||
test_user_2: DATestUser = UserManager.create(email="test_user_2@onyx-test.com")
|
||||
|
||||
service_account_key = os.environ["FULL_CONTROL_DRIVE_SERVICE_ACCOUNT"]
|
||||
drive_id: str | None = None
|
||||
|
||||
try:
|
||||
credentials = {
|
||||
DB_CREDENTIALS_PRIMARY_ADMIN_KEY: admin_user.email,
|
||||
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key,
|
||||
}
|
||||
|
||||
# Setup Google Drive
|
||||
drive_service = GoogleDriveManager.create_impersonated_drive_service(
|
||||
json.loads(service_account_key), admin_user.email
|
||||
)
|
||||
test_id = str(uuid4())
|
||||
drive_id = GoogleDriveManager.create_shared_drive(
|
||||
drive_service, admin_user.email, test_id
|
||||
)
|
||||
|
||||
# Setup Onyx infrastructure
|
||||
LLMProviderManager.create(user_performing_action=admin_user)
|
||||
|
||||
before = datetime.now(timezone.utc)
|
||||
credential: DATestCredential = CredentialManager.create(
|
||||
source=DocumentSource.GOOGLE_DRIVE,
|
||||
credential_json=credentials,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
connector: DATestConnector = ConnectorManager.create(
|
||||
name="Google Drive Test",
|
||||
input_type=InputType.POLL,
|
||||
source=DocumentSource.GOOGLE_DRIVE,
|
||||
connector_specific_config={
|
||||
"shared_drive_urls": f"https://drive.google.com/drive/folders/{drive_id}"
|
||||
},
|
||||
access_type=AccessType.SYNC,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
cc_pair: DATestCCPair = CCPairManager.create(
|
||||
credential_id=credential.id,
|
||||
connector_id=connector.id,
|
||||
access_type=AccessType.SYNC,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
CCPairManager.wait_for_indexing_completion(
|
||||
cc_pair=cc_pair, after=before, user_performing_action=admin_user
|
||||
)
|
||||
|
||||
yield drive_service, drive_id, cc_pair, admin_user, test_user_1, test_user_2
|
||||
|
||||
except json.JSONDecodeError:
|
||||
pytest.skip("FULL_CONTROL_DRIVE_SERVICE_ACCOUNT is not valid JSON")
|
||||
finally:
|
||||
# Cleanup drive and file
|
||||
if drive_id is not None:
|
||||
GoogleDriveManager.cleanup_drive(drive_service, drive_id)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Needs to be tested for flakiness")
|
||||
def test_google_permission_sync(
|
||||
reset: None,
|
||||
vespa_client: vespa_fixture,
|
||||
google_drive_test_env_setup: tuple[
|
||||
GoogleDriveService, str, DATestCCPair, DATestUser, DATestUser, DATestUser
|
||||
],
|
||||
) -> None:
|
||||
(
|
||||
drive_service,
|
||||
drive_id,
|
||||
cc_pair,
|
||||
admin_user,
|
||||
test_user_1,
|
||||
test_user_2,
|
||||
) = google_drive_test_env_setup
|
||||
|
||||
# ----------------------BASELINE TEST----------------------
|
||||
before = datetime.now(timezone.utc)
|
||||
|
||||
# Create empty test doc in drive
|
||||
doc_id_1 = GoogleDriveManager.create_empty_doc(drive_service, drive_id)
|
||||
|
||||
# Append text to doc
|
||||
doc_text_1 = "The secret number is 12345"
|
||||
GoogleDriveManager.append_text_to_doc(drive_service, doc_id_1, doc_text_1)
|
||||
|
||||
# run indexing
|
||||
CCPairManager.run_once(cc_pair, admin_user)
|
||||
CCPairManager.wait_for_indexing_completion(
|
||||
cc_pair=cc_pair, after=before, user_performing_action=admin_user
|
||||
)
|
||||
|
||||
# run permission sync
|
||||
CCPairManager.sync(
|
||||
cc_pair=cc_pair,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
CCPairManager.wait_for_sync(
|
||||
cc_pair=cc_pair,
|
||||
after=before,
|
||||
number_of_updated_docs=1,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Verify admin has access to document
|
||||
admin_results = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=admin_user
|
||||
)
|
||||
assert doc_text_1 in [result.strip("\ufeff") for result in admin_results]
|
||||
|
||||
# Verify test_user_1 cannot access document
|
||||
user1_results = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=test_user_1
|
||||
)
|
||||
assert doc_text_1 not in [result.strip("\ufeff") for result in user1_results]
|
||||
|
||||
# ----------------------GRANT USER 1 DOC PERMISSIONS TEST--------------------------
|
||||
before = datetime.now(timezone.utc)
|
||||
|
||||
# Grant user 1 access to document 1
|
||||
GoogleDriveManager.update_file_permissions(
|
||||
drive_service=drive_service,
|
||||
file_id=doc_id_1,
|
||||
email=test_user_1.email,
|
||||
role="reader",
|
||||
)
|
||||
|
||||
# Create a second doc in the drive which user 1 should not have access to
|
||||
doc_id_2 = GoogleDriveManager.create_empty_doc(drive_service, drive_id)
|
||||
doc_text_2 = "The secret number is 67890"
|
||||
GoogleDriveManager.append_text_to_doc(drive_service, doc_id_2, doc_text_2)
|
||||
|
||||
# Run indexing
|
||||
CCPairManager.run_once(cc_pair, admin_user)
|
||||
CCPairManager.wait_for_indexing_completion(
|
||||
cc_pair=cc_pair,
|
||||
after=before,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Run permission sync
|
||||
CCPairManager.sync(
|
||||
cc_pair=cc_pair,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
CCPairManager.wait_for_sync(
|
||||
cc_pair=cc_pair,
|
||||
after=before,
|
||||
number_of_updated_docs=1,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Verify admin can access both documents
|
||||
admin_results = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=admin_user
|
||||
)
|
||||
assert {doc_text_1, doc_text_2} == {
|
||||
result.strip("\ufeff") for result in admin_results
|
||||
}
|
||||
|
||||
# Verify user 1 can access document 1
|
||||
user1_results = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=test_user_1
|
||||
)
|
||||
assert doc_text_1 in [result.strip("\ufeff") for result in user1_results]
|
||||
|
||||
# Verify user 1 cannot access document 2
|
||||
user1_results_2 = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=test_user_1
|
||||
)
|
||||
assert doc_text_2 not in [result.strip("\ufeff") for result in user1_results_2]
|
||||
|
||||
# ----------------------REMOVE USER 1 DOC PERMISSIONS TEST--------------------------
|
||||
before = datetime.now(timezone.utc)
|
||||
|
||||
# Remove user 1 access to document 1
|
||||
GoogleDriveManager.remove_file_permissions(
|
||||
drive_service=drive_service, file_id=doc_id_1, email=test_user_1.email
|
||||
)
|
||||
# Run permission sync
|
||||
CCPairManager.sync(
|
||||
cc_pair=cc_pair,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
CCPairManager.wait_for_sync(
|
||||
cc_pair=cc_pair,
|
||||
after=before,
|
||||
number_of_updated_docs=1,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Verify admin can access both documents
|
||||
admin_results = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=admin_user
|
||||
)
|
||||
assert {doc_text_1, doc_text_2} == {
|
||||
result.strip("\ufeff") for result in admin_results
|
||||
}
|
||||
|
||||
# Verify user 1 cannot access either document
|
||||
user1_results = DocumentSearchManager.search_documents(
|
||||
query="secret numbers", user_performing_action=test_user_1
|
||||
)
|
||||
assert {result.strip("\ufeff") for result in user1_results} == set()
|
||||
|
||||
# ----------------------GRANT USER 1 DRIVE PERMISSIONS TEST--------------------------
|
||||
before = datetime.now(timezone.utc)
|
||||
|
||||
# Grant user 1 access to drive
|
||||
GoogleDriveManager.update_file_permissions(
|
||||
drive_service=drive_service,
|
||||
file_id=drive_id,
|
||||
email=test_user_1.email,
|
||||
role="reader",
|
||||
)
|
||||
|
||||
# Run permission sync
|
||||
CCPairManager.sync(
|
||||
cc_pair=cc_pair,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
CCPairManager.wait_for_sync(
|
||||
cc_pair=cc_pair,
|
||||
after=before,
|
||||
number_of_updated_docs=2,
|
||||
user_performing_action=admin_user,
|
||||
# If we are only updating the group definition for this test, we use this variable,
# since that doesn't result in a Vespa sync, so we don't want to wait for one.
|
||||
should_wait_for_vespa_sync=False,
|
||||
)
|
||||
|
||||
# Verify user 1 can access both documents
|
||||
user1_results = DocumentSearchManager.search_documents(
|
||||
query="secret numbers", user_performing_action=test_user_1
|
||||
)
|
||||
assert {doc_text_1, doc_text_2} == {
|
||||
result.strip("\ufeff") for result in user1_results
|
||||
}
|
||||
|
||||
# ----------------------MAKE DRIVE PUBLIC TEST--------------------------
|
||||
before = datetime.now(timezone.utc)
|
||||
|
||||
# Unable to make drive itself public as Google's security policies prevent this, so we make the documents public instead
|
||||
GoogleDriveManager.make_file_public(drive_service, doc_id_1)
|
||||
GoogleDriveManager.make_file_public(drive_service, doc_id_2)
|
||||
|
||||
# Run permission sync
|
||||
CCPairManager.sync(
|
||||
cc_pair=cc_pair,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
CCPairManager.wait_for_sync(
|
||||
cc_pair=cc_pair,
|
||||
after=before,
|
||||
number_of_updated_docs=2,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Verify all users can access both documents
|
||||
admin_results = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=admin_user
|
||||
)
|
||||
assert {doc_text_1, doc_text_2} == {
|
||||
result.strip("\ufeff") for result in admin_results
|
||||
}
|
||||
|
||||
user1_results = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=test_user_1
|
||||
)
|
||||
assert {doc_text_1, doc_text_2} == {
|
||||
result.strip("\ufeff") for result in user1_results
|
||||
}
|
||||
|
||||
user2_results = DocumentSearchManager.search_documents(
|
||||
query="secret number", user_performing_action=test_user_2
|
||||
)
|
||||
assert {doc_text_1, doc_text_2} == {
|
||||
result.strip("\ufeff") for result in user2_results
|
||||
}
|
||||
@@ -1,20 +0,0 @@
from onyx.document_index.vespa.shared_utils.utils import remove_invalid_unicode_chars


def test_remove_invalid_unicode_chars() -> None:
    """Test that invalid Unicode characters are properly removed."""
    # Test removal of illegal XML character 0xFDDB
    text_with_illegal_char = "Valid text \uFDDB more text"
    sanitized = remove_invalid_unicode_chars(text_with_illegal_char)
    assert "\uFDDB" not in sanitized
    assert sanitized == "Valid text more text"

    # Test that valid characters are preserved
    valid_text = "Hello, world! 你好世界"
    assert remove_invalid_unicode_chars(valid_text) == valid_text

    # Test multiple invalid characters including 0xFDDB
    text_with_multiple_illegal = "\x00Hello\uFDDB World\uFFFE!"
    sanitized = remove_invalid_unicode_chars(text_with_multiple_illegal)
    assert all(c not in sanitized for c in ["\x00", "\uFDDB", "\uFFFE"])
    assert sanitized == "Hello World!"
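
For context, a minimal sanitizer that would satisfy the assertions in this (deleted) test — not necessarily the project's actual remove_invalid_unicode_chars implementation — could drop the XML-illegal characters and then collapse the double spaces that removal leaves behind:

import re

# C0 control characters (except tab, newline, carriage return) plus the
# noncharacters U+FDD0-U+FDEF, U+FFFE and U+FFFF are rejected by Vespa/XML.
_ILLEGAL_CHARS = re.compile("[\x00-\x08\x0b\x0c\x0e-\x1f\ufdd0-\ufdef\ufffe\uffff]")


def strip_illegal_unicode(text: str) -> str:
    cleaned = _ILLEGAL_CHARS.sub("", text)
    # Removing a character that sat between two spaces leaves a double space; collapse it.
    return re.sub(r" {2,}", " ", cleaned)
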
@@ -6,7 +6,7 @@ import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.engine import get_sqlalchemy_engine
|
||||
from onyx.document_index.document_index_utils import get_both_index_properties
|
||||
from onyx.document_index.document_index_utils import get_both_index_names
|
||||
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ def test_vespa_update() -> None:
|
||||
doc_id = "test-vespa-update"
|
||||
|
||||
with Session(get_sqlalchemy_engine()) as db_session:
|
||||
primary_index_name, _, _, _ = get_both_index_properties(db_session)
|
||||
primary_index_name, _ = get_both_index_names(db_session)
|
||||
endpoint = (
|
||||
f"{DOCUMENT_ID_ENDPOINT.format(index_name=primary_index_name)}/{doc_id}"
|
||||
)
|
||||
|
||||
75
deployment/cloud_kubernetes/hpa/workers_hpa.yaml
Normal file
@@ -0,0 +1,75 @@
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: celery-worker-heavy-hpa
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: celery-worker-heavy
|
||||
minReplicas: 1
|
||||
maxReplicas: 5
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 60
|
||||
---
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: celery-worker-light-hpa
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: celery-worker-light
|
||||
minReplicas: 1
|
||||
maxReplicas: 10
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
---
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: celery-worker-indexing-hpa
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: celery-worker-indexing
|
||||
minReplicas: 1
|
||||
maxReplicas: 10
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
---
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: celery-worker-monitoring-hpa
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: celery-worker-indexing
|
||||
minReplicas: 1
|
||||
maxReplicas: 4
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
13
deployment/cloud_kubernetes/keda/celery-worker-auth.yaml
Normal file
@@ -0,0 +1,13 @@
apiVersion: keda.sh/v1alpha1
kind: TriggerAuthentication
metadata:
  name: celery-worker-auth
  namespace: onyx
spec:
  secretTargetRef:
    - parameter: host
      name: keda-redis-secret
      key: host
    - parameter: password
      name: keda-redis-secret
      key: password
@@ -0,0 +1,53 @@
|
||||
apiVersion: keda.sh/v1alpha1
|
||||
kind: ScaledObject
|
||||
metadata:
|
||||
name: celery-worker-indexing-scaledobject
|
||||
namespace: onyx
|
||||
labels:
|
||||
app: celery-worker-indexing
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
name: celery-worker-indexing
|
||||
minReplicaCount: 1
|
||||
maxReplicaCount: 30
|
||||
triggers:
|
||||
- type: redis
|
||||
metadata:
|
||||
sslEnabled: "true"
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: connector_indexing
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
|
||||
- type: redis
|
||||
metadata:
|
||||
sslEnabled: "true"
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: connector_indexing:2
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
sslEnabled: "true"
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: connector_indexing:3
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: cpu
|
||||
metadata:
|
||||
type: Utilization
|
||||
value: "70"
|
||||
|
||||
- type: memory
|
||||
metadata:
|
||||
type: Utilization
|
||||
value: "70"
|
||||
@@ -0,0 +1,58 @@
|
||||
apiVersion: keda.sh/v1alpha1
|
||||
kind: ScaledObject
|
||||
metadata:
|
||||
name: celery-worker-light-scaledobject
|
||||
namespace: onyx
|
||||
labels:
|
||||
app: celery-worker-light
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
name: celery-worker-light
|
||||
minReplicaCount: 5
|
||||
maxReplicaCount: 20
|
||||
triggers:
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: vespa_metadata_sync
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: vespa_metadata_sync:2
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: vespa_metadata_sync:3
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: connector_deletion
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: connector_deletion:2
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
@@ -0,0 +1,70 @@
|
||||
apiVersion: keda.sh/v1alpha1
|
||||
kind: ScaledObject
|
||||
metadata:
|
||||
name: celery-worker-primary-scaledobject
|
||||
namespace: onyx
|
||||
labels:
|
||||
app: celery-worker-primary
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
name: celery-worker-primary
|
||||
pollingInterval: 15 # Check every 15 seconds
|
||||
cooldownPeriod: 30 # Wait 30 seconds before scaling down
|
||||
minReplicaCount: 4
|
||||
maxReplicaCount: 4
|
||||
triggers:
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: celery
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: celery:1
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: celery:2
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: celery:3
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: periodic_tasks
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
- type: redis
|
||||
metadata:
|
||||
port: "6379"
|
||||
enableTLS: "true"
|
||||
listName: periodic_tasks:2
|
||||
listLength: "1"
|
||||
databaseIndex: "15"
|
||||
authenticationRef:
|
||||
name: celery-worker-auth
|
||||
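
These ScaledObjects all scale on the length of Celery's Redis lists (database 15, TLS on port 6379). A quick way to spot-check those queue depths outside of KEDA — a minimal sketch assuming the redis-py client and hypothetical REDIS_HOST / REDIS_PASSWORD environment variables:

import os

import redis  # redis-py client, assumed to be installed

r = redis.Redis(
    host=os.environ["REDIS_HOST"],          # hypothetical env var
    password=os.environ["REDIS_PASSWORD"],  # hypothetical env var
    port=6379,
    db=15,
    ssl=True,
)

for queue in ("celery", "periodic_tasks", "connector_indexing", "vespa_metadata_sync", "connector_deletion"):
    print(f"{queue}: {r.llen(queue)} pending task(s)")
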
@@ -0,0 +1,19 @@
|
||||
apiVersion: keda.sh/v1alpha1
|
||||
kind: ScaledObject
|
||||
metadata:
|
||||
name: indexing-model-server-scaledobject
|
||||
namespace: onyx
|
||||
labels:
|
||||
app: indexing-model-server
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
name: indexing-model-server-deployment
|
||||
pollingInterval: 15 # Check every 15 seconds
|
||||
cooldownPeriod: 30 # Wait 30 seconds before scaling down
|
||||
minReplicaCount: 10
|
||||
maxReplicaCount: 10
|
||||
triggers:
|
||||
- type: cpu
|
||||
metadata:
|
||||
type: Utilization
|
||||
value: "70"
|
||||
9
deployment/cloud_kubernetes/keda/keda-redis-secret.yaml
Normal file
@@ -0,0 +1,9 @@
apiVersion: v1
kind: Secret
metadata:
  name: keda-redis-secret
  namespace: onyx
type: Opaque
data:
  host: { base64 encoded host here }
  password: { base64 encoded password here }
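
The host and password values in the Secret above must be base64 encoded before they are placed in the data block. A small sketch of producing those values (the host and password strings here are placeholders, not real credentials):

import base64


def encode_secret_value(raw: str) -> str:
    """Base64-encode a string for a Kubernetes Secret `data` field."""
    return base64.b64encode(raw.encode("utf-8")).decode("ascii")


print(encode_secret_value("redis.internal.example.com"))  # placeholder host
print(encode_secret_value("example-redis-password"))      # placeholder password
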
44
deployment/cloud_kubernetes/workers/beat.yaml
Normal file
@@ -0,0 +1,44 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: celery-beat
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: celery-beat
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: celery-beat
|
||||
spec:
|
||||
containers:
|
||||
- name: celery-beat
|
||||
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
[
|
||||
"celery",
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.beat",
|
||||
"beat",
|
||||
"--loglevel=INFO",
|
||||
]
|
||||
env:
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: onyx-secrets
|
||||
key: redis_password
|
||||
- name: ONYX_VERSION
|
||||
value: "v0.11.0-cloud.beta.8"
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: env-configmap
|
||||
resources:
|
||||
requests:
|
||||
cpu: "250m"
|
||||
memory: "512Mi"
|
||||
limits:
|
||||
cpu: "500m"
|
||||
memory: "1Gi"
|
||||
60
deployment/cloud_kubernetes/workers/heavy_worker.yaml
Normal file
@@ -0,0 +1,60 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: celery-worker-heavy
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: celery-worker-heavy
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: celery-worker-heavy
|
||||
spec:
|
||||
containers:
|
||||
- name: celery-worker-heavy
|
||||
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
[
|
||||
"celery",
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.heavy",
|
||||
"worker",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=heavy@%n",
|
||||
"-Q",
|
||||
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync",
|
||||
]
|
||||
env:
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: onyx-secrets
|
||||
key: redis_password
|
||||
- name: ONYX_VERSION
|
||||
value: "v0.11.0-cloud.beta.8"
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: env-configmap
|
||||
volumeMounts:
|
||||
- name: vespa-certificates
|
||||
mountPath: "/app/certs"
|
||||
readOnly: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: "1000m"
|
||||
memory: "2Gi"
|
||||
limits:
|
||||
cpu: "2000m"
|
||||
memory: "4Gi"
|
||||
volumes:
|
||||
- name: vespa-certificates
|
||||
secret:
|
||||
secretName: vespa-certificates
|
||||
items:
|
||||
- key: cert.pem
|
||||
path: cert.pem
|
||||
- key: key.pem
|
||||
path: key.pem
|
||||
62
deployment/cloud_kubernetes/workers/indexing_worker.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: celery-worker-indexing
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: celery-worker-indexing
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: celery-worker-indexing
|
||||
spec:
|
||||
containers:
|
||||
- name: celery-worker-indexing
|
||||
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
[
|
||||
"celery",
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.indexing",
|
||||
"worker",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=indexing@%n",
|
||||
"-Q",
|
||||
"connector_indexing",
|
||||
"--prefetch-multiplier=1",
|
||||
"--concurrency=10",
|
||||
]
|
||||
env:
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: onyx-secrets
|
||||
key: redis_password
|
||||
- name: ONYX_VERSION
|
||||
value: "v0.11.0-cloud.beta.8"
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: env-configmap
|
||||
volumeMounts:
|
||||
- name: vespa-certificates
|
||||
mountPath: "/app/certs"
|
||||
readOnly: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: "500m"
|
||||
memory: "4Gi"
|
||||
limits:
|
||||
cpu: "1000m"
|
||||
memory: "8Gi"
|
||||
volumes:
|
||||
- name: vespa-certificates
|
||||
secret:
|
||||
secretName: vespa-certificates
|
||||
items:
|
||||
- key: cert.pem
|
||||
path: cert.pem
|
||||
- key: key.pem
|
||||
path: key.pem
|
||||
62
deployment/cloud_kubernetes/workers/light_worker.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: celery-worker-light
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: celery-worker-light
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: celery-worker-light
|
||||
spec:
|
||||
containers:
|
||||
- name: celery-worker-light
|
||||
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
[
|
||||
"celery",
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.light",
|
||||
"worker",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=light@%n",
|
||||
"-Q",
|
||||
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert",
|
||||
"--prefetch-multiplier=1",
|
||||
"--concurrency=10",
|
||||
]
|
||||
env:
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: onyx-secrets
|
||||
key: redis_password
|
||||
- name: ONYX_VERSION
|
||||
value: "v0.11.0-cloud.beta.8"
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: env-configmap
|
||||
volumeMounts:
|
||||
- name: vespa-certificates
|
||||
mountPath: "/app/certs"
|
||||
readOnly: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: "500m"
|
||||
memory: "1Gi"
|
||||
limits:
|
||||
cpu: "1000m"
|
||||
memory: "2Gi"
|
||||
volumes:
|
||||
- name: vespa-certificates
|
||||
secret:
|
||||
secretName: vespa-certificates
|
||||
items:
|
||||
- key: cert.pem
|
||||
path: cert.pem
|
||||
- key: key.pem
|
||||
path: key.pem
|
||||
62
deployment/cloud_kubernetes/workers/monitoring.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: celery-worker-monitoring
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: celery-worker-monitoring
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: celery-worker-monitoring
|
||||
spec:
|
||||
containers:
|
||||
- name: celery-worker-monitoring
|
||||
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
[
|
||||
"celery",
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.monitoring",
|
||||
"worker",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=monitoring@%n",
|
||||
"-Q",
|
||||
"monitoring",
|
||||
"--prefetch-multiplier=8",
|
||||
"--concurrency=8",
|
||||
]
|
||||
env:
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: onyx-secrets
|
||||
key: redis_password
|
||||
- name: ONYX_VERSION
|
||||
value: "v0.11.0-cloud.beta.8"
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: env-configmap
|
||||
volumeMounts:
|
||||
- name: vespa-certificates
|
||||
mountPath: "/app/certs"
|
||||
readOnly: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: "1000m"
|
||||
memory: "1Gi"
|
||||
limits:
|
||||
cpu: "1000m"
|
||||
memory: "1Gi"
|
||||
volumes:
|
||||
- name: vespa-certificates
|
||||
secret:
|
||||
secretName: vespa-certificates
|
||||
items:
|
||||
- key: cert.pem
|
||||
path: cert.pem
|
||||
- key: key.pem
|
||||
path: key.pem
|
||||
62
deployment/cloud_kubernetes/workers/primary.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: celery-worker-primary
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: celery-worker-primary
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: celery-worker-primary
|
||||
spec:
|
||||
containers:
|
||||
- name: celery-worker-primary
|
||||
image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
[
|
||||
"celery",
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.primary",
|
||||
"worker",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=primary@%n",
|
||||
"-Q",
|
||||
"celery,periodic_tasks",
|
||||
"--prefetch-multiplier=1",
|
||||
"--concurrency=10",
|
||||
]
|
||||
env:
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: onyx-secrets
|
||||
key: redis_password
|
||||
- name: ONYX_VERSION
|
||||
value: "v0.11.0-cloud.beta.8"
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: env-configmap
|
||||
volumeMounts:
|
||||
- name: vespa-certificates
|
||||
mountPath: "/app/certs"
|
||||
readOnly: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: "500m"
|
||||
memory: "1Gi"
|
||||
limits:
|
||||
cpu: "1000m"
|
||||
memory: "2Gi"
|
||||
volumes:
|
||||
- name: vespa-certificates
|
||||
secret:
|
||||
secretName: vespa-certificates
|
||||
items:
|
||||
- key: cert.pem
|
||||
path: cert.pem
|
||||
- key: key.pem
|
||||
path: key.pem
|
||||
183
package-lock.json
generated
Normal file
@@ -0,0 +1,183 @@
|
||||
{
|
||||
"name": "onyx",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"react-datepicker": "^7.6.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react-datepicker": "^6.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/core": {
|
||||
"version": "1.6.9",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.9.tgz",
|
||||
"integrity": "sha512-uMXCuQ3BItDUbAMhIXw7UPXRfAlOAvZzdK9BWpE60MCn+Svt3aLn9jsPTi/WNGlRUu2uI0v5S7JiIUsbsvh3fw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/utils": "^0.2.9"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/dom": {
|
||||
"version": "1.6.13",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.13.tgz",
|
||||
"integrity": "sha512-umqzocjDgNRGTuO7Q8CU32dkHkECqI8ZdMZ5Swb6QAM0t5rnlrN3lGo1hdpscRd3WS8T6DKYK4ephgIH9iRh3w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/core": "^1.6.0",
|
||||
"@floating-ui/utils": "^0.2.9"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/react": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/react/-/react-0.27.3.tgz",
|
||||
"integrity": "sha512-CLHnes3ixIFFKVQDdICjel8muhFLOBdQH7fgtHNPY8UbCNqbeKZ262G7K66lGQOUQWWnYocf7ZbUsLJgGfsLHg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/react-dom": "^2.1.2",
|
||||
"@floating-ui/utils": "^0.2.9",
|
||||
"tabbable": "^6.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=17.0.0",
|
||||
"react-dom": ">=17.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/react-dom": {
|
||||
"version": "2.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.2.tgz",
|
||||
"integrity": "sha512-06okr5cgPzMNBy+Ycse2A6udMi4bqwW/zgBF/rwjcNqWkyr82Mcg8b0vjX8OJpZFy/FKjJmw6wV7t44kK6kW7A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/dom": "^1.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=16.8.0",
|
||||
"react-dom": ">=16.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/utils": {
|
||||
"version": "0.2.9",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.9.tgz",
|
||||
"integrity": "sha512-MDWhGtE+eHw5JW7lq4qhc5yRLS11ERl1c7Z6Xd0a58DozHES6EnNNwUWbMiG4J9Cgj053Bhk8zvlhFYKVhULwg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/react": {
|
||||
"version": "19.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.0.4.tgz",
|
||||
"integrity": "sha512-3O4QisJDYr1uTUMZHA2YswiQZRq+Pd8D+GdVFYikTutYsTz+QZgWkAPnP7rx9txoI6EXKcPiluMqWPFV3tT9Wg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"csstype": "^3.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/react-datepicker": {
|
||||
"version": "6.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/react-datepicker/-/react-datepicker-6.2.0.tgz",
|
||||
"integrity": "sha512-+JtO4Fm97WLkJTH8j8/v3Ldh7JCNRwjMYjRaKh4KHH0M3jJoXtwiD3JBCsdlg3tsFIw9eQSqyAPeVDN2H2oM9Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/react": "^0.26.2",
|
||||
"@types/react": "*",
|
||||
"date-fns": "^3.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/react-datepicker/node_modules/@floating-ui/react": {
|
||||
"version": "0.26.28",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/react/-/react-0.26.28.tgz",
|
||||
"integrity": "sha512-yORQuuAtVpiRjpMhdc0wJj06b9JFjrYF4qp96j++v2NBpbi6SEGF7donUJ3TMieerQ6qVkAv1tgr7L4r5roTqw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/react-dom": "^2.1.2",
|
||||
"@floating-ui/utils": "^0.2.8",
|
||||
"tabbable": "^6.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=16.8.0",
|
||||
"react-dom": ">=16.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/clsx": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz",
|
||||
"integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/csstype": {
|
||||
"version": "3.1.3",
|
||||
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
|
||||
"integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/date-fns": {
|
||||
"version": "3.6.0",
|
||||
"resolved": "https://registry.npmjs.org/date-fns/-/date-fns-3.6.0.tgz",
|
||||
"integrity": "sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/kossnocorp"
|
||||
}
|
||||
},
|
||||
"node_modules/react": {
|
||||
"version": "19.0.0",
|
||||
"resolved": "https://registry.npmjs.org/react/-/react-19.0.0.tgz",
|
||||
"integrity": "sha512-V8AVnmPIICiWpGfm6GLzCR/W5FXLchHop40W4nXBmdlEceh16rCN8O8LNWm5bh5XUX91fh7KpA+W0TgMKmgTpQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-datepicker": {
|
||||
"version": "7.6.0",
|
||||
"resolved": "https://registry.npmjs.org/react-datepicker/-/react-datepicker-7.6.0.tgz",
|
||||
"integrity": "sha512-9cQH6Z/qa4LrGhzdc3XoHbhrxNcMi9MKjZmYgF/1MNNaJwvdSjv3Xd+jjvrEEbKEf71ZgCA3n7fQbdwd70qCRw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/react": "^0.27.0",
|
||||
"clsx": "^2.1.1",
|
||||
"date-fns": "^3.6.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.9.0 || ^17 || ^18 || ^19 || ^19.0.0-rc",
|
||||
"react-dom": "^16.9.0 || ^17 || ^18 || ^19 || ^19.0.0-rc"
|
||||
}
|
||||
},
|
||||
"node_modules/react-dom": {
|
||||
"version": "19.0.0",
|
||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.0.0.tgz",
|
||||
"integrity": "sha512-4GV5sHFG0e/0AD4X+ySy6UJd3jVl1iNsNHdpad0qhABJ11twS3TTBnseqsKurKcsNqCEFeGL3uLpVChpIO3QfQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"scheduler": "^0.25.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^19.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/scheduler": {
|
||||
"version": "0.25.0",
|
||||
"resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.25.0.tgz",
|
||||
"integrity": "sha512-xFVuu11jh+xcO7JOAGJNOXld8/TcEHK/4CituBUeUb5hqxJLj9YuemAEuvm9gQ/+pgXYfbQuqAkiYu+u7YEsNA==",
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/tabbable": {
|
||||
"version": "6.2.0",
|
||||
"resolved": "https://registry.npmjs.org/tabbable/-/tabbable-6.2.0.tgz",
|
||||
"integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew==",
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
}
|
||||
8
package.json
Normal file
@@ -0,0 +1,8 @@
{
  "dependencies": {
    "react-datepicker": "^7.6.0"
  },
  "devDependencies": {
    "@types/react-datepicker": "^6.2.0"
  }
}
132
web/package-lock.json
generated
@@ -16,7 +16,6 @@
|
||||
"@headlessui/tailwindcss": "^0.2.1",
|
||||
"@phosphor-icons/react": "^2.0.8",
|
||||
"@radix-ui/react-checkbox": "^1.1.2",
|
||||
"@radix-ui/react-collapsible": "^1.1.2",
|
||||
"@radix-ui/react-dialog": "^1.1.2",
|
||||
"@radix-ui/react-dropdown-menu": "^2.1.4",
|
||||
"@radix-ui/react-label": "^2.1.1",
|
||||
@@ -3508,137 +3507,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-collapsible": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.2.tgz",
|
||||
"integrity": "sha512-PliMB63vxz7vggcyq0IxNYk8vGDrLXVWw4+W4B8YnwI1s18x7YZYqlG9PLX7XxAJUi0g2DxP4XKJMFHh/iVh9A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@radix-ui/primitive": "1.1.1",
|
||||
"@radix-ui/react-compose-refs": "1.1.1",
|
||||
"@radix-ui/react-context": "1.1.1",
|
||||
"@radix-ui/react-id": "1.1.0",
|
||||
"@radix-ui/react-presence": "1.1.2",
|
||||
"@radix-ui/react-primitive": "2.0.1",
|
||||
"@radix-ui/react-use-controllable-state": "1.1.0",
|
||||
"@radix-ui/react-use-layout-effect": "1.1.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/primitive": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.1.tgz",
|
||||
"integrity": "sha512-SJ31y+Q/zAyShtXJc8x83i9TYdbAfHZ++tUZnvjJJqFjzsdUnKsxPL6IEtBlxKkU7yzer//GQtZSV4GbldL3YA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-compose-refs": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.1.tgz",
|
||||
"integrity": "sha512-Y9VzoRDSJtgFMUCoiZBDVo084VQ5hfpXxVE+NgkdNsjiDBByiImMZKKhxMwCbdHvhlENG6a833CbFkOQvTricw==",
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-context": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz",
|
||||
"integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==",
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-presence": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.2.tgz",
|
||||
"integrity": "sha512-18TFr80t5EVgL9x1SwF/YGtfG+l0BS0PRAlCWBDoBEiDQjeKgnNZRVJp/oVBl24sr3Gbfwc/Qpj4OcWTQMsAEg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@radix-ui/react-compose-refs": "1.1.1",
|
||||
"@radix-ui/react-use-layout-effect": "1.1.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-primitive": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.0.1.tgz",
|
||||
"integrity": "sha512-sHCWTtxwNn3L3fH8qAfnF3WbUZycW93SM1j3NFDzXBiz8D6F5UTTy8G1+WFEaiCdvCVRJWj6N2R4Xq6HdiHmDg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@radix-ui/react-slot": "1.1.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-slot": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.1.1.tgz",
|
||||
"integrity": "sha512-RApLLOcINYJA+dMVbOju7MYv1Mb2EBp2nH4HdDzXTSyaR5optlm6Otrz1euW3HbdOR8UmmFK06TD+A9frYWv+g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@radix-ui/react-compose-refs": "1.1.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-collection": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.0.tgz",
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
"@headlessui/tailwindcss": "^0.2.1",
|
||||
"@phosphor-icons/react": "^2.0.8",
|
||||
"@radix-ui/react-checkbox": "^1.1.2",
|
||||
"@radix-ui/react-collapsible": "^1.1.2",
|
||||
"@radix-ui/react-dialog": "^1.1.2",
|
||||
"@radix-ui/react-dropdown-menu": "^2.1.4",
|
||||
"@radix-ui/react-label": "^2.1.1",
|
||||
|
||||
@@ -1,16 +1,41 @@
|
||||
import { defineConfig, devices } from "@playwright/test";
|
||||
|
||||
export default defineConfig({
|
||||
globalSetup: require.resolve("./tests/e2e/global-setup"),
|
||||
|
||||
workers: 1, // temporary change to see if single threaded testing stabilizes the tests
|
||||
testDir: "./tests/e2e", // Folder for test files
|
||||
reporter: "list",
|
||||
// Configure paths for screenshots
|
||||
// expect: {
|
||||
// toMatchSnapshot: {
|
||||
// threshold: 0.2, // Adjust the threshold for visual diffs
|
||||
// },
|
||||
// },
|
||||
// reporter: [["html", { outputFolder: "test-results/output/report" }]], // HTML report location
|
||||
// outputDir: "test-results/output/screenshots", // Set output folder for test artifacts
|
||||
projects: [
|
||||
{
|
||||
name: "admin",
|
||||
// dependency for admin workflows
|
||||
name: "admin_setup",
|
||||
testMatch: /.*\admin_auth\.setup\.ts/,
|
||||
},
|
||||
{
|
||||
// tests admin workflows
|
||||
name: "chromium-admin",
|
||||
grep: /@admin/,
|
||||
use: {
|
||||
...devices["Desktop Chrome"],
|
||||
// Use prepared auth state.
|
||||
storageState: "admin_auth.json",
|
||||
},
|
||||
testIgnore: ["**/codeUtils.test.ts"],
|
||||
dependencies: ["admin_setup"],
|
||||
},
|
||||
{
|
||||
// tests logged out / guest workflows
|
||||
name: "chromium-guest",
|
||||
grep: /@guest/,
|
||||
use: {
|
||||
...devices["Desktop Chrome"],
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
@@ -40,7 +40,14 @@ import * as Yup from "yup";
|
||||
import CollapsibleSection from "./CollapsibleSection";
|
||||
import { SuccessfulPersonaUpdateRedirectType } from "./enums";
|
||||
import { Persona, PersonaLabel, StarterMessage } from "./interfaces";
|
||||
import { PersonaUpsertParameters, createPersona, updatePersona } from "./lib";
|
||||
import {
|
||||
createPersonaLabel,
|
||||
PersonaUpsertParameters,
|
||||
createPersona,
|
||||
deletePersonaLabel,
|
||||
updatePersonaLabel,
|
||||
updatePersona,
|
||||
} from "./lib";
|
||||
import {
|
||||
CameraIcon,
|
||||
GroupsIconSkeleton,
|
||||
@@ -73,10 +80,9 @@ import { errorHandlingFetcher } from "@/lib/fetcher";
|
||||
import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal";
|
||||
import { DeletePersonaButton } from "./[id]/DeletePersonaButton";
|
||||
import Title from "@/components/ui/title";
|
||||
import { SEARCH_TOOL_ID } from "@/app/chat/tools/constants";
|
||||
|
||||
function findSearchTool(tools: ToolSnapshot[]) {
|
||||
return tools.find((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID);
|
||||
return tools.find((tool) => tool.in_code_tool_id === "SearchTool");
|
||||
}
|
||||
|
||||
function findImageGenerationTool(tools: ToolSnapshot[]) {
|
||||
@@ -232,9 +238,11 @@ export function AssistantEditor({
|
||||
existingPersona?.llm_model_provider_override ?? null,
|
||||
llm_model_version_override:
|
||||
existingPersona?.llm_model_version_override ?? null,
|
||||
starter_messages: existingPersona?.starter_messages?.length
|
||||
? existingPersona.starter_messages
|
||||
: [{ message: "" }],
|
||||
starter_messages: existingPersona?.starter_messages ?? [
|
||||
{
|
||||
message: "",
|
||||
},
|
||||
],
|
||||
enabled_tools_map: enabledToolsMap,
|
||||
icon_color: existingPersona?.icon_color ?? defautIconColor,
|
||||
icon_shape: existingPersona?.icon_shape ?? defaultIconShape,
|
||||
@@ -902,11 +910,28 @@ export function AssistantEditor({
|
||||
|
||||
{internetSearchTool && (
|
||||
<>
|
||||
<BooleanFormField
|
||||
name={`enabled_tools_map.${internetSearchTool.id}`}
|
||||
label={internetSearchTool.display_name}
|
||||
subtext="Access real-time information and search the web for up-to-date results"
|
||||
/>
|
||||
<div className="flex items-center content-start mb-2">
|
||||
<Checkbox
|
||||
size="sm"
|
||||
id={`enabled_tools_map.${internetSearchTool.id}`}
|
||||
checked={
|
||||
values.enabled_tools_map[internetSearchTool.id]
|
||||
}
|
||||
onCheckedChange={() => {
|
||||
toggleToolInValues(internetSearchTool.id);
|
||||
}}
|
||||
name={`enabled_tools_map.${internetSearchTool.id}`}
|
||||
/>
|
||||
<div className="flex flex-col ml-2">
|
||||
<span className="text-sm">
|
||||
{internetSearchTool.display_name}
|
||||
</span>
|
||||
<span className="text-xs text-subtle">
|
||||
Access real-time information and search the web
|
||||
for up-to-date results
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
@@ -1097,9 +1122,7 @@ export function AssistantEditor({
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="w-full flex flex-col">
|
||||
<div className="flex gap-x-2 items-center">
|
||||
<div className="block font-medium text-sm">
|
||||
@@ -1110,7 +1133,6 @@ export function AssistantEditor({
|
||||
<SubLabel>
|
||||
Sample messages that help users understand what this
|
||||
assistant can do and how to interact with it effectively.
|
||||
New input fields will appear automatically as you type.
|
||||
</SubLabel>
|
||||
|
||||
<div className="w-full">
|
||||
|
||||
@@ -64,16 +64,19 @@ export default function StarterMessagesList({
|
||||
size="icon"
|
||||
onClick={() => {
|
||||
arrayHelpers.remove(index);
|
||||
if (
|
||||
index === values.length - 2 &&
|
||||
!values[values.length - 1].message
|
||||
) {
|
||||
arrayHelpers.pop();
|
||||
}
|
||||
}}
|
||||
className={`text-gray-400 hover:text-red-500 ${
|
||||
index === values.length - 1 && !starterMessage.message
|
||||
? "opacity-50 cursor-not-allowed"
|
||||
: ""
|
||||
}`}
|
||||
disabled={
|
||||
(index === values.length - 1 && !starterMessage.message) ||
|
||||
(values.length === 1 && index === 0) // should never happen, but just in case
|
||||
}
|
||||
disabled={index === values.length - 1 && !starterMessage.message}
|
||||
>
|
||||
<FiTrash2 className="h-4 w-4" />
|
||||
</Button>
|
||||
|
||||
@@ -1,21 +1,38 @@
"use client";

import React, { useMemo } from "react";
import { Formik } from "formik";
import { ArrayHelpers, FieldArray, Form, Formik } from "formik";
import * as Yup from "yup";
import { usePopup } from "@/components/admin/connectors/Popup";
import { DocumentSet, SlackChannelConfig } from "@/lib/types";
import {
BooleanFormField,
Label,
SelectorFormField,
SubLabel,
TextArrayField,
TextFormField,
} from "@/components/admin/connectors/Field";
import {
createSlackChannelConfig,
isPersonaASlackBotPersona,
updateSlackChannelConfig,
} from "../lib";
import CardSection from "@/components/admin/CardSection";
import { Button } from "@/components/ui/button";
import { useRouter } from "next/navigation";
import { Persona } from "@/app/admin/assistants/interfaces";
import { useState } from "react";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
import { DocumentSetSelectable } from "@/components/documentSet/DocumentSetSelectable";
import CollapsibleSection from "@/app/admin/assistants/CollapsibleSection";
import { StandardAnswerCategoryResponse } from "@/components/standardAnswers/getStandardAnswerCategoriesIfEE";
import { SEARCH_TOOL_ID, SEARCH_TOOL_NAME } from "@/app/chat/tools/constants";
import { SlackChannelConfigFormFields } from "./SlackChannelConfigFormFields";
import { StandardAnswerCategoryDropdownField } from "@/components/standardAnswers/StandardAnswerCategoryDropdown";
import {
Tabs,
TabsList,
TabsTrigger,
TabsContent,
} from "@/components/ui/fully_wrapped_tabs";

export const SlackChannelConfigCreationForm = ({
slack_bot_id,
@@ -30,175 +47,353 @@ export const SlackChannelConfigCreationForm = ({
standardAnswerCategoryResponse: StandardAnswerCategoryResponse;
existingSlackChannelConfig?: SlackChannelConfig;
}) => {
const isUpdate = existingSlackChannelConfig !== undefined;
const { popup, setPopup } = usePopup();
const router = useRouter();
const isUpdate = Boolean(existingSlackChannelConfig);
const existingSlackBotUsesPersona = existingSlackChannelConfig?.persona
? !isPersonaASlackBotPersona(existingSlackChannelConfig.persona)
: false;
const [usingPersonas, setUsingPersonas] = useState(
existingSlackBotUsesPersona
);
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);

const searchEnabledAssistants = useMemo(() => {
return personas.filter((persona) => {
return persona.tools.some(
(tool) => tool.in_code_tool_id == SEARCH_TOOL_ID
);
});
}, [personas]);
const knowledgePersona = personas.find((persona) => persona.id === 0);

return (
<CardSection className="max-w-4xl">
{popup}
<Formik
initialValues={{
slack_bot_id: slack_bot_id,
channel_name:
existingSlackChannelConfig?.channel_config.channel_name || "",
answer_validity_check_enabled: (
existingSlackChannelConfig?.channel_config?.answer_filters || []
).includes("well_answered_postfilter"),
questionmark_prefilter_enabled: (
existingSlackChannelConfig?.channel_config?.answer_filters || []
).includes("questionmark_prefilter"),
respond_tag_only:
existingSlackChannelConfig?.channel_config?.respond_tag_only ||
false,
respond_to_bots:
existingSlackChannelConfig?.channel_config?.respond_to_bots ||
false,
show_continue_in_web_ui:
existingSlackChannelConfig?.channel_config
?.show_continue_in_web_ui ?? !isUpdate,
enable_auto_filters:
existingSlackChannelConfig?.enable_auto_filters || false,
respond_member_group_list:
existingSlackChannelConfig?.channel_config
?.respond_member_group_list || [],
still_need_help_enabled:
existingSlackChannelConfig?.channel_config?.follow_up_tags !==
undefined,
follow_up_tags:
existingSlackChannelConfig?.channel_config?.follow_up_tags ||
undefined,
document_sets:
existingSlackChannelConfig && existingSlackChannelConfig.persona
? existingSlackChannelConfig.persona.document_sets.map(
(documentSet) => documentSet.id
)
: ([] as number[]),
persona_id:
existingSlackChannelConfig?.persona &&
!isPersonaASlackBotPersona(existingSlackChannelConfig.persona)
? existingSlackChannelConfig.persona.id
: null,
response_type:
existingSlackChannelConfig?.response_type || "citations",
standard_answer_categories:
existingSlackChannelConfig?.standard_answer_categories || [],
knowledge_source: existingSlackBotUsesPersona
? "assistant"
: existingSlackChannelConfig?.persona
? "document_sets"
: "all_public",
}}
validationSchema={Yup.object().shape({
slack_bot_id: Yup.number().required(),
channel_name: Yup.string().required("Channel Name is required"),
response_type: Yup.string()
.oneOf(["quotes", "citations"])
.required("Response type is required"),
answer_validity_check_enabled: Yup.boolean().required(),
questionmark_prefilter_enabled: Yup.boolean().required(),
respond_tag_only: Yup.boolean().required(),
respond_to_bots: Yup.boolean().required(),
show_continue_in_web_ui: Yup.boolean().required(),
enable_auto_filters: Yup.boolean().required(),
respond_member_group_list: Yup.array().of(Yup.string()).required(),
still_need_help_enabled: Yup.boolean().required(),
follow_up_tags: Yup.array().of(Yup.string()),
document_sets: Yup.array()
.of(Yup.number())
.when("knowledge_source", {
is: "document_sets",
then: (schema) =>
schema.min(
1,
"At least one Document Set is required when using the 'Document Sets' knowledge source"
),
}),
persona_id: Yup.number()
.nullable()
.when("knowledge_source", {
is: "assistant",
then: (schema) =>
schema.required(
"A persona is required when using the'Assistant' knowledge source"
),
}),
standard_answer_categories: Yup.array(),
knowledge_source: Yup.string()
.oneOf(["all_public", "document_sets", "assistant"])
.required(),
})}
onSubmit={async (values, formikHelpers) => {
formikHelpers.setSubmitting(true);

const cleanedValues = {
...values,
slack_bot_id,
channel_name: values.channel_name,
respond_member_group_list: values.respond_member_group_list,
usePersona: values.knowledge_source === "assistant",
<div>
<CardSection>
{popup}
<Formik
initialValues={{
slack_bot_id: slack_bot_id,
channel_name:
existingSlackChannelConfig?.channel_config.channel_name,
answer_validity_check_enabled: (
existingSlackChannelConfig?.channel_config?.answer_filters || []
).includes("well_answered_postfilter"),
questionmark_prefilter_enabled: (
existingSlackChannelConfig?.channel_config?.answer_filters || []
).includes("questionmark_prefilter"),
respond_tag_only:
existingSlackChannelConfig?.channel_config?.respond_tag_only ||
false,
respond_to_bots:
existingSlackChannelConfig?.channel_config?.respond_to_bots ||
false,
show_continue_in_web_ui:
// If we're updating, we want to keep the existing value
// Otherwise, we want to default to true
existingSlackChannelConfig?.channel_config
?.show_continue_in_web_ui ?? !isUpdate,
enable_auto_filters:
existingSlackChannelConfig?.enable_auto_filters || false,
respond_member_group_list:
existingSlackChannelConfig?.channel_config
?.respond_member_group_list ?? [],
still_need_help_enabled:
existingSlackChannelConfig?.channel_config?.follow_up_tags !==
undefined,
follow_up_tags:
existingSlackChannelConfig?.channel_config?.follow_up_tags,
document_sets:
values.knowledge_source === "document_sets"
? values.document_sets
: [],
existingSlackChannelConfig && existingSlackChannelConfig.persona
? existingSlackChannelConfig.persona.document_sets.map(
(documentSet) => documentSet.id
)
: ([] as number[]),
// prettier-ignore
persona_id:
values.knowledge_source === "assistant"
? values.persona_id
: null,
standard_answer_categories: values.standard_answer_categories.map(
(category: any) => category.id
),
};
existingSlackChannelConfig?.persona &&
!isPersonaASlackBotPersona(existingSlackChannelConfig.persona)
? existingSlackChannelConfig.persona.id
: knowledgePersona?.id ?? null,
response_type:
existingSlackChannelConfig?.response_type || "citations",
standard_answer_categories: existingSlackChannelConfig
? existingSlackChannelConfig.standard_answer_categories
: [],
}}
validationSchema={Yup.object().shape({
slack_bot_id: Yup.number().required(),
channel_name: Yup.string(),
response_type: Yup.string()
.oneOf(["quotes", "citations"])
.required(),
answer_validity_check_enabled: Yup.boolean().required(),
questionmark_prefilter_enabled: Yup.boolean().required(),
respond_tag_only: Yup.boolean().required(),
respond_to_bots: Yup.boolean().required(),
show_continue_in_web_ui: Yup.boolean().required(),
enable_auto_filters: Yup.boolean().required(),
respond_member_group_list: Yup.array().of(Yup.string()).required(),
still_need_help_enabled: Yup.boolean().required(),
follow_up_tags: Yup.array().of(Yup.string()),
document_sets: Yup.array().of(Yup.number()),
persona_id: Yup.number().nullable(),
standard_answer_categories: Yup.array(),
})}
onSubmit={async (values, formikHelpers) => {
formikHelpers.setSubmitting(true);

if (!cleanedValues.still_need_help_enabled) {
cleanedValues.follow_up_tags = undefined;
} else {
if (!cleanedValues.follow_up_tags) {
cleanedValues.follow_up_tags = [];
const cleanedValues = {
...values,
slack_bot_id: slack_bot_id,
channel_name: values.channel_name!,
respond_member_group_list: values.respond_member_group_list,
usePersona: usingPersonas,
standard_answer_categories: values.standard_answer_categories.map(
(category) => category.id
),
};
if (!cleanedValues.still_need_help_enabled) {
cleanedValues.follow_up_tags = undefined;
} else {
if (!cleanedValues.follow_up_tags) {
cleanedValues.follow_up_tags = [];
}
}
}

const response = isUpdate
? await updateSlackChannelConfig(
existingSlackChannelConfig!.id,
let response;
if (isUpdate) {
response = await updateSlackChannelConfig(
existingSlackChannelConfig.id,
cleanedValues
)
: await createSlackChannelConfig(cleanedValues);
);
} else {
response = await createSlackChannelConfig(cleanedValues);
}
formikHelpers.setSubmitting(false);
if (response.ok) {
router.push(`/admin/bots/${slack_bot_id}`);
} else {
const responseJson = await response.json();
const errorMsg = responseJson.detail || responseJson.message;
setPopup({
message: isUpdate
? `Error updating OnyxBot config - ${errorMsg}`
: `Error creating OnyxBot config - ${errorMsg}`,
type: "error",
});
}
}}
>
{({ isSubmitting, values, setFieldValue }) => (
<Form>
<div className="px-6 pb-6 pt-4 w-full">
<TextFormField
name="channel_name"
label="Slack Channel Name:"
/>

formikHelpers.setSubmitting(false);
if (response.ok) {
router.push(`/admin/bots/${slack_bot_id}`);
} else {
const responseJson = await response.json();
const errorMsg = responseJson.detail || responseJson.message;
setPopup({
message: `Error ${
isUpdate ? "updating" : "creating"
} OnyxBot config - ${errorMsg}`,
type: "error",
});
}
}}
>
<SlackChannelConfigFormFields
isUpdate={isUpdate}
documentSets={documentSets}
searchEnabledAssistants={searchEnabledAssistants}
standardAnswerCategoryResponse={standardAnswerCategoryResponse}
setPopup={setPopup}
/>
</Formik>
</CardSection>
<div className="mt-6">
<Label>Knowledge Sources</Label>
<SubLabel>
Controls which information OnyxBot will pull from when
answering questions.
</SubLabel>

<Tabs
defaultValue="document_sets"
className="w-full mt-4"
value={usingPersonas ? "assistants" : "document_sets"}
onValueChange={(value) =>
setUsingPersonas(value === "assistants")
}
>
<TabsList>
<TabsTrigger value="document_sets">
Document Sets
</TabsTrigger>
<TabsTrigger value="assistants">Assistants</TabsTrigger>
</TabsList>

<TabsContent value="assistants">
<SubLabel>
Select the assistant OnyxBot will use while answering
questions in Slack.
</SubLabel>
<SelectorFormField
name="persona_id"
options={personas.map((persona) => {
return {
name: persona.name,
value: persona.id,
};
})}
/>
</TabsContent>

<TabsContent value="document_sets">
<SubLabel>
Select the document sets OnyxBot will use while
answering questions in Slack.
</SubLabel>
<SubLabel>
Note: If No Document Sets are selected, OnyxBot will
search through all connected documents.
</SubLabel>
<FieldArray
name="document_sets"
render={(arrayHelpers: ArrayHelpers) => (
<div>
<div className="mb-3 mt-2 flex gap-2 flex-wrap text-sm">
{documentSets.map((documentSet) => {
const ind = values.document_sets.indexOf(
documentSet.id
);
const isSelected = ind !== -1;

return (
<DocumentSetSelectable
key={documentSet.id}
documentSet={documentSet}
isSelected={isSelected}
onSelect={() => {
if (isSelected) {
arrayHelpers.remove(ind);
} else {
arrayHelpers.push(documentSet.id);
}
}}
/>
);
})}
</div>
<div></div>
</div>
)}
/>
</TabsContent>
</Tabs>
</div>

<div className="mt-6">
<AdvancedOptionsToggle
showAdvancedOptions={showAdvancedOptions}
setShowAdvancedOptions={setShowAdvancedOptions}
/>
</div>

{showAdvancedOptions && (
<div className="mt-4">
<div className="w-64 mb-4">
<SelectorFormField
name="response_type"
label="Answer Type"
tooltip="Controls the format of OnyxBot's responses."
options={[
{ name: "Standard", value: "citations" },
{ name: "Detailed", value: "quotes" },
]}
/>
</div>

<BooleanFormField
name="show_continue_in_web_ui"
removeIndent
label="Show Continue in Web UI button"
tooltip="If set, will show a button at the bottom of the response that allows the user to continue the conversation in the Onyx Web UI"
/>
<div className="flex flex-col space-y-3 mt-2">
<BooleanFormField
name="still_need_help_enabled"
removeIndent
label={'Give a "Still need help?" button'}
tooltip={`OnyxBot's response will include a button at the bottom
of the response that asks the user if they still need help.`}
/>
{values.still_need_help_enabled && (
<CollapsibleSection prompt="Configure Still Need Help Button">
<TextArrayField
name="follow_up_tags"
label="(Optional) Users / Groups to Tag"
values={values}
subtext={
<div>
The Slack users / groups we should tag if the
user clicks the &quot;Still need help?&quot;
button. If no emails are provided, we will not
tag anyone and will just react with a 🆘 emoji
to the original message.
</div>
}
placeholder="User email or user group name..."
/>
</CollapsibleSection>
)}

<BooleanFormField
name="answer_validity_check_enabled"
removeIndent
label="Only respond if citations found"
tooltip="If set, will only answer questions where the model successfully produces citations"
/>
<BooleanFormField
name="questionmark_prefilter_enabled"
removeIndent
label="Only respond to questions"
tooltip="If set, will only respond to messages that contain a question mark"
/>
<BooleanFormField
name="respond_tag_only"
removeIndent
label="Respond to @OnyxBot Only"
tooltip="If set, OnyxBot will only respond when directly tagged"
/>
<BooleanFormField
name="respond_to_bots"
removeIndent
label="Respond to Bot messages"
tooltip="If not set, OnyxBot will always ignore messages from Bots"
/>
<BooleanFormField
name="enable_auto_filters"
removeIndent
label="Enable LLM Autofiltering"
tooltip="If set, the LLM will generate source and time filters based on the user's query"
/>

<div className="mt-12">
<TextArrayField
name="respond_member_group_list"
label="(Optional) Respond to Certain Users / Groups"
subtext={
"If specified, OnyxBot responses will only " +
"be visible to the members or groups in this list."
}
values={values}
placeholder="User email or user group name..."
/>
</div>
</div>

<StandardAnswerCategoryDropdownField
standardAnswerCategoryResponse={
standardAnswerCategoryResponse
}
categories={values.standard_answer_categories}
setCategories={(categories) =>
setFieldValue("standard_answer_categories", categories)
}
/>
</div>
)}

<div className="flex">
<Button
type="submit"
variant="submit"
disabled={isSubmitting || !values.channel_name}
className="mx-auto w-64"
>
{isUpdate ? "Update!" : "Create!"}
</Button>
</div>
</div>
</Form>
)}
</Formik>
</CardSection>
</div>
);
};

@@ -1,530 +0,0 @@
"use client";

import React, { useState, useEffect, useMemo } from "react";
import { FieldArray, Form, useFormikContext, ErrorMessage } from "formik";
import { CCPairDescriptor, DocumentSet } from "@/lib/types";
import {
BooleanFormField,
Label,
SelectorFormField,
SubLabel,
TextArrayField,
TextFormField,
} from "@/components/admin/connectors/Field";
import { Button } from "@/components/ui/button";
import { Persona } from "@/app/admin/assistants/interfaces";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
import { DocumentSetSelectable } from "@/components/documentSet/DocumentSetSelectable";
import CollapsibleSection from "@/app/admin/assistants/CollapsibleSection";
import { StandardAnswerCategoryResponse } from "@/components/standardAnswers/getStandardAnswerCategoriesIfEE";
import { StandardAnswerCategoryDropdownField } from "@/components/standardAnswers/StandardAnswerCategoryDropdown";
import { RadioGroup } from "@/components/ui/radio-group";
import { RadioGroupItemField } from "@/components/ui/RadioGroupItemField";
import { AlertCircle, View } from "lucide-react";
import { useRouter } from "next/navigation";
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { TooltipProvider } from "@radix-ui/react-tooltip";
import { SourceIcon } from "@/components/SourceIcon";
import Link from "next/link";
import { AssistantIcon } from "@/components/assistants/AssistantIcon";

interface SlackChannelConfigFormFieldsProps {
isUpdate: boolean;
documentSets: DocumentSet[];
searchEnabledAssistants: Persona[];
standardAnswerCategoryResponse: StandardAnswerCategoryResponse;
setPopup: (popup: {
message: string;
type: "error" | "success" | "warning";
}) => void;
}

export function SlackChannelConfigFormFields({
isUpdate,
documentSets,
searchEnabledAssistants,
standardAnswerCategoryResponse,
setPopup,
}: SlackChannelConfigFormFieldsProps) {
const router = useRouter();
const { values, setFieldValue } = useFormikContext<any>();
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
const [viewUnselectableSets, setViewUnselectableSets] = useState(false);
const [viewSyncEnabledAssistants, setViewSyncEnabledAssistants] =
useState(false);

const documentSetContainsSync = (documentSet: DocumentSet) =>
documentSet.cc_pair_descriptors.some(
(descriptor) => descriptor.access_type === "sync"
);

const [syncEnabledAssistants, availableAssistants] = useMemo(() => {
const sync: Persona[] = [];
const available: Persona[] = [];

searchEnabledAssistants.forEach((persona) => {
const hasSyncSet = persona.document_sets.some(documentSetContainsSync);
if (hasSyncSet) {
sync.push(persona);
} else {
available.push(persona);
}
});

return [sync, available];
}, [searchEnabledAssistants]);

const unselectableSets = useMemo(() => {
return documentSets.filter((ds) =>
ds.cc_pair_descriptors.some(
(descriptor) => descriptor.access_type === "sync"
)
);
}, [documentSets]);
const memoizedPrivateConnectors = useMemo(() => {
const uniqueDescriptors = new Map();
documentSets.forEach((ds) => {
ds.cc_pair_descriptors.forEach((descriptor) => {
if (
descriptor.access_type === "private" &&
!uniqueDescriptors.has(descriptor.id)
) {
uniqueDescriptors.set(descriptor.id, descriptor);
}
});
});
return Array.from(uniqueDescriptors.values());
}, [documentSets]);

useEffect(() => {
const invalidSelected = values.document_sets.filter((dsId: number) =>
unselectableSets.some((us) => us.id === dsId)
);
if (invalidSelected.length > 0) {
setFieldValue(
"document_sets",
values.document_sets.filter(
(dsId: number) => !invalidSelected.includes(dsId)
)
);
setPopup({
message:
"We removed one or more document sets from your selection because they are no longer valid. Please review and update your configuration.",
type: "warning",
});
}
}, [unselectableSets, values.document_sets, setFieldValue, setPopup]);

const documentSetContainsPrivate = (documentSet: DocumentSet) => {
return documentSet.cc_pair_descriptors.some(
(descriptor) => descriptor.access_type === "private"
);
};

const shouldShowPrivacyAlert = useMemo(() => {
if (values.knowledge_source === "document_sets") {
const selectedSets = documentSets.filter((ds) =>
values.document_sets.includes(ds.id)
);
return selectedSets.some((ds) => documentSetContainsPrivate(ds));
} else if (values.knowledge_source === "assistant") {
const chosenAssistant = searchEnabledAssistants.find(
(p) => p.id == values.persona_id
);
return chosenAssistant?.document_sets.some((ds) =>
documentSetContainsPrivate(ds)
);
}
return false;
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [values.knowledge_source, values.document_sets, values.persona_id]);

const selectableSets = useMemo(() => {
return documentSets.filter(
(ds) =>
!ds.cc_pair_descriptors.some(
(descriptor) => descriptor.access_type === "sync"
)
);
}, [documentSets]);

return (
<Form className="px-6 max-w-4xl">
<div className="pt-4 w-full">
<TextFormField name="channel_name" label="Slack Channel Name:" />

<div className="space-y-2 mt-4">
<Label>Knowledge Source</Label>
<RadioGroup
className="flex flex-col gap-y-4"
value={values.knowledge_source}
onValueChange={(value: string) => {
setFieldValue("knowledge_source", value);
}}
>
<RadioGroupItemField
value="all_public"
id="all_public"
label="All Public Knowledge"
sublabel="Let OnyxBot respond based on information from all public connectors "
/>
{selectableSets.length + unselectableSets.length > 0 && (
<RadioGroupItemField
value="document_sets"
id="document_sets"
label="Specific Document Sets"
sublabel="Control which documents to use for answering questions"
/>
)}
<RadioGroupItemField
value="assistant"
id="assistant"
label="Specific Assistant"
sublabel="Control both the documents and the prompt to use for answering questions"
/>
</RadioGroup>
</div>

{values.knowledge_source === "document_sets" &&
documentSets.length > 0 && (
<div className="mt-4">
<SubLabel>
<>
Select the document sets OnyxBot will use while answering
questions in Slack.
<br />
{unselectableSets.length > 0 ? (
<span>
Some incompatible document sets are{" "}
{viewUnselectableSets ? "visible" : "hidden"}.{" "}
<button
type="button"
onClick={() =>
setViewUnselectableSets(
(viewUnselectableSets) => !viewUnselectableSets
)
}
className="text-sm text-link"
>
{viewUnselectableSets
? "Hide un-selectable "
: "View all "}
document sets
</button>
</span>
) : (
""
)}
</>
</SubLabel>
<FieldArray
name="document_sets"
render={(arrayHelpers) => (
<>
{selectableSets.length > 0 && (
<div className="mb-3 mt-2 flex gap-2 flex-wrap text-sm">
{selectableSets.map((documentSet) => {
const selectedIndex = values.document_sets.indexOf(
documentSet.id
);
const isSelected = selectedIndex !== -1;

return (
<DocumentSetSelectable
key={documentSet.id}
documentSet={documentSet}
isSelected={isSelected}
onSelect={() => {
if (isSelected) {
arrayHelpers.remove(selectedIndex);
} else {
arrayHelpers.push(documentSet.id);
}
}}
/>
);
})}
</div>
)}

{viewUnselectableSets && unselectableSets.length > 0 && (
<div className="mt-4">
<p className="text-sm text-text-dark/80">
These document sets cannot be attached as they have
auto-synced docs:
</p>
<div className="mb-3 mt-2 flex gap-2 flex-wrap text-sm">
{unselectableSets.map((documentSet) => (
<DocumentSetSelectable
key={documentSet.id}
documentSet={documentSet}
disabled
disabledTooltip="Unable to use this document set because it contains a connector with auto-sync permissions. OnyxBot's responses in this channel are visible to all Slack users, so mirroring the asker's permissions could inadvertently expose private information."
isSelected={false}
onSelect={() => {}}
/>
))}
</div>
</div>
)}
<ErrorMessage
className="text-red-500 text-sm mt-1"
name="document_sets"
component="div"
/>
</>
)}
/>
</div>
)}

{values.knowledge_source === "assistant" && (
<div className="mt-4">
<SubLabel>
<>
Select the search-enabled assistant OnyxBot will use while
answering questions in Slack.
{syncEnabledAssistants.length > 0 && (
<>
<br />
<span className="text-sm text-text-dark/80">
Note: Some of your assistants have auto-synced connectors
in their document sets. You cannot select these assistants
as they will not be able to answer questions in Slack.{" "}
<button
type="button"
onClick={() =>
setViewSyncEnabledAssistants(
(viewSyncEnabledAssistants) =>
!viewSyncEnabledAssistants
)
}
className="text-sm text-link"
>
{viewSyncEnabledAssistants
? "Hide un-selectable "
: "View all "}
assistants
</button>
</span>
</>
)}
</>
</SubLabel>

<SelectorFormField
name="persona_id"
options={availableAssistants.map((persona) => ({
name: persona.name,
value: persona.id,
}))}
/>
{viewSyncEnabledAssistants && syncEnabledAssistants.length > 0 && (
<div className="mt-4">
<p className="text-sm text-text-dark/80">
Un-selectable assistants:
</p>
<div className="mb-3 mt-2 flex gap-2 flex-wrap text-sm">
{syncEnabledAssistants.map((persona: Persona) => (
<button
type="button"
onClick={() =>
router.push(`/admin/assistants/${persona.id}`)
}
key={persona.id}
className="p-2 bg-background-100 cursor-pointer rounded-md flex items-center gap-2"
>
<AssistantIcon
assistant={persona}
size={16}
className="flex-none"
/>
{persona.name}
</button>
))}
</div>
</div>
)}
</div>
)}
</div>

<div className="mt-2">
|
||||
<AdvancedOptionsToggle
|
||||
showAdvancedOptions={showAdvancedOptions}
|
||||
setShowAdvancedOptions={setShowAdvancedOptions}
|
||||
/>
|
||||
</div>
|
||||
{showAdvancedOptions && (
|
||||
<div className="mt-4">
|
||||
<div className="w-64 mb-4">
|
||||
<SelectorFormField
|
||||
name="response_type"
|
||||
label="Answer Type"
|
||||
tooltip="Controls the format of OnyxBot's responses."
|
||||
options={[
|
||||
{ name: "Standard", value: "citations" },
|
||||
{ name: "Detailed", value: "quotes" },
|
||||
]}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<BooleanFormField
|
||||
name="show_continue_in_web_ui"
|
||||
removeIndent
|
||||
label="Show Continue in Web UI button"
|
||||
tooltip="If set, will show a button at the bottom of the response that allows the user to continue the conversation in the Onyx Web UI"
|
||||
/>
|
||||
|
||||
<div className="flex flex-col space-y-3 mt-2">
|
||||
<BooleanFormField
|
||||
name="still_need_help_enabled"
|
||||
removeIndent
|
||||
onChange={(checked: boolean) => {
|
||||
setFieldValue("still_need_help_enabled", checked);
|
||||
if (!checked) {
|
||||
setFieldValue("follow_up_tags", []);
|
||||
}
|
||||
}}
|
||||
label={'Give a "Still need help?" button'}
|
||||
tooltip={`OnyxBot's response will include a button at the bottom
|
||||
of the response that asks the user if they still need help.`}
|
||||
/>
|
||||
{values.still_need_help_enabled && (
|
||||
<CollapsibleSection prompt="Configure Still Need Help Button">
|
||||
<TextArrayField
|
||||
name="follow_up_tags"
|
||||
label="(Optional) Users / Groups to Tag"
|
||||
values={values}
|
||||
subtext={
|
||||
<div>
|
||||
The Slack users / groups we should tag if the user clicks
|
||||
the "Still need help?" button. If no emails are
|
||||
provided, we will not tag anyone and will just react with
|
||||
a 🆘 emoji to the original message.
|
||||
</div>
|
||||
}
|
||||
placeholder="User email or user group name..."
|
||||
/>
|
||||
</CollapsibleSection>
|
||||
)}
|
||||
|
||||
<BooleanFormField
|
||||
name="answer_validity_check_enabled"
|
||||
removeIndent
|
||||
label="Only respond if citations found"
|
||||
tooltip="If set, will only answer questions where the model successfully produces citations"
|
||||
/>
|
||||
<BooleanFormField
|
||||
name="questionmark_prefilter_enabled"
|
||||
removeIndent
|
||||
label="Only respond to questions"
|
||||
tooltip="If set, OnyxBot will only respond to messages that contain a question mark"
|
||||
/>
|
||||
<BooleanFormField
|
||||
name="respond_tag_only"
|
||||
removeIndent
|
||||
label="Respond to @OnyxBot Only"
|
||||
tooltip="If set, OnyxBot will only respond when directly tagged"
|
||||
/>
|
||||
<BooleanFormField
|
||||
name="respond_to_bots"
|
||||
removeIndent
|
||||
label="Respond to Bot messages"
|
||||
tooltip="If not set, OnyxBot will always ignore messages from Bots"
|
||||
/>
|
||||
<BooleanFormField
|
||||
name="enable_auto_filters"
|
||||
removeIndent
|
||||
label="Enable LLM Autofiltering"
|
||||
tooltip="If set, the LLM will generate source and time filters based on the user's query"
|
||||
/>
|
||||
|
||||
<div className="mt-12">
|
||||
<TextArrayField
|
||||
name="respond_member_group_list"
|
||||
label="(Optional) Respond to Certain Users / Groups"
|
||||
subtext={
|
||||
"If specified, OnyxBot responses will only " +
|
||||
"be visible to the members or groups in this list."
|
||||
}
|
||||
values={values}
|
||||
placeholder="User email or user group name..."
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<StandardAnswerCategoryDropdownField
|
||||
standardAnswerCategoryResponse={standardAnswerCategoryResponse}
|
||||
categories={values.standard_answer_categories}
|
||||
setCategories={(categories: any) =>
|
||||
setFieldValue("standard_answer_categories", categories)
|
||||
}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex mt-2 gap-x-2 w-full justify-end flex">
|
||||
{shouldShowPrivacyAlert && (
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<div className="flex hover:bg-background-150 cursor-pointer p-2 rounded-lg items-center">
|
||||
<AlertCircle className="h-5 w-5 text-alert" />
|
||||
</div>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="top" className="bg-white p-4 w-80">
|
||||
<Label className="text-text mb-2 font-semibold">
|
||||
Privacy Alert
|
||||
</Label>
|
||||
<p className="text-sm text-text-darker mb-4">
|
||||
Please note that at least one of the documents accessible by
|
||||
your OnyxBot is marked as private and may contain sensitive
|
||||
information. These documents will be accessible to all users
|
||||
of this OnyxBot. Ensure this aligns with your intended
|
||||
document sharing policy.
|
||||
</p>
|
||||
<div className="space-y-2">
|
||||
<h4 className="text-sm text-text font-medium">
|
||||
Relevant Connectors:
|
||||
</h4>
|
||||
<div className="max-h-40 overflow-y-auto border-t border-text-subtle flex-col gap-y-2">
|
||||
{memoizedPrivateConnectors.map(
|
||||
(ccpairinfo: CCPairDescriptor<any, any>) => (
|
||||
<Link
|
||||
key={ccpairinfo.id}
|
||||
href={`/admin/connector/${ccpairinfo.id}`}
|
||||
className="flex items-center p-2 rounded-md hover:bg-gray-100 transition-colors"
|
||||
>
|
||||
<div className="mr-2">
|
||||
<SourceIcon
|
||||
iconSize={16}
|
||||
sourceType={ccpairinfo.connector.source}
|
||||
/>
|
||||
</div>
|
||||
<span className="text-sm text-text-darker font-medium">
|
||||
{ccpairinfo.name}
|
||||
</span>
|
||||
</Link>
|
||||
)
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)}
|
||||
<Button onClick={() => {}} type="submit">
|
||||
{isUpdate ? "Update" : "Create"}
|
||||
</Button>
|
||||
<Button type="button" variant="outline" onClick={() => router.back()}>
|
||||
Cancel
|
||||
</Button>
|
||||
</div>
|
||||
</Form>
|
||||
);
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff.