Mirror of https://github.com/onyx-dot-app/onyx.git (synced 2026-03-07 00:25:48 +00:00)

Compare commits: cloud_debu...search_2_0 (8 commits)
Commits:
- 7c5a4d4c8e
- 95c1bedd10
- a7182fb60f
- 91a9e2e6be
- 46c3788941
- 82914ad365
- 11ce2a62ab
- 6311b70cc6
@@ -1,48 +1,38 @@
from typing import Any, Literal

from onyx.db.engine import get_iam_auth_token
from onyx.configs.app_configs import USE_IAM_AUTH
from onyx.configs.app_configs import POSTGRES_HOST
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.app_configs import AWS_REGION
from onyx.db.engine import build_connection_string
from onyx.db.engine import get_all_tenant_ids
from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy import text
from sqlalchemy.engine.base import Connection
import os
import ssl
from typing import Literal
import asyncio
import logging
from logging.config import fileConfig
import logging

from alembic import context
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.sql import text
from sqlalchemy.sql.schema import SchemaItem
from onyx.configs.constants import SSL_CERT_FILE
from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA

from shared_configs.configs import MULTI_TENANT
from onyx.db.engine import build_connection_string
from onyx.db.models import Base
from celery.backends.database.session import ResultModelBase  # type: ignore
from onyx.db.engine import get_all_tenant_ids
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

# Alembic Config object
config = context.config

# Interpret the config file for Python logging.
if config.config_file_name is not None and config.attributes.get(
    "configure_logger", True
):
    fileConfig(config.config_file_name)

# Add your model's MetaData object here for 'autogenerate' support
target_metadata = [Base.metadata, ResultModelBase.metadata]

EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
logger = logging.getLogger(__name__)

ssl_context: ssl.SSLContext | None = None
if USE_IAM_AUTH:
    if not os.path.exists(SSL_CERT_FILE):
        raise FileNotFoundError(f"Expected {SSL_CERT_FILE} when USE_IAM_AUTH is true.")
    ssl_context = ssl.create_default_context(cafile=SSL_CERT_FILE)
# Set up logging
logger = logging.getLogger(__name__)


def include_object(
@@ -59,12 +49,20 @@ def include_object(
    reflected: bool,
    compare_to: SchemaItem | None,
) -> bool:
    """
    Determines whether a database object should be included in migrations.
    Excludes specified tables from migrations.
    """
    if type_ == "table" and name in EXCLUDE_TABLES:
        return False
    return True


def get_schema_options() -> tuple[str, bool, bool]:
    """
    Parses command-line options passed via '-x' in Alembic commands.
    Recognizes 'schema', 'create_schema', and 'upgrade_all_tenants' options.
    """
    x_args_raw = context.get_x_argument()
    x_args = {}
    for arg in x_args_raw:
@@ -92,12 +90,16 @@ def get_schema_options() -> tuple[str, bool, bool]:
def do_run_migrations(
    connection: Connection, schema_name: str, create_schema: bool
) -> None:
    """
    Executes migrations in the specified schema.
    """
    logger.info(f"About to migrate schema: {schema_name}")

    if create_schema:
        connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
        connection.execute(text("COMMIT"))

    # Set search_path to the target schema
    connection.execute(text(f'SET search_path TO "{schema_name}"'))

    context.configure(
@@ -115,25 +117,11 @@ def do_run_migrations(
    context.run_migrations()


def provide_iam_token_for_alembic(
    dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
) -> None:
    if USE_IAM_AUTH:
        # Database connection settings
        region = AWS_REGION
        host = POSTGRES_HOST
        port = POSTGRES_PORT
        user = POSTGRES_USER

        # Get IAM authentication token
        token = get_iam_auth_token(host, port, user, region)

        # For Alembic / SQLAlchemy in this context, set SSL and password
        cparams["password"] = token
        cparams["ssl"] = ssl_context


async def run_async_migrations() -> None:
    """
    Determines whether to run migrations for a single schema or all schemas,
    and executes migrations accordingly.
    """
    schema_name, create_schema, upgrade_all_tenants = get_schema_options()

    engine = create_async_engine(
@@ -141,16 +129,10 @@ async def run_async_migrations() -> None:
        poolclass=pool.NullPool,
    )

    if USE_IAM_AUTH:

        @event.listens_for(engine.sync_engine, "do_connect")
        def event_provide_iam_token_for_alembic(
            dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
        ) -> None:
            provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)

    if upgrade_all_tenants:
        # Run migrations for all tenant schemas sequentially
        tenant_schemas = get_all_tenant_ids()

        for schema in tenant_schemas:
            try:
                logger.info(f"Migrating schema: {schema}")
@@ -180,20 +162,15 @@ async def run_async_migrations() -> None:


def run_migrations_offline() -> None:
    """
    Run migrations in 'offline' mode.
    """
    schema_name, _, upgrade_all_tenants = get_schema_options()
    url = build_connection_string()

    if upgrade_all_tenants:
        # Run offline migrations for all tenant schemas
        engine = create_async_engine(url)

        if USE_IAM_AUTH:

            @event.listens_for(engine.sync_engine, "do_connect")
            def event_provide_iam_token_for_alembic_offline(
                dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
            ) -> None:
                provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)

        tenant_schemas = get_all_tenant_ids()
        engine.sync_engine.dispose()

@@ -230,6 +207,9 @@ def run_migrations_offline() -> None:


def run_migrations_online() -> None:
    """
    Runs migrations in 'online' mode using an asynchronous engine.
    """
    asyncio.run(run_async_migrations())
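The "@@ -92,12 +90,16 @@" hunk cuts off inside the "-x" parsing loop of get_schema_options. A minimal sketch of how such parsing typically proceeds; everything past the "for" line, including the defaults, is an assumption rather than the committed code:

# Hedged sketch: turn "-x key=value" pairs into a dict, then read the three
# options named in the docstring. Default values here are assumptions.
x_args_raw = context.get_x_argument()
x_args = {}
for arg in x_args_raw:
    for pair in arg.split(","):
        if "=" in pair:
            key, value = pair.split("=", 1)
            x_args[key.strip()] = value.strip()

schema_name = x_args.get("schema", POSTGRES_DEFAULT_SCHEMA)
create_schema = x_args.get("create_schema", "true").lower() == "true"
upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true"

# Typical invocation:
#   alembic -x schema=tenant_abc -x create_schema=true upgrade head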
@@ -1,121 +0,0 @@
"""properly_cascade

Revision ID: 35e518e0ddf4
Revises: 91a0a4d62b14
Create Date: 2024-09-20 21:24:04.891018

"""
from alembic import op


# revision identifiers, used by Alembic.
revision = "35e518e0ddf4"
down_revision = "91a0a4d62b14"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Update chat_message foreign key constraint
    op.drop_constraint(
        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
    )
    op.create_foreign_key(
        "chat_message_chat_session_id_fkey",
        "chat_message",
        "chat_session",
        ["chat_session_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Update chat_message__search_doc foreign key constraints
    op.drop_constraint(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )
    op.drop_constraint(
        "chat_message__search_doc_search_doc_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )

    op.create_foreign_key(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        "chat_message",
        ["chat_message_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "chat_message__search_doc_search_doc_id_fkey",
        "chat_message__search_doc",
        "search_doc",
        ["search_doc_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # Add CASCADE delete for tool_call foreign key
    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
    op.create_foreign_key(
        "tool_call_message_id_fkey",
        "tool_call",
        "chat_message",
        ["message_id"],
        ["id"],
        ondelete="CASCADE",
    )


def downgrade() -> None:
    # Revert chat_message foreign key constraint
    op.drop_constraint(
        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
    )
    op.create_foreign_key(
        "chat_message_chat_session_id_fkey",
        "chat_message",
        "chat_session",
        ["chat_session_id"],
        ["id"],
    )

    # Revert chat_message__search_doc foreign key constraints
    op.drop_constraint(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )
    op.drop_constraint(
        "chat_message__search_doc_search_doc_id_fkey",
        "chat_message__search_doc",
        type_="foreignkey",
    )

    op.create_foreign_key(
        "chat_message__search_doc_chat_message_id_fkey",
        "chat_message__search_doc",
        "chat_message",
        ["chat_message_id"],
        ["id"],
    )
    op.create_foreign_key(
        "chat_message__search_doc_search_doc_id_fkey",
        "chat_message__search_doc",
        "search_doc",
        ["search_doc_id"],
        ["id"],
    )

    # Revert tool_call foreign key constraint
    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
    op.create_foreign_key(
        "tool_call_message_id_fkey",
        "tool_call",
        "chat_message",
        ["message_id"],
        ["id"],
    )
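With these ondelete="CASCADE" constraints in place, deleting a chat_session row also removes its chat_message rows (and, transitively, their chat_message__search_doc and tool_call rows). A quick way to observe the effect; this is a sketch with a placeholder connection URL and id, not part of the migration:

from sqlalchemy import create_engine, text

engine = create_engine("postgresql://localhost/onyx")  # placeholder URL
with engine.begin() as conn:
    conn.execute(text("DELETE FROM chat_session WHERE id = :id"), {"id": 42})
    remaining = conn.execute(
        text("SELECT count(*) FROM chat_message WHERE chat_session_id = :id"),
        {"id": 42},
    ).scalar()
    assert remaining == 0  # the cascade removed the dependent messages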
@@ -1,87 +0,0 @@
"""delete workspace

Revision ID: c0aab6edb6dd
Revises: 35e518e0ddf4
Create Date: 2024-12-17 14:37:07.660631

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "c0aab6edb6dd"
down_revision = "35e518e0ddf4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE connector
        SET connector_specific_config = connector_specific_config - 'workspace'
        WHERE source = 'SLACK'
        """
    )


def downgrade() -> None:
    import json
    from sqlalchemy import text
    from slack_sdk import WebClient

    conn = op.get_bind()

    # Fetch all Slack credentials
    creds_result = conn.execute(
        text("SELECT id, credential_json FROM credential WHERE source = 'SLACK'")
    )
    all_slack_creds = creds_result.fetchall()
    if not all_slack_creds:
        return

    for cred_row in all_slack_creds:
        credential_id, credential_json = cred_row

        credential_json = (
            credential_json.tobytes().decode("utf-8")
            if isinstance(credential_json, memoryview)
            else credential_json.decode("utf-8")
        )
        credential_data = json.loads(credential_json)
        slack_bot_token = credential_data.get("slack_bot_token")
        if not slack_bot_token:
            print(
                f"No slack_bot_token found for credential {credential_id}. "
                "Your Slack connector will not function until you upgrade and provide a valid token."
            )
            continue

        client = WebClient(token=slack_bot_token)
        try:
            auth_response = client.auth_test()
            workspace = auth_response["url"].split("//")[1].split(".")[0]

            # Update only the connectors linked to this credential
            # (and which are Slack connectors).
            op.execute(
                f"""
                UPDATE connector AS c
                SET connector_specific_config = jsonb_set(
                    connector_specific_config,
                    '{{workspace}}',
                    to_jsonb('{workspace}'::text)
                )
                FROM connector_credential_pair AS ccp
                WHERE ccp.connector_id = c.id
                  AND c.source = 'SLACK'
                  AND ccp.credential_id = {credential_id}
                """
            )
        except Exception:
            print(
                f"We were unable to get the workspace url for your Slack Connector with id {credential_id}."
            )
            print("This connector will no longer work until you upgrade.")
            continue
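The downgrade interpolates workspace and credential_id directly into the SQL string. A bound-parameter variant of the same statement avoids quoting and injection pitfalls; this is a sketch (conn, workspace, and credential_id as in the downgrade above), not the committed code:

from sqlalchemy import text

conn.execute(
    text(
        """
        UPDATE connector AS c
        SET connector_specific_config = jsonb_set(
            connector_specific_config,
            '{workspace}',
            to_jsonb(CAST(:workspace AS text))
        )
        FROM connector_credential_pair AS ccp
        WHERE ccp.connector_id = c.id
          AND c.source = 'SLACK'
          AND ccp.credential_id = :credential_id
        """
    ),
    {"workspace": workspace, "credential_id": credential_id},
)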
@@ -53,5 +53,3 @@ OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(
# when the capture is called. These defaults prevent Posthog issues from breaking the Onyx app
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"

HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")
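Note the os.environ.get(...) or default idiom used for the PostHog settings: unlike passing a default to get, it also falls back when the variable is set but empty.

import os

os.environ["POSTHOG_API_KEY"] = ""  # set, but empty
print(os.environ.get("POSTHOG_API_KEY", "FooBar"))    # "" (default not used)
print(os.environ.get("POSTHOG_API_KEY") or "FooBar")  # "FooBar"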
@@ -3,15 +3,12 @@ import logging
import uuid

import aiohttp  # Async HTTP client
import httpx
from fastapi import HTTPException
from fastapi import Request
from sqlalchemy import select
from sqlalchemy.orm import Session

from ee.onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
from ee.onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
from ee.onyx.configs.app_configs import HUBSPOT_TRACKING_URL
from ee.onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import TenantCreationPayload
@@ -50,16 +47,13 @@ from shared_configs.enums import EmbeddingProvider
logger = logging.getLogger(__name__)


async def get_or_provision_tenant(
    email: str, referral_source: str | None = None, request: Request | None = None
async def get_or_create_tenant_id(
    email: str, referral_source: str | None = None
) -> str:
    """Get existing tenant ID for an email or create a new tenant if none exists."""
    if not MULTI_TENANT:
        return POSTGRES_DEFAULT_SCHEMA

    if referral_source and request:
        await submit_to_hubspot(email, referral_source, request)

    try:
        tenant_id = get_tenant_id_for_email(email)
    except exceptions.UserNotExists:
@@ -287,36 +281,3 @@ def configure_default_api_keys(db_session: Session) -> None:
        logger.info(
            "COHERE_DEFAULT_API_KEY not set, skipping Cohere embedding provider configuration"
        )


async def submit_to_hubspot(
    email: str, referral_source: str | None, request: Request
) -> None:
    if not HUBSPOT_TRACKING_URL:
        logger.info("HUBSPOT_TRACKING_URL not set, skipping HubSpot submission")
        return

    # HubSpot tracking cookie
    hubspot_cookie = request.cookies.get("hubspotutk")

    # IP address
    ip_address = request.client.host if request.client else None

    data = {
        "fields": [
            {"name": "email", "value": email},
            {"name": "referral_source", "value": referral_source or ""},
        ],
        "context": {
            "hutk": hubspot_cookie,
            "ipAddress": ip_address,
            "pageUri": str(request.url),
            "pageName": "User Registration",
        },
    }

    async with httpx.AsyncClient() as client:
        response = await client.post(HUBSPOT_TRACKING_URL, json=data)

    if response.status_code != 200:
        logger.error(f"Failed to submit to HubSpot: {response.text}")
@@ -1,38 +1,14 @@
from typing import Any

from posthog import Posthog

from ee.onyx.configs.app_configs import POSTHOG_API_KEY
from ee.onyx.configs.app_configs import POSTHOG_HOST
from onyx.utils.logger import setup_logger

logger = setup_logger()


def posthog_on_error(error: Any, items: Any) -> None:
    logger.error(f"PostHog error: {error}, items: {items}")


posthog = Posthog(
    project_api_key=POSTHOG_API_KEY,
    host=POSTHOG_HOST,
    debug=True,
    on_error=posthog_on_error,
)
posthog = Posthog(project_api_key=POSTHOG_API_KEY, host=POSTHOG_HOST)


def event_telemetry(
    distinct_id: str, event: str, properties: dict | None = None
    distinct_id: str,
    event: str,
    properties: dict | None = None,
) -> None:
    logger.info(f"Capturing Posthog event: {distinct_id} {event} {properties}")
    print("API KEY", POSTHOG_API_KEY)
    print("HOST", POSTHOG_HOST)
    try:
        print(type(distinct_id))
        print(type(event))
        print(type(properties))
        response = posthog.capture(distinct_id, event, properties)
        posthog.flush()
        print(response)
    except Exception as e:
        logger.error(f"Error capturing Posthog event: {e}")
    posthog.capture(distinct_id, event, properties)
@@ -27,8 +27,8 @@ from shared_configs.configs import SENTRY_DSN
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"

HF_CACHE_PATH = Path(os.path.expanduser("~")) / ".cache/huggingface"
TEMP_HF_CACHE_PATH = Path(os.path.expanduser("~")) / ".cache/temp_huggingface"
HF_CACHE_PATH = Path("/root/.cache/huggingface/")
TEMP_HF_CACHE_PATH = Path("/root/.cache/temp_huggingface/")

transformer_logging.set_verbosity_error()
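This hunk replaces the hard-coded /root cache paths with ones derived from the current user's home directory; the two are only equivalent when the process runs as root.

import os
from pathlib import Path

# Follows $HOME, so it resolves to /root/.cache/huggingface only for root.
print(Path(os.path.expanduser("~")) / ".cache/huggingface")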
backend/onyx/agent_search/answer_query/graph_builder.py (new file, 100 lines)
@@ -0,0 +1,100 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph

from onyx.agent_search.answer_query.nodes.answer_check import answer_check
from onyx.agent_search.answer_query.nodes.answer_generation import answer_generation
from onyx.agent_search.answer_query.nodes.format_answer import format_answer
from onyx.agent_search.answer_query.states import AnswerQueryInput
from onyx.agent_search.answer_query.states import AnswerQueryOutput
from onyx.agent_search.answer_query.states import AnswerQueryState
from onyx.agent_search.expanded_retrieval.graph_builder import (
    expanded_retrieval_graph_builder,
)


def answer_query_graph_builder() -> StateGraph:
    graph = StateGraph(
        state_schema=AnswerQueryState,
        input=AnswerQueryInput,
        output=AnswerQueryOutput,
    )

    ### Add nodes ###

    expanded_retrieval = expanded_retrieval_graph_builder().compile()
    graph.add_node(
        node="expanded_retrieval_for_initial_decomp",
        action=expanded_retrieval,
    )
    graph.add_node(
        node="answer_check",
        action=answer_check,
    )
    graph.add_node(
        node="answer_generation",
        action=answer_generation,
    )
    graph.add_node(
        node="format_answer",
        action=format_answer,
    )

    ### Add edges ###

    graph.add_edge(
        start_key=START,
        end_key="expanded_retrieval_for_initial_decomp",
    )
    graph.add_edge(
        start_key="expanded_retrieval_for_initial_decomp",
        end_key="answer_generation",
    )
    graph.add_edge(
        start_key="answer_generation",
        end_key="answer_check",
    )
    graph.add_edge(
        start_key="answer_check",
        end_key="format_answer",
    )
    graph.add_edge(
        start_key="format_answer",
        end_key=END,
    )

    return graph


if __name__ == "__main__":
    from onyx.db.engine import get_session_context_manager
    from onyx.llm.factory import get_default_llms
    from onyx.context.search.models import SearchRequest

    graph = answer_query_graph_builder()
    compiled_graph = graph.compile()
    primary_llm, fast_llm = get_default_llms()
    search_request = SearchRequest(
        query="Who made Excel and what other products did they make?",
    )
    with get_session_context_manager() as db_session:
        inputs = AnswerQueryInput(
            search_request=search_request,
            primary_llm=primary_llm,
            fast_llm=fast_llm,
            db_session=db_session,
            query_to_answer="Who made Excel?",
        )
        output = compiled_graph.invoke(
            input=inputs,
            # debug=True,
            # subgraphs=True,
        )
        print(output)
        # for namespace, chunk in compiled_graph.stream(
        #     input=inputs,
        #     # debug=True,
        #     subgraphs=True,
        # ):
        #     print(namespace)
        #     print(chunk)
backend/onyx/agent_search/answer_query/nodes/answer_check.py (new file, 30 lines)
@@ -0,0 +1,30 @@
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from onyx.agent_search.answer_query.states import AnswerQueryState
from onyx.agent_search.answer_query.states import QACheckOutput
from onyx.agent_search.shared_graph_utils.prompts import SUB_CHECK_PROMPT


def answer_check(state: AnswerQueryState) -> QACheckOutput:
    msg = [
        HumanMessage(
            content=SUB_CHECK_PROMPT.format(
                question=state["query_to_answer"],
                base_answer=state["answer"],
            )
        )
    ]

    fast_llm = state["fast_llm"]
    response = list(
        fast_llm.stream(
            prompt=msg,
        )
    )

    response_str = merge_message_runs(response, chunk_separator="")[0].content

    return QACheckOutput(
        answer_quality=response_str,
    )
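The list(fast_llm.stream(...)) call collects the response as a list of message chunks; merge_message_runs with an empty chunk_separator concatenates consecutive chunks back into one message. A minimal, self-contained illustration of that helper (assuming langchain_core is installed):

from langchain_core.messages import AIMessageChunk, merge_message_runs

chunks = [AIMessageChunk(content="Hel"), AIMessageChunk(content="lo")]
merged = merge_message_runs(chunks, chunk_separator="")
print(merged[0].content)  # "Hello"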
backend/onyx/agent_search/answer_query/nodes/answer_generation.py (new file, 32 lines)
@@ -0,0 +1,32 @@
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from onyx.agent_search.answer_query.states import AnswerQueryState
from onyx.agent_search.answer_query.states import QAGenerationOutput
from onyx.agent_search.shared_graph_utils.prompts import BASE_RAG_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_docs


def answer_generation(state: AnswerQueryState) -> QAGenerationOutput:
    query = state["query_to_answer"]
    docs = state["documents"]

    print(f"Number of verified retrieval docs: {len(docs)}")

    msg = [
        HumanMessage(
            content=BASE_RAG_PROMPT.format(question=query, context=format_docs(docs))
        )
    ]

    fast_llm = state["fast_llm"]
    response = list(
        fast_llm.stream(
            prompt=msg,
        )
    )

    answer_str = merge_message_runs(response, chunk_separator="")[0].content
    return QAGenerationOutput(
        answer=answer_str,
    )
backend/onyx/agent_search/answer_query/nodes/format_answer.py (new file, 20 lines)
@@ -0,0 +1,20 @@
from onyx.agent_search.answer_query.states import AnswerQueryOutput
from onyx.agent_search.answer_query.states import AnswerQueryState
from onyx.agent_search.answer_query.states import SearchAnswerResults


def format_answer(state: AnswerQueryState) -> AnswerQueryOutput:
    return AnswerQueryOutput(
        decomp_answer_results=[
            SearchAnswerResults(
                query=state["query_to_answer"],
                quality=state["answer_quality"],
                answer=state["answer"],
                documents=state["documents"],
                ranking_scores=state["ranking_scores"],
                original_question_ranking_scores=state[
                    "original_question_ranking_scores"
                ],
            )
        ],
    )
backend/onyx/agent_search/answer_query/states.py (new file, 52 lines)
@@ -0,0 +1,52 @@
from operator import add
from typing import Annotated
from typing import TypedDict

from pydantic import BaseModel

from onyx.agent_search.core_state import PrimaryState
from onyx.agent_search.expanded_retrieval.states import DocRerankingOutput
from onyx.agent_search.shared_graph_utils.operators import dedup_inference_sections
from onyx.context.search.models import InferenceSection


class SearchAnswerResults(BaseModel):
    query: str
    answer: str
    quality: str
    documents: Annotated[list[InferenceSection], dedup_inference_sections]


class QACheckOutput(TypedDict, total=False):
    answer_quality: str


class QAGenerationOutput(TypedDict, total=False):
    answer: str


class ExpandedRetrievalOutput(TypedDict):
    documents: Annotated[list[InferenceSection], dedup_inference_sections]
    reranked_documents: Annotated[list[InferenceSection], dedup_inference_sections]
    original_question_ranking_scores: Annotated[list[dict[str, float]], add]
    ranking_scores: Annotated[list[dict[str, float]], add]


class AnswerQueryState(
    PrimaryState,
    QACheckOutput,
    QAGenerationOutput,
    ExpandedRetrievalOutput,
    total=True,
):
    query_to_answer: str


class AnswerQueryInput(PrimaryState, QAGenerationOutput, total=True):
    query_to_answer: str


class AnswerQueryOutput(DocRerankingOutput):
    decomp_answer_results: list[SearchAnswerResults]
    original_question_ranking_scores: Annotated[list[dict[str, float]], add]
    ranking_scores: Annotated[list[dict[str, float]], add]
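The Annotated[..., reducer] pattern above is how LangGraph decides what to do when several parallel branches write the same state key: instead of overwriting, it calls the reducer with the existing and incoming values. A standalone illustration of that merge semantics (a sketch of the behavior, not LangGraph internals):

from operator import add

def apply_update(state: dict, update: dict, reducers: dict) -> dict:
    merged = dict(state)
    for key, value in update.items():
        if key in merged and key in reducers:
            merged[key] = reducers[key](merged[key], value)
        else:
            merged[key] = value
    return merged

reducers = {"ranking_scores": add}  # operator.add concatenates lists
state = {"ranking_scores": [{"reranked": 0.7}]}
update = {"ranking_scores": [{"reranked": 0.9}]}
print(apply_update(state, update, reducers))
# {'ranking_scores': [{'reranked': 0.7}, {'reranked': 0.9}]}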
backend/onyx/agent_search/core_state.py (new file, 15 lines)
@@ -0,0 +1,15 @@
from typing import TypedDict

from sqlalchemy.orm import Session

from onyx.context.search.models import SearchRequest
from onyx.llm.interfaces import LLM


class PrimaryState(TypedDict, total=False):
    search_request: SearchRequest
    primary_llm: LLM
    fast_llm: LLM
    # a single session for the entire agent search
    # is fine if we are only reading
    db_session: Session
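total=False makes every PrimaryState key optional at the type level, which is what lets the state classes built on it compose without supplying all keys up front. A minimal example:

from typing import TypedDict

class Example(TypedDict, total=False):
    x: int

partial: Example = {}  # valid: keys may be omitted
partial["x"] = 1       # and filled in later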
backend/onyx/agent_search/deep_answer/edges.py (new empty file)
backend/onyx/agent_search/deep_answer/nodes/answer_generation.py (new file, 114 lines)
@@ -0,0 +1,114 @@
from typing import Any

from langchain_core.messages import HumanMessage

from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import COMBINED_CONTEXT
from onyx.agent_search.shared_graph_utils.prompts import MODIFIED_RAG_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_docs
from onyx.agent_search.shared_graph_utils.utils import normalize_whitespace


# aggregate sub questions and answers
def deep_answer_generation(state: MainState) -> dict[str, Any]:
    """
    Generate the deep answer from the original question, the retrieved
    documents, and the dynamic context built from the sub-question answers.

    Args:
        state (MainState): The current state

    Returns:
        dict: The updated state with the deep answer
    """
    print("---DEEP GENERATE---")

    question = state["original_question"]
    docs = state["deduped_retrieval_docs"]

    deep_answer_context = state["core_answer_dynamic_context"]

    print(f"Number of verified retrieval docs - deep: {len(docs)}")

    combined_context = normalize_whitespace(
        COMBINED_CONTEXT.format(
            deep_answer_context=deep_answer_context, formated_docs=format_docs(docs)
        )
    )

    msg = [
        HumanMessage(
            content=MODIFIED_RAG_PROMPT.format(
                question=question, combined_context=combined_context
            )
        )
    ]

    # Grader
    model = state["fast_llm"]
    response = model.invoke(msg)

    return {
        "deep_answer": response.content,
    }


def final_stuff(state: MainState) -> dict[str, Any]:
    """
    Print the message log, the base answer, the deep answer (if one was
    generated), and the verified sub-question/answer pairs.

    Args:
        state (MainState): The current state

    Returns:
        dict: An empty update (this node only logs)
    """
    print("---FINAL---")

    messages = state["log_messages"]
    time_ordered_messages = [x.pretty_repr() for x in messages]
    time_ordered_messages.sort()

    print("Message Log:")
    print("\n".join(time_ordered_messages))

    initial_sub_qas = state["initial_sub_qas"]
    initial_sub_qa_list = []
    for initial_sub_qa in initial_sub_qas:
        if initial_sub_qa["sub_answer_check"] == "yes":
            initial_sub_qa_list.append(
                f' Question:\n {initial_sub_qa["sub_question"]}\n --\n Answer:\n {initial_sub_qa["sub_answer"]}\n -----'
            )

    initial_sub_qa_context = "\n".join(initial_sub_qa_list)

    base_answer = state["base_answer"]

    print(f"Final Base Answer:\n{base_answer}")
    print("--------------------------------")
    print(f"Initial Answered Sub Questions:\n{initial_sub_qa_context}")
    print("--------------------------------")

    if not state.get("deep_answer"):
        print("No Deep Answer was required")
        return {}

    deep_answer = state["deep_answer"]
    sub_qas = state["sub_qas"]
    sub_qa_list = []
    for sub_qa in sub_qas:
        if sub_qa["sub_answer_check"] == "yes":
            sub_qa_list.append(
                f' Question:\n {sub_qa["sub_question"]}\n --\n Answer:\n {sub_qa["sub_answer"]}\n -----'
            )

    sub_qa_context = "\n".join(sub_qa_list)

    print(f"Final Base Answer:\n{base_answer}")
    print("--------------------------------")
    print(f"Final Deep Answer:\n{deep_answer}")
    print("--------------------------------")
    print("Sub Questions and Answers:")
    print(sub_qa_context)

    return {}
backend/onyx/agent_search/deep_answer/nodes/deep_decomp.py (new file, 78 lines)
@@ -0,0 +1,78 @@
import json
import re
from datetime import datetime
from typing import Any

from langchain_core.messages import HumanMessage

from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import DEEP_DECOMPOSE_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_entity_term_extraction
from onyx.agent_search.shared_graph_utils.utils import generate_log_message


def decompose(state: MainState) -> dict[str, Any]:
    """Decompose the original question into new sub-questions, given the base
    answer, the extracted entities/terms, and the sub-questions that were
    already answered (or failed) in the initial pass."""

    node_start_time = datetime.now()

    question = state["original_question"]
    base_answer = state["base_answer"]

    # get the entity term extraction dict and properly format it
    entity_term_extraction_dict = state["retrieved_entities_relationships"][
        "retrieved_entities_relationships"
    ]

    entity_term_extraction_str = format_entity_term_extraction(
        entity_term_extraction_dict
    )

    initial_question_answers = state["initial_sub_qas"]

    addressed_question_list = [
        x["sub_question"]
        for x in initial_question_answers
        if x["sub_answer_check"] == "yes"
    ]
    failed_question_list = [
        x["sub_question"]
        for x in initial_question_answers
        if x["sub_answer_check"] == "no"
    ]

    msg = [
        HumanMessage(
            content=DEEP_DECOMPOSE_PROMPT.format(
                question=question,
                entity_term_extraction_str=entity_term_extraction_str,
                base_answer=base_answer,
                answered_sub_questions="\n - ".join(addressed_question_list),
                failed_sub_questions="\n - ".join(failed_question_list),
            ),
        )
    ]

    # Grader
    model = state["fast_llm"]
    response = model.invoke(msg)

    cleaned_response = re.sub(r"```json\n|\n```", "", response.pretty_repr())
    parsed_response = json.loads(cleaned_response)

    sub_questions_dict = {}
    for sub_question_nr, sub_question_dict in enumerate(
        parsed_response["sub_questions"]
    ):
        sub_question_dict["answered"] = False
        sub_question_dict["verified"] = False
        sub_questions_dict[sub_question_nr] = sub_question_dict

    return {
        "decomposed_sub_questions_dict": sub_questions_dict,
        "log_messages": generate_log_message(
            message="deep - decompose",
            node_start_time=node_start_time,
            graph_start_time=state["graph_start_time"],
        ),
    }
@@ -0,0 +1,40 @@
import json
import re
from typing import Any

from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import ENTITY_TERM_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_docs


def entity_term_extraction(state: MainState) -> dict[str, Any]:
    """Extract entities and terms from the question and context"""

    question = state["original_question"]
    docs = state["deduped_retrieval_docs"]

    doc_context = format_docs(docs)

    msg = [
        HumanMessage(
            content=ENTITY_TERM_PROMPT.format(question=question, context=doc_context),
        )
    ]
    fast_llm = state["fast_llm"]
    # Grader
    llm_response_list = list(
        fast_llm.stream(
            prompt=msg,
        )
    )
    llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content

    cleaned_response = re.sub(r"```json\n|\n```", "", llm_response)
    parsed_response = json.loads(cleaned_response)

    return {
        "retrieved_entities_relationships": parsed_response,
    }
@@ -0,0 +1,30 @@
from typing import Any

from onyx.agent_search.main.states import MainState


# aggregate sub questions and answers
def sub_qa_level_aggregator(state: MainState) -> dict[str, Any]:
    sub_qas = state["sub_qas"]

    dynamic_context_list = [
        "Below you will find useful information to answer the original question:"
    ]
    checked_sub_qas = []

    for core_answer_sub_qa in sub_qas:
        question = core_answer_sub_qa["sub_question"]
        answer = core_answer_sub_qa["sub_answer"]
        verified = core_answer_sub_qa["sub_answer_check"]

        if verified == "yes":
            dynamic_context_list.append(
                f"Question:\n{question}\n\nAnswer:\n{answer}\n\n---\n\n"
            )
            checked_sub_qas.append({"sub_question": question, "sub_answer": answer})
    dynamic_context = "\n".join(dynamic_context_list)

    return {
        "core_answer_dynamic_context": dynamic_context,
        "checked_sub_qas": checked_sub_qas,
    }
@@ -0,0 +1,19 @@
from typing import Any

from onyx.agent_search.main.states import MainState


def sub_qa_manager(state: MainState) -> dict[str, Any]:
    """Pull the sub-question strings out of the decomposed sub-question dict
    and reset the new-question iteration counter."""

    sub_questions_dict = state["decomposed_sub_questions_dict"]

    sub_questions = {}

    for sub_question_nr, sub_question_dict in sub_questions_dict.items():
        sub_questions[sub_question_nr] = sub_question_dict["sub_question"]

    return {
        "sub_questions": sub_questions,
        "num_new_question_iterations": 0,
    }
backend/onyx/agent_search/deep_answer/states.py (new empty file)
backend/onyx/agent_search/expanded_retrieval/edges.py (new file, 44 lines)
@@ -0,0 +1,44 @@
from collections.abc import Hashable

from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from langgraph.types import Send

from onyx.agent_search.expanded_retrieval.nodes.doc_retrieval import RetrieveInput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
from onyx.agent_search.shared_graph_utils.prompts import REWRITE_PROMPT_MULTI_ORIGINAL
from onyx.llm.interfaces import LLM


def parallel_retrieval_edge(state: ExpandedRetrievalInput) -> list[Send | Hashable]:
    print(f"parallel_retrieval_edge state: {state.keys()}")

    # This should be better...
    question = state.get("query_to_answer") or state["search_request"].query
    llm: LLM = state["fast_llm"]

    msg = [
        HumanMessage(
            content=REWRITE_PROMPT_MULTI_ORIGINAL.format(question=question),
        )
    ]
    llm_response_list = list(
        llm.stream(
            prompt=msg,
        )
    )
    llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content

    print(f"llm_response: {llm_response}")

    rewritten_queries = llm_response.split("--")

    print(f"rewritten_queries: {rewritten_queries}")

    return [
        Send(
            "doc_retrieval",
            RetrieveInput(query_to_retrieve=query, **state),
        )
        for query in rewritten_queries
    ]
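Send is LangGraph's fan-out primitive: a conditional edge may return a list of Send(node, state) objects, and the target node then runs once per entry, in parallel, each invocation with its own payload. A minimal runnable sketch of the same pattern (toy state and node names, not the Onyx code):

from operator import add
from typing import Annotated, TypedDict

from langgraph.graph import END, START, StateGraph
from langgraph.types import Send

class State(TypedDict):
    queries: list[str]
    results: Annotated[list[str], add]  # parallel writes are concatenated

def fan_out(state: State) -> list[Send]:
    # one "worker" invocation per query, each with its own payload
    return [Send("worker", {"queries": [q], "results": []}) for q in state["queries"]]

def worker(state: State) -> dict:
    return {"results": [state["queries"][0].upper()]}

builder = StateGraph(State)
builder.add_node("worker", worker)
builder.add_conditional_edges(START, fan_out, ["worker"])
builder.add_edge("worker", END)

print(builder.compile().invoke({"queries": ["a", "b"], "results": []}))
# e.g. {'queries': ['a', 'b'], 'results': ['A', 'B']}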
backend/onyx/agent_search/expanded_retrieval/graph_builder.py (new file, 88 lines)
@@ -0,0 +1,88 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph

from onyx.agent_search.expanded_retrieval.edges import parallel_retrieval_edge
from onyx.agent_search.expanded_retrieval.nodes.doc_reranking import doc_reranking
from onyx.agent_search.expanded_retrieval.nodes.doc_retrieval import doc_retrieval
from onyx.agent_search.expanded_retrieval.nodes.doc_verification import (
    doc_verification,
)
from onyx.agent_search.expanded_retrieval.nodes.verification_kickoff import (
    verification_kickoff,
)
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalOutput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState


def expanded_retrieval_graph_builder() -> StateGraph:
    graph = StateGraph(
        state_schema=ExpandedRetrievalState,
        input=ExpandedRetrievalInput,
        output=ExpandedRetrievalOutput,
    )

    ### Add nodes ###

    graph.add_node(
        node="doc_retrieval",
        action=doc_retrieval,
    )
    graph.add_node(
        node="verification_kickoff",
        action=verification_kickoff,
    )
    graph.add_node(
        node="doc_verification",
        action=doc_verification,
    )
    graph.add_node(
        node="doc_reranking",
        action=doc_reranking,
    )

    ### Add edges ###

    graph.add_conditional_edges(
        source=START,
        path=parallel_retrieval_edge,
        path_map=["doc_retrieval"],
    )
    graph.add_edge(
        start_key="doc_retrieval",
        end_key="verification_kickoff",
    )
    graph.add_edge(
        start_key="doc_verification",
        end_key="doc_reranking",
    )
    graph.add_edge(
        start_key="doc_reranking",
        end_key=END,
    )

    return graph


if __name__ == "__main__":
    from onyx.db.engine import get_session_context_manager
    from onyx.llm.factory import get_default_llms
    from onyx.context.search.models import SearchRequest

    graph = expanded_retrieval_graph_builder()
    compiled_graph = graph.compile()
    primary_llm, fast_llm = get_default_llms()
    search_request = SearchRequest(
        query="Who made Excel and what other products did they make?",
    )
    with get_session_context_manager() as db_session:
        inputs = ExpandedRetrievalInput(
            search_request=search_request,
            primary_llm=primary_llm,
            fast_llm=fast_llm,
            db_session=db_session,
            query_to_answer="Who made Excel?",
        )
        for thing in compiled_graph.stream(inputs, debug=True):
            print(thing)
backend/onyx/agent_search/expanded_retrieval/nodes/doc_reranking.py (new file, 33 lines)
@@ -0,0 +1,33 @@
from onyx.agent_search.expanded_retrieval.states import DocRerankingOutput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState


def doc_reranking(state: ExpandedRetrievalState) -> DocRerankingOutput:
    # print(f"doc_reranking state: {state.keys()}")

    original_question = state["search_request"].query
    current_question = state.get("query_to_answer", original_question)
    verified_documents = state["verified_documents"]
    reranked_documents = verified_documents

    retrieval_stats = state.get("retrieval_stats", [])

    ranking_scores = {}

    for type in ["reranked", "initial"]:
        ranking_scores[type] = 0
        for retrieval_stat in retrieval_stats:
            for _, stat in retrieval_stat.items():
                ranking_scores[type] += stat[type]["fit_score"]
        ranking_scores[type] /= len(retrieval_stats)

    if current_question != original_question:
        return DocRerankingOutput(
            documents=reranked_documents, ranking_scores=[ranking_scores]
        )

    else:
        return DocRerankingOutput(
            original_question_documents=reranked_documents,
            original_question_ranking_scores=[ranking_scores],
        )
backend/onyx/agent_search/expanded_retrieval/nodes/doc_retrieval.py (new file, 91 lines)
@@ -0,0 +1,91 @@
from onyx.agent_search.expanded_retrieval.states import DocRetrievalOutput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState
from onyx.agent_search.shared_graph_utils.operators import calculate_rank_shift
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchRequest
from onyx.context.search.pipeline import SearchPipeline
from onyx.db.engine import get_session_context_manager


class RetrieveInput(ExpandedRetrievalState):
    query_to_retrieve: str


def doc_retrieval(state: RetrieveInput) -> DocRetrievalOutput:
    # def doc_retrieval(state: RetrieveInput) -> Command[Literal["doc_verification"]]:
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    # print(f"doc_retrieval state: {state.keys()}")

    documents: list[InferenceSection] = []
    llm = state["primary_llm"]
    fast_llm = state["fast_llm"]
    # db_session = state["db_session"]
    query_to_retrieve = state["query_to_retrieve"]
    with get_session_context_manager() as db_session1:
        documents = SearchPipeline(
            search_request=SearchRequest(
                query=query_to_retrieve,
            ),
            user=None,
            llm=llm,
            fast_llm=fast_llm,
            db_session=db_session1,
        )

    ranked_sections = {
        "initial": documents.final_context_sections,
        "reranked": documents.reranked_sections,
    }

    fit_scores = {}

    for rank_type, docs in ranked_sections.items():
        fit_scores[rank_type] = {}
        for i in [1, 5, 10]:
            fit_scores[rank_type][i] = (
                sum([doc.center_chunk.score for doc in docs[:i]]) / i
            )

        fit_scores[rank_type]["fit_score"] = (
            1
            / 3
            * (
                fit_scores[rank_type][1]
                + fit_scores[rank_type][5]
                + fit_scores[rank_type][10]
            )
        )
        fit_scores[rank_type]["chunk_ids"] = [
            doc.center_chunk.chunk_id for doc in docs
        ]

    fit_score_lift = (
        fit_scores["reranked"]["fit_score"] / fit_scores["initial"]["fit_score"]
    )

    average_rank_change = calculate_rank_shift(
        fit_scores["initial"]["chunk_ids"], fit_scores["reranked"]["chunk_ids"]
    )

    fit_scores["rerank_effect"] = average_rank_change
    fit_scores["fit_score_lift"] = fit_score_lift

    documents = documents.reranked_sections[:4]

    print(f"retrieved documents: {len(documents)}")
    return DocRetrievalOutput(
        retrieved_documents=documents,
        retrieval_stats=[
            {
                query_to_retrieve: fit_scores,
            }
        ],
    )
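The fit_score above is the mean of the average section scores at cutoffs 1, 5, and 10. A worked example with made-up scores:

scores = [0.9, 0.8, 0.7, 0.6, 0.5, 0.5, 0.4, 0.4, 0.3, 0.3]  # assumed values

def top_k_avg(k: int) -> float:
    return sum(scores[:k]) / k

fit_score = (top_k_avg(1) + top_k_avg(5) + top_k_avg(10)) / 3
print(round(fit_score, 3))  # (0.9 + 0.7 + 0.54) / 3 = 0.713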
backend/onyx/agent_search/expanded_retrieval/nodes/doc_verification.py (new file, 61 lines)
@@ -0,0 +1,61 @@
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs

from onyx.agent_search.expanded_retrieval.states import DocVerificationOutput
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState
from onyx.agent_search.shared_graph_utils.models import BinaryDecision
from onyx.agent_search.shared_graph_utils.prompts import VERIFIER_PROMPT
from onyx.context.search.models import InferenceSection


class DocVerificationInput(ExpandedRetrievalState, total=True):
    doc_to_verify: InferenceSection


def doc_verification(state: DocVerificationInput) -> DocVerificationOutput:
    """
    Check whether the document is relevant for the original user question

    Args:
        state (DocVerificationInput): The current state

    Returns:
        dict: The updated state with the final decision
    """

    # print(f"doc_verification state: {state.keys()}")

    # original_query = state["search_request"].query
    relevant_query = state.get("query_to_answer") or state["search_request"].query
    doc_to_verify = state["doc_to_verify"]
    document_content = doc_to_verify.combined_content

    msg = [
        HumanMessage(
            content=VERIFIER_PROMPT.format(
                question=relevant_query, document_content=document_content
            )
        )
    ]

    fast_llm = state["fast_llm"]
    response = list(
        fast_llm.stream(
            prompt=msg,
        )
    )

    response_string = merge_message_runs(response, chunk_separator="")[0].content
    # Convert string response to proper dictionary format
    decision_dict = {"decision": response_string.lower()}
    formatted_response = BinaryDecision.model_validate(decision_dict)

    print(f"Verdict: {formatted_response.decision}")

    verified_documents = []
    if formatted_response.decision == "yes":
        verified_documents.append(doc_to_verify)

    return DocVerificationOutput(
        verified_documents=verified_documents,
    )
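BinaryDecision itself is not part of this diff. Judging only from its use here (model_validate on a {"decision": ...} dict and a decision == "yes" check), a plausible shape would be the following; this is an inference, not the actual source:

from typing import Literal

from pydantic import BaseModel

class BinaryDecision(BaseModel):
    # hypothetical field definition, inferred from usage above
    decision: Literal["yes", "no"]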
backend/onyx/agent_search/expanded_retrieval/nodes/verification_kickoff.py (new file, 27 lines)
@@ -0,0 +1,27 @@
from typing import Literal

from langgraph.types import Command
from langgraph.types import Send

from onyx.agent_search.expanded_retrieval.nodes.doc_verification import (
    DocVerificationInput,
)
from onyx.agent_search.expanded_retrieval.states import ExpandedRetrievalState


def verification_kickoff(
    state: ExpandedRetrievalState,
) -> Command[Literal["doc_verification"]]:
    # print(f"verification_kickoff state: {state.keys()}")

    documents = state["retrieved_documents"]
    return Command(
        update={},
        goto=[
            Send(
                node="doc_verification",
                arg=DocVerificationInput(doc_to_verify=doc, **state),
            )
            for doc in documents
        ],
    )
backend/onyx/agent_search/expanded_retrieval/states.py (new file, 44 lines)
@@ -0,0 +1,44 @@
from operator import add
from typing import Annotated
from typing import Any
from typing import TypedDict

from onyx.agent_search.core_state import PrimaryState
from onyx.agent_search.shared_graph_utils.operators import dedup_inference_sections
from onyx.context.search.models import InferenceSection


class DocRetrievalOutput(TypedDict, total=False):
    retrieved_documents: Annotated[list[InferenceSection], dedup_inference_sections]
    retrieval_stats: Annotated[list[dict[str, Any]], add]


class DocVerificationOutput(TypedDict, total=False):
    verified_documents: Annotated[list[InferenceSection], dedup_inference_sections]


class DocRerankingOutput(TypedDict, total=False):
    documents: Annotated[list[InferenceSection], dedup_inference_sections]
    ranking_scores: Annotated[list[dict[str, float]], add]
    original_question_documents: Annotated[
        list[InferenceSection], dedup_inference_sections
    ]
    original_question_ranking_scores: Annotated[list[dict[str, float]], add]


class ExpandedRetrievalState(
    PrimaryState,
    DocRetrievalOutput,
    DocVerificationOutput,
    DocRerankingOutput,
    total=True,
):
    query_to_answer: str


class ExpandedRetrievalInput(PrimaryState, total=True):
    query_to_answer: str


class ExpandedRetrievalOutput(DocRerankingOutput):
    pass
backend/onyx/agent_search/main/edges.py (new file, 61 lines)
@@ -0,0 +1,61 @@
from collections.abc import Hashable

from langgraph.types import Send

from onyx.agent_search.answer_query.states import AnswerQueryInput
from onyx.agent_search.main.states import MainState


def parallelize_decompozed_answer_queries(state: MainState) -> list[Send | Hashable]:
    return [
        Send(
            "answer_query",
            AnswerQueryInput(
                **state,
                query_to_answer=query,
            ),
        )
        for query in state["initial_decomp_queries"]
    ]


# def continue_to_answer_sub_questions(state: QAState) -> Union[Hashable, list[Hashable]]:
#     # Routes re-written queries to the (parallel) retrieval steps
#     # Notice the 'Send()' API that takes care of the parallelization
#     return [
#         Send(
#             "sub_answers_graph",
#             ResearchQAState(
#                 sub_question=sub_question["sub_question_str"],
#                 sub_question_nr=sub_question["sub_question_nr"],
#                 graph_start_time=state["graph_start_time"],
#                 primary_llm=state["primary_llm"],
#                 fast_llm=state["fast_llm"],
#             ),
#         )
#         for sub_question in state["sub_questions"]
#     ]


# def continue_to_deep_answer(state: QAState) -> Union[Hashable, list[Hashable]]:
#     print("---GO TO DEEP ANSWER OR END---")

#     base_answer = state["base_answer"]

#     question = state["original_question"]

#     BASE_CHECK_MESSAGE = [
#         HumanMessage(
#             content=BASE_CHECK_PROMPT.format(question=question, base_answer=base_answer)
#         )
#     ]

#     model = state["fast_llm"]
#     response = model.invoke(BASE_CHECK_MESSAGE)

#     print(f"CAN WE CONTINUE W/O GENERATING A DEEP ANSWER? - {response.pretty_repr()}")

#     if response.pretty_repr() == "no":
#         return "decompose"
#     else:
#         return "end"
backend/onyx/agent_search/main/graph_builder.py (new file, 110 lines)
@@ -0,0 +1,110 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph

from onyx.agent_search.answer_query.graph_builder import answer_query_graph_builder
from onyx.agent_search.expanded_retrieval.graph_builder import (
    expanded_retrieval_graph_builder,
)
from onyx.agent_search.main.edges import parallelize_decompozed_answer_queries
from onyx.agent_search.main.nodes.base_decomp import main_decomp_base
from onyx.agent_search.main.nodes.generate_initial_answer import (
    generate_initial_answer,
)
from onyx.agent_search.main.states import MainInput
from onyx.agent_search.main.states import MainState


def main_graph_builder() -> StateGraph:
    graph = StateGraph(
        state_schema=MainState,
        input=MainInput,
    )

    ### Add nodes ###

    graph.add_node(
        node="base_decomp",
        action=main_decomp_base,
    )
    answer_query_subgraph = answer_query_graph_builder().compile()
    graph.add_node(
        node="answer_query",
        action=answer_query_subgraph,
    )
    expanded_retrieval_subgraph = expanded_retrieval_graph_builder().compile()
    graph.add_node(
        node="expanded_retrieval",
        action=expanded_retrieval_subgraph,
    )
    graph.add_node(
        node="generate_initial_answer",
        action=generate_initial_answer,
    )

    ### Add edges ###
    graph.add_edge(
        start_key=START,
        end_key="expanded_retrieval",
    )

    graph.add_edge(
        start_key=START,
        end_key="base_decomp",
    )
    graph.add_conditional_edges(
        source="base_decomp",
        path=parallelize_decompozed_answer_queries,
        path_map=["answer_query"],
    )
    # graph.add_edge(
    #     start_key="answer_query",
    #     end_key="generate_initial_answer",
    # )
    # graph.add_edge(
    #     start_key="expanded_retrieval",
    #     end_key="generate_initial_answer",
    # )
    graph.add_edge(
        start_key=["answer_query", "expanded_retrieval"],
        end_key="generate_initial_answer",
    )
    graph.add_edge(
        start_key="generate_initial_answer",
        end_key=END,
    )

    return graph


if __name__ == "__main__":
    from onyx.db.engine import get_session_context_manager
    from onyx.llm.factory import get_default_llms
    from onyx.context.search.models import SearchRequest

    graph = main_graph_builder()
    compiled_graph = graph.compile()
    primary_llm, fast_llm = get_default_llms()
    search_request = SearchRequest(
        # query="If i am familiar with the function that I need, how can I type it into a cell?",
        # query="What are the temperatures in Hawaii and New York?",
        query="""What are the pros and cons of using a vertical action items bar in Sourcegraph, and
        what are the goals defined for the navigation redesign?""",
    )
    with get_session_context_manager() as db_session:
        inputs = MainInput(
            search_request=search_request,
            primary_llm=primary_llm,
            fast_llm=fast_llm,
            db_session=db_session,
        )
        for thing in compiled_graph.stream(
            input=inputs,
            # stream_mode="debug",
            # debug=True,
            subgraphs=True,
        ):
            pass
            # print(thing)
        # print(compiled_graph.stream(input=inputs))
        print("DOONNEE")
backend/onyx/agent_search/main/nodes/base_decomp.py (new file, 31 lines)
@@ -0,0 +1,31 @@
from langchain_core.messages import HumanMessage

from onyx.agent_search.main.states import BaseDecompOutput
from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import INITIAL_DECOMPOSITION_PROMPT
from onyx.agent_search.shared_graph_utils.utils import clean_and_parse_list_string


def main_decomp_base(state: MainState) -> BaseDecompOutput:
    question = state["search_request"].query

    msg = [
        HumanMessage(
            content=INITIAL_DECOMPOSITION_PROMPT.format(question=question),
        )
    ]

    # Get the rewritten queries in a defined format
    model = state["fast_llm"]
    response = model.invoke(msg)

    content = response.pretty_repr()
    list_of_subquestions = clean_and_parse_list_string(content)

    decomp_list: list[str] = [
        sub_question["sub_question"].strip() for sub_question in list_of_subquestions
    ]

    return BaseDecompOutput(
        initial_decomp_queries=decomp_list,
    )
||||
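
For orientation, a hypothetical example (data invented) of the parsed response main_decomp_base expects from the fast LLM, per the format spec in INITIAL_DECOMPOSITION_PROMPT:

# Sketch only -- not part of this commit.
list_of_subquestions = [
    {"sub_question": "What are the pros of a vertical action items bar?",
     "search_term": "vertical action items bar pros cons"},
    {"sub_question": "What goals were defined for the navigation redesign?",
     "search_term": "navigation redesign goals"},
]
decomp_list = [sq["sub_question"].strip() for sq in list_of_subquestions]
# decomp_list now holds just the two sub-question strings.
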
@@ -0,0 +1,53 @@
from langchain_core.messages import HumanMessage

from onyx.agent_search.main.states import InitialAnswerOutput
from onyx.agent_search.main.states import MainState
from onyx.agent_search.shared_graph_utils.prompts import INITIAL_RAG_PROMPT
from onyx.agent_search.shared_graph_utils.utils import format_docs


def generate_initial_answer(state: MainState) -> InitialAnswerOutput:
    print("---GENERATE INITIAL---")

    question = state["search_request"].query
    docs = state["original_question_documents"]

    decomp_answer_results = state["decomp_answer_results"]

    good_qa_list: list[str] = []

    _SUB_QUESTION_ANSWER_TEMPLATE = """
Sub-Question:\n - {sub_question}\n --\nAnswer:\n - {sub_answer}\n\n
"""
    for decomp_answer_result in decomp_answer_results:
        if (
            decomp_answer_result.quality.lower() == "yes"
            and len(decomp_answer_result.answer) > 0
            and decomp_answer_result.answer != "I don't know"
        ):
            good_qa_list.append(
                _SUB_QUESTION_ANSWER_TEMPLATE.format(
                    sub_question=decomp_answer_result.query,
                    sub_answer=decomp_answer_result.answer,
                )
            )

    sub_question_answer_str = "\n\n------\n\n".join(good_qa_list)

    msg = [
        HumanMessage(
            content=INITIAL_RAG_PROMPT.format(
                question=question,
                context=format_docs(docs),
                answered_sub_questions=sub_question_answer_str,
            )
        )
    ]

    # Grader
    model = state["fast_llm"]
    response = model.invoke(msg)
    answer = response.pretty_repr()

    print(answer)
    return InitialAnswerOutput(initial_answer=answer)
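
A hypothetical rendering of one verified sub-answer through the template above (strings invented):

# Sketch only -- not part of this commit.
template = "\nSub-Question:\n - {sub_question}\n --\nAnswer:\n - {sub_answer}\n\n"
print(template.format(
    sub_question="What goals were defined for the navigation redesign?",
    sub_answer="Simplify top-level navigation and reduce click depth.",
))
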
42
backend/onyx/agent_search/main/states.py
Normal file
@@ -0,0 +1,42 @@
from operator import add
from typing import Annotated
from typing import TypedDict

from onyx.agent_search.answer_query.states import SearchAnswerResults
from onyx.agent_search.core_state import PrimaryState
from onyx.agent_search.shared_graph_utils.operators import dedup_inference_sections
from onyx.context.search.models import InferenceSection


class BaseDecompOutput(TypedDict, total=False):
    initial_decomp_queries: list[str]


class InitialAnswerOutput(TypedDict, total=False):
    initial_answer: str


class MainState(
    PrimaryState,
    BaseDecompOutput,
    InitialAnswerOutput,
    total=True,
):
    documents: Annotated[list[InferenceSection], dedup_inference_sections]
    decomp_answer_results: Annotated[list[SearchAnswerResults], add]
    original_question_documents: Annotated[
        list[InferenceSection], dedup_inference_sections
    ]
    ranking_scores: Annotated[list[dict[str, float]], add]
    original_question_ranking_scores: Annotated[list[dict[str, float]], add]


class MainInput(PrimaryState, total=True):
    pass


class MainOutput(TypedDict):
    """
    This is not used because defining the output only matters for filtering the output of
    a .invoke() call but we are streaming so we just yield the entire state.
    """
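
A small sketch of what the Annotated reducers above mean at runtime: when parallel branches write the same key, LangGraph combines the values with the reducer instead of overwriting (data invented for illustration):

# Sketch only -- not part of this commit.
from operator import add

branch_a = [{"doc_1": 0.9}]
branch_b = [{"doc_2": 0.7}]
merged = add(branch_a, branch_b)  # -> [{"doc_1": 0.9}, {"doc_2": 0.7}]
# "documents" uses dedup_inference_sections the same way, so duplicate
# InferenceSections from parallel retrievals collapse into one deduped list.
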
27
backend/onyx/agent_search/run_graph.py
Normal file
@@ -0,0 +1,27 @@
from onyx.agent_search.main.graph_builder import main_graph_builder
from onyx.chat.answer import AnswerStream
from onyx.llm.interfaces import LLM
from onyx.tools.tool import Tool


def run_graph(
    query: str,
    llm: LLM,
    tools: list[Tool],
) -> AnswerStream:
    graph = main_graph_builder()

    inputs = {
        "original_query": query,
        "messages": [],
        "tools": tools,
        "llm": llm,
    }
    compiled_graph = graph.compile()
    output = compiled_graph.invoke(input=inputs)
    yield from output


if __name__ == "__main__":
    pass
    # run_graph("What is the capital of France?", llm, [])
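
Usage sketch for run_graph (llm is a placeholder object, not something defined in this diff); since it is a generator, callers iterate the stream directly:

# Sketch only -- not part of this commit.
# for packet in run_graph("What is the capital of France?", llm=llm, tools=[]):
#     print(packet)
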
12
backend/onyx/agent_search/shared_graph_utils/models.py
Normal file
@@ -0,0 +1,12 @@
from typing import Literal

from pydantic import BaseModel


# Pydantic models for structured outputs
class RewrittenQueries(BaseModel):
    rewritten_queries: list[str]


class BinaryDecision(BaseModel):
    decision: Literal["yes", "no"]
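
These models are presumably used to validate structured LLM output; a sketch assuming Pydantic v2's model_validate API:

# Sketch only -- not part of this commit.
from onyx.agent_search.shared_graph_utils.models import BinaryDecision

decision = BinaryDecision.model_validate({"decision": "yes"})
# model_validate({"decision": "maybe"}) would raise a ValidationError,
# which is the point of the Literal["yes", "no"] field.
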
22
backend/onyx/agent_search/shared_graph_utils/operators.py
Normal file
@@ -0,0 +1,22 @@
from onyx.chat.prune_and_merge import _merge_sections
from onyx.context.search.models import InferenceSection


def dedup_inference_sections(
    list1: list[InferenceSection], list2: list[InferenceSection]
) -> list[InferenceSection]:
    deduped = _merge_sections(list1 + list2)
    return deduped


def calculate_rank_shift(list1: list, list2: list, top_n: int = 20) -> float:
    shift = 0
    for rank_first, doc_id in enumerate(list1[:top_n], 1):
        try:
            rank_second = list2.index(doc_id) + 1
        except ValueError:
            rank_second = len(list2)  # Document not found in second list

        shift += (rank_first - rank_second) ** 2 / (rank_first * rank_second)

    return shift / top_n
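
A worked example of calculate_rank_shift with invented doc ids: "a" holds rank 1 in both lists and contributes 0; "b" drops from rank 2 to rank 3 and contributes (2 - 3) ** 2 / (2 * 3) = 1/6; averaged over top_n=2 that gives about 0.083:

# Sketch only -- not part of this commit.
from onyx.agent_search.shared_graph_utils.operators import calculate_rank_shift

print(calculate_rank_shift(["a", "b"], ["a", "c", "b"], top_n=2))  # ~0.0833
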
446
backend/onyx/agent_search/shared_graph_utils/prompts.py
Normal file
@@ -0,0 +1,446 @@
REWRITE_PROMPT_MULTI_ORIGINAL = """ \n
Please convert an initial user question into 2-3 more appropriate short and pointed search queries for retrieval from a
document store. Particularly, try to think about resolving ambiguities and making the search queries more specific,
enabling the system to search more broadly.
Also, try to make the search queries not redundant, i.e. not too similar! \n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Formulate the queries separated by '--' (Do not say 'Query 1: ...', just write the query text): """

REWRITE_PROMPT_MULTI = """ \n
Please create a list of 2-3 sample documents that could answer an original question. Each document
should be about as long as the original question. \n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Formulate the sample documents separated by '--' (Do not say 'Document 1: ...', just write the text): """

BASE_RAG_PROMPT = """ \n
You are an assistant for question-answering tasks. Use the context provided below - and only the
provided context - to answer the question. If you don't know the answer or if the provided context is
empty, just say "I don't know". Do not use your internal knowledge!

Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
question based on the context, say "I don't know". It is a matter of life and death that you do NOT
use your internal knowledge, just the provided information!

Use three sentences maximum and keep the answer concise.
\nQuestion:\n {question} \nContext:\n {context} \n\n
\n\n
Answer:"""


SUB_CHECK_PROMPT = """ \n
Your task is to see whether a given answer addresses a given question.
Please do not use any internal knowledge you may have - just focus on whether the answer
as given seems to address the question as given.

Here is the question:
\n ------- \n
{question}
\n ------- \n

Here is the suggested answer:
\n ------- \n
{base_answer}
\n ------- \n

Please answer with yes or no:"""


BASE_CHECK_PROMPT = """ \n
Please check whether 1) the suggested answer seems to fully address the original question AND 2) the
original question requests a simple, factual answer, and there are no ambiguities, judgements,
aggregations, or any other complications that may require extra context. (I.e., if the question is
somewhat addressed, but the answer would benefit from more context, then answer with 'no'.)

Please only answer with 'yes' or 'no' \n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Here is the proposed answer:
\n ------- \n
{base_answer}
\n ------- \n
Please answer with yes or no:"""

VERIFIER_PROMPT = """ \n
Please check whether the document provided below seems to be relevant for answering the provided question. Please
only answer with 'yes' or 'no' \n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
Here is the document text:
\n ------- \n
{document_content}
\n ------- \n
Please answer with yes or no:"""

INITIAL_DECOMPOSITION_PROMPT_BASIC = """ \n
Please decompose an initial user question into not more than 4 appropriate sub-questions that help to
answer the original question. The purpose of this decomposition is to isolate individual entities
(i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our
sales with company A' + 'what is our market share with company A' + 'is company A a reference customer
for us'), etc. Each sub-question should realistically be answerable by a good RAG system. \n

Here is the initial question:
\n ------- \n
{question}
\n ------- \n

Please formulate your answer as a list of subquestions:

Answer:
"""

REWRITE_PROMPT_SINGLE = """ \n
Please convert an initial user question into a more appropriate search query for retrieval from a
document store. \n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n

Formulate the query: """

MODIFIED_RAG_PROMPT = """You are an assistant for question-answering tasks. Use the context provided below
- and only this context - to answer the question. If you don't know the answer, just say "I don't know".
Use three sentences maximum and keep the answer concise.
Pay particular attention to the sub-questions and their answers, as they may enrich the answer.
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
question based on the context, say "I don't know". It is a matter of life and death that you do NOT
use your internal knowledge, just the provided information!

\nQuestion: {question}
\nContext: {combined_context} \n

Answer:"""

ORIG_DEEP_DECOMPOSE_PROMPT = """ \n
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
good enough. Also, some sub-questions had been answered and this information has been used to provide
the initial answer. Some other sub-questions may have been suggested based on little knowledge, but they
were not directly answerable. Also, some entities, relationships and terms are given to you so that
you have an idea of what the available data looks like.

Your role is to generate 3-5 new sub-questions that would help to answer the initial question,
considering:

1) The initial question
2) The initial answer that was found to be unsatisfactory
3) The sub-questions that were answered
4) The sub-questions that were suggested but not answered
5) The entities, relationships and terms that were extracted from the context

The individual questions should be answerable by a good RAG system.
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
question for different entities that may be involved in the original question, but in a way that does
not duplicate questions that were already tried.

Additional Guidelines:
- The sub-questions should be specific to the question and provide richer context for the question,
resolve ambiguities, or address shortcomings of the initial answer
- Each sub-question - when answered - should be relevant for the answer to the original question
- The sub-questions should be free from comparisons, ambiguities, judgements, aggregations, or any
other complications that may require extra context.
- The sub-questions MUST have the full context of the original question so that they can be executed by
a RAG system independently without the original question available
(Example:
- initial question: "What is the capital of France?"
- bad sub-question: "What is the name of the river there?"
- good sub-question: "What is the name of the river that flows through Paris?"
- For each sub-question, please provide a short explanation for why it is a good sub-question. So
generate a list of dictionaries with the following format:
[{{"sub_question": <sub-question>, "explanation": <explanation>, "search_term": <rewrite the
sub-question for use as a search phrase for the document store>}}, ...]

\n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n

Here is the initial sub-optimal answer:
\n ------- \n
{base_answer}
\n ------- \n

Here are the sub-questions that were answered:
\n ------- \n
{answered_sub_questions}
\n ------- \n

Here are the sub-questions that were suggested but not answered:
\n ------- \n
{failed_sub_questions}
\n ------- \n

And here are the entities, relationships and terms extracted from the context:
\n ------- \n
{entity_term_extraction_str}
\n ------- \n

Please generate the list of good, fully contextualized sub-questions that would help to address the
main question. Again, please find questions that are NOT overlapping too much with the already answered
sub-questions or those that already were suggested and failed.
In other words - what can we try in addition to what has been tried so far?

Please think through it step by step and then generate the list of json dictionaries with the following
format:

{{"sub_questions": [{{"sub_question": <sub-question>,
"explanation": <explanation>,
"search_term": <rewrite the sub-question for use as a search phrase for the document store>}},
...]}} """

DEEP_DECOMPOSE_PROMPT = """ \n
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
good enough. Also, some sub-questions had been answered and this information has been used to provide
the initial answer. Some other sub-questions may have been suggested based on little knowledge, but they
were not directly answerable. Also, some entities, relationships and terms are given to you so that
you have an idea of what the available data looks like.

Your role is to generate 4-6 new sub-questions that would help to answer the initial question,
considering:

1) The initial question
2) The initial answer that was found to be unsatisfactory
3) The sub-questions that were answered
4) The sub-questions that were suggested but not answered
5) The entities, relationships and terms that were extracted from the context

The individual questions should be answerable by a good RAG system.
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
question for different entities that may be involved in the original question, but in a way that does
not duplicate questions that were already tried.

Additional Guidelines:
- The sub-questions should be specific to the question and provide richer context for the question,
resolve ambiguities, or address shortcomings of the initial answer
- Each sub-question - when answered - should be relevant for the answer to the original question
- The sub-questions should be free from comparisons, ambiguities, judgements, aggregations, or any
other complications that may require extra context.
- The sub-questions MUST have the full context of the original question so that they can be executed by
a RAG system independently without the original question available
(Example:
- initial question: "What is the capital of France?"
- bad sub-question: "What is the name of the river there?"
- good sub-question: "What is the name of the river that flows through Paris?"
- For each sub-question, please also provide a search term that can be used to retrieve relevant
documents from a document store.
\n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n

Here is the initial sub-optimal answer:
\n ------- \n
{base_answer}
\n ------- \n

Here are the sub-questions that were answered:
\n ------- \n
{answered_sub_questions}
\n ------- \n

Here are the sub-questions that were suggested but not answered:
\n ------- \n
{failed_sub_questions}
\n ------- \n

And here are the entities, relationships and terms extracted from the context:
\n ------- \n
{entity_term_extraction_str}
\n ------- \n

Please generate the list of good, fully contextualized sub-questions that would help to address the
main question. Again, please find questions that are NOT overlapping too much with the already answered
sub-questions or those that already were suggested and failed.
In other words - what can we try in addition to what has been tried so far?

Generate the list of json dictionaries with the following format:

{{"sub_questions": [{{"sub_question": <sub-question>,
"search_term": <rewrite the sub-question for use as a search phrase for the document store>}},
...]}} """

DECOMPOSE_PROMPT = """ \n
For an initial user question, please generate 5-10 individual sub-questions whose answers would help
\n to answer the initial question. The individual questions should be answerable by a good RAG system.
So a good idea would be to \n use the sub-questions to resolve ambiguities and/or to separate the
question for different entities that may be involved in the original question.

In order to arrive at meaningful sub-questions, please also consider the context retrieved from the
document store, expressed as entities, relationships and terms. You can also think about the types
mentioned in brackets.

Guidelines:
- The sub-questions should be specific to the question and provide richer context for the question,
and/or resolve ambiguities
- Each sub-question - when answered - should be relevant for the answer to the original question
- The sub-questions should be free from comparisons, ambiguities, judgements, aggregations, or any
other complications that may require extra context.
- The sub-questions MUST have the full context of the original question so that they can be executed by
a RAG system independently without the original question available
(Example:
- initial question: "What is the capital of France?"
- bad sub-question: "What is the name of the river there?"
- good sub-question: "What is the name of the river that flows through Paris?"
- For each sub-question, please provide a short explanation for why it is a good sub-question. So
generate a list of dictionaries with the following format:
[{{"sub_question": <sub-question>, "explanation": <explanation>, "search_term": <rewrite the
sub-question for use as a search phrase for the document store>}}, ...]

\n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n

And here are the entities, relationships and terms extracted from the context:
\n ------- \n
{entity_term_extraction_str}
\n ------- \n

Please generate the list of good, fully contextualized sub-questions that would help to address the
main question. Don't be too specific unless the original question is specific.
Please think through it step by step and then generate the list of json dictionaries with the following
format:
{{"sub_questions": [{{"sub_question": <sub-question>,
"explanation": <explanation>,
"search_term": <rewrite the sub-question for use as a search phrase for the document store>}},
...]}} """

#### Consolidations
COMBINED_CONTEXT = """-------
Below you will find useful information to answer the original question. First, you see a number of
sub-questions with their answers. This information should be considered to be more focussed and
somewhat more specific to the original question as it tries to contextualize facts.
After that you will see the documents that were considered to be relevant to answer the original question.

Here are the sub-questions and their answers:
\n\n {deep_answer_context} \n\n
\n\n Here are the documents that were considered to be relevant to answer the original question:
\n\n {formated_docs} \n\n
----------------
"""

SUB_QUESTION_EXPLANATION_RANKER_PROMPT = """-------
Below you will find a question that we ultimately want to answer (the original question) and a list of
motivations in arbitrary order for generated sub-questions that are supposed to help us answer the
original question. The motivations are formatted as <motivation number>: <motivation explanation>.
(Again, the numbering is arbitrary and does not necessarily mean that 1 is the most relevant
motivation and 2 is less relevant.)

Please rank the motivations in order of relevance for answering the original question. Also, try to
ensure that the top questions do not duplicate too much, i.e. that they are not too similar.
Ultimately, create a list with the motivation numbers where the number of the most relevant
motivations comes first.

Here is the original question:
\n\n {original_question} \n\n
\n\n Here is the list of sub-question motivations:
\n\n {sub_question_explanations} \n\n
----------------

Please think step by step and then generate the ranked list of motivations.

Please format your answer as a json object in the following format:
{{"reasonning": <explain your reasoning for the ranking>,
"ranked_motivations": <ranked list of motivation numbers>}}
"""


INITIAL_DECOMPOSITION_PROMPT = """ \n
Please decompose an initial user question into 2 or 3 appropriate sub-questions that help to
answer the original question. The purpose of this decomposition is to isolate individual entities
(i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our
sales with company A' + 'what is our market share with company A' + 'is company A a reference customer
for us'), etc. Each sub-question should realistically be answerable by a good RAG system. \n

For each sub-question, please also create one search term that can be used to retrieve relevant
documents from a document store.

Here is the initial question:
\n ------- \n
{question}
\n ------- \n

Please formulate your answer as a list of json objects with the following format:

[{{"sub_question": <sub-question>, "search_term": <search term>}}, ...]

Answer:
"""

INITIAL_RAG_PROMPT = """ \n
You are an assistant for question-answering tasks. Use the information provided below - and only the
provided information - to answer the provided question.

The information provided below consists of:
1) a number of answered sub-questions - these are very important(!) and definitely should be
considered to answer the question.
2) a number of documents that were also deemed relevant for the question.

If you don't know the answer or if the provided information is empty or insufficient, just say
"I don't know". Do not use your internal knowledge!

Again, only use the provided information and do not use your internal knowledge! It is a matter of life
and death that you do NOT use your internal knowledge, just the provided information!

Try to keep your answer concise.

And here is the question and the provided information:
\n
\nQuestion:\n {question}

\nAnswered Sub-questions:\n {answered_sub_questions}

\nContext:\n {context} \n\n
\n\n

Answer:"""

ENTITY_TERM_PROMPT = """ \n
Based on the original question and the context retrieved from a dataset, please generate a list of
entities (e.g. companies, organizations, industries, products, locations, etc.), terms and concepts
(e.g. sales, revenue, etc.) that are relevant for the question, plus their relations to each other.

\n\n
Here is the original question:
\n ------- \n
{question}
\n ------- \n
And here is the context retrieved:
\n ------- \n
{context}
\n ------- \n

Please format your answer as a json object in the following format:

{{"retrieved_entities_relationships": {{
    "entities": [{{
        "entity_name": <assign a name for the entity>,
        "entity_type": <specify a short type name for the entity, such as 'company', 'location',...>
    }}],
    "relationships": [{{
        "name": <assign a name for the relationship>,
        "type": <specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>,
        "entities": [<related entity name 1>, <related entity name 2>]
    }}],
    "terms": [{{
        "term_name": <assign a name for the term>,
        "term_type": <specify a short type name for the term, such as 'revenue', 'market_share',...>,
        "similar_to": <list terms that are similar to this term>
    }}]
}}
}}
"""
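
A hypothetical example (all values invented) of the JSON shape ENTITY_TERM_PROMPT asks for; the inner entities/relationships/terms dict matches the structure format_entity_term_extraction in shared_graph_utils/utils.py consumes:

# Sketch only -- not part of this commit.
extraction = {
    "retrieved_entities_relationships": {
        "entities": [{"entity_name": "Sourcegraph", "entity_type": "company"}],
        "relationships": [{
            "name": "owns_redesign",
            "type": "is_project_of",
            "entities": ["navigation redesign", "Sourcegraph"],
        }],
        "terms": [{
            "term_name": "action items bar",
            "term_type": "ui_component",
            "similar_to": ["toolbar"],
        }],
    }
}
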
101
backend/onyx/agent_search/shared_graph_utils/utils.py
Normal file
@@ -0,0 +1,101 @@
import ast
import json
import re
from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from typing import Any

from onyx.context.search.models import InferenceSection


def normalize_whitespace(text: str) -> str:
    """Normalize whitespace in text to single spaces and strip leading/trailing whitespace."""
    return re.sub(r"\s+", " ", text.strip())


# Post-processing
def format_docs(docs: Sequence[InferenceSection]) -> str:
    return "\n\n".join(doc.combined_content for doc in docs)


def clean_and_parse_list_string(json_string: str) -> list[dict]:
    # Remove any prefixes/labels before the actual JSON content
    json_string = re.sub(r"^.*?(?=\[)", "", json_string, flags=re.DOTALL)

    # Remove markdown code block markers and any newline prefixes
    cleaned_string = re.sub(r"```json\n|\n```", "", json_string)
    cleaned_string = cleaned_string.replace("\\n", " ").replace("\n", " ")
    cleaned_string = " ".join(cleaned_string.split())

    # Try parsing with json.loads first, fall back to ast.literal_eval
    try:
        return json.loads(cleaned_string)
    except json.JSONDecodeError:
        try:
            return ast.literal_eval(cleaned_string)
        except (ValueError, SyntaxError) as e:
            raise ValueError(f"Failed to parse JSON string: {cleaned_string}") from e


def clean_and_parse_json_string(json_string: str) -> dict[str, Any]:
    # Remove markdown code block markers and any newline prefixes
    cleaned_string = re.sub(r"```json\n|\n```", "", json_string)
    cleaned_string = cleaned_string.replace("\\n", " ").replace("\n", " ")
    cleaned_string = " ".join(cleaned_string.split())
    # Parse the cleaned string into a Python dictionary
    return json.loads(cleaned_string)


def format_entity_term_extraction(entity_term_extraction_dict: dict[str, Any]) -> str:
    entities = entity_term_extraction_dict["entities"]
    terms = entity_term_extraction_dict["terms"]
    relationships = entity_term_extraction_dict["relationships"]

    entity_strs = ["\nEntities:\n"]
    for entity in entities:
        entity_str = f"{entity['entity_name']} ({entity['entity_type']})"
        entity_strs.append(entity_str)

    entity_str = "\n - ".join(entity_strs)

    relationship_strs = ["\n\nRelationships:\n"]
    for relationship in relationships:
        relationship_str = f"{relationship['name']} ({relationship['type']}): {relationship['entities']}"
        relationship_strs.append(relationship_str)

    relationship_str = "\n - ".join(relationship_strs)

    term_strs = ["\n\nTerms:\n"]
    for term in terms:
        term_str = f"{term['term_name']} ({term['term_type']}): similar to {term['similar_to']}"
        term_strs.append(term_str)

    term_str = "\n - ".join(term_strs)

    return "\n".join(entity_strs + relationship_strs + term_strs)


def _format_time_delta(time: timedelta) -> str:
    seconds_from_start = f"{((time).seconds):03d}"
    microseconds_from_start = f"{((time).microseconds):06d}"
    return f"{seconds_from_start}.{microseconds_from_start}"


def generate_log_message(
    message: str,
    node_start_time: datetime,
    graph_start_time: datetime | None = None,
) -> str:
    current_time = datetime.now()

    if graph_start_time is not None:
        graph_time_str = _format_time_delta(current_time - graph_start_time)
    else:
        graph_time_str = "N/A"

    node_time_str = _format_time_delta(current_time - node_start_time)

    return f"{graph_time_str} ({node_time_str} s): {message}"
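
Usage sketch for clean_and_parse_list_string with a typical fenced LLM reply (reply text invented):

# Sketch only -- not part of this commit.
from onyx.agent_search.shared_graph_utils.utils import clean_and_parse_list_string

raw = 'Here you go:\n```json\n[{"sub_question": "What are sales for company A?", "search_term": "company A sales"}]\n```'
print(clean_and_parse_list_string(raw))
# [{'sub_question': 'What are sales for company A?', 'search_term': 'company A sales'}]
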
@@ -5,7 +5,6 @@ from datetime import datetime
from datetime import timezone
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
@@ -229,26 +228,18 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
        safe: bool = False,
        request: Optional[Request] = None,
    ) -> User:
        # We verify the password here to make sure it's valid before we proceed
        await self.validate_password(
            user_create.password, cast(schemas.UC, user_create)
        )

        user_count: int | None = None
        referral_source = (
            request.cookies.get("referral_source", None)
            if request is not None
            else None
        )
        referral_source = None
        if request is not None:
            referral_source = request.cookies.get("referral_source", None)

        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            "get_or_create_tenant_id",
            async_return_default_schema,
        )(
            email=user_create.email,
            referral_source=referral_source,
            request=request,
        )

        async with get_async_session_with_tenant(tenant_id) as db_session:
@@ -291,6 +282,25 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
            finally:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        # Blocking but this should be very quick
        with get_session_with_tenant(tenant_id) as db_session:
            if not user_count:
                create_milestone_and_report(
                    user=user,
                    distinct_id=user.email,
                    event_type=MilestoneRecordType.USER_SIGNED_UP,
                    properties=None,
                    db_session=db_session,
                )
            else:
                create_milestone_and_report(
                    user=user,
                    distinct_id=user.email,
                    event_type=MilestoneRecordType.MULTIPLE_USERS,
                    properties=None,
                    db_session=db_session,
                )

        return user

    async def validate_password(self, password: str, _: schemas.UC | models.UP) -> None:
@@ -336,18 +346,17 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
        associate_by_email: bool = False,
        is_verified_by_default: bool = False,
    ) -> User:
        referral_source = (
            getattr(request.state, "referral_source", None) if request else None
        )
        referral_source = None
        if request:
            referral_source = getattr(request.state, "referral_source", None)

        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            "get_or_create_tenant_id",
            async_return_default_schema,
        )(
            email=account_email,
            referral_source=referral_source,
            request=request,
        )

        if not tenant_id:
@@ -409,7 +418,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):

            # Add OAuth account
            await self.user_db.add_oauth_account(user, oauth_account_dict)

            await self.on_after_register(user, request)

        else:
@@ -463,39 +471,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
    async def on_after_register(
        self, user: User, request: Optional[Request] = None
    ) -> None:
        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            async_return_default_schema,
        )(
            email=user.email,
            request=request,
        )

        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
        try:
            user_count = await get_user_count()

            with get_session_with_tenant(tenant_id=tenant_id) as db_session:
                if user_count == 1:
                    create_milestone_and_report(
                        user=user,
                        distinct_id=user.email,
                        event_type=MilestoneRecordType.USER_SIGNED_UP,
                        properties=None,
                        db_session=db_session,
                    )
                else:
                    create_milestone_and_report(
                        user=user,
                        distinct_id=user.email,
                        event_type=MilestoneRecordType.MULTIPLE_USERS,
                        properties=None,
                        db_session=db_session,
                    )
        finally:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        logger.notice(f"User {user.id} has registered.")
        optional_telemetry(
            record_type=RecordType.SIGN_UP,
@@ -527,7 +502,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
        # Get tenant_id from mapping table
        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            "get_or_create_tenant_id",
            async_return_default_schema,
        )(
            email=email,
@@ -588,7 +563,7 @@ class TenantAwareJWTStrategy(JWTStrategy):
    async def _create_token_data(self, user: User, impersonate: bool = False) -> dict:
        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            "get_or_create_tenant_id",
            async_return_default_schema,
        )(
            email=user.email,
@@ -3,12 +3,11 @@ import multiprocessing
import time
from typing import Any

import requests
import sentry_sdk
from celery import Task
from celery.app import trace
from celery.exceptions import WorkerShutdown
from celery.signals import task_postrun
from celery.signals import task_prerun
from celery.states import READY_STATES
from celery.utils.log import get_task_logger
from celery.worker import strategy  # type: ignore
@@ -22,7 +21,6 @@ from onyx.background.celery.apps.task_formatters import CeleryTaskPlainFormatter
from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine import get_sqlalchemy_engine
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa_constants import VESPA_CONFIG_SERVER_URL
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
@@ -36,11 +34,8 @@ from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import ColoredFormatter
from onyx.utils.logger import PlainFormatter
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import SENTRY_DSN
from shared_configs.configs import TENANT_ID_PREFIX
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR


logger = setup_logger()

@@ -61,8 +56,8 @@ def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple[Any, ...] | None = None,
    kwargs: dict[str, Any] | None = None,
    args: tuple | None = None,
    kwargs: dict | None = None,
    **kwds: Any,
) -> None:
    pass
@@ -262,8 +257,7 @@ def wait_for_vespa(sender: Any, **kwargs: Any) -> None:
    logger.info("Vespa: Readiness probe starting.")
    while True:
        try:
            client = get_vespa_http_client()
            response = client.get(f"{VESPA_CONFIG_SERVER_URL}/state/v1/health")
            response = requests.get(f"{VESPA_CONFIG_SERVER_URL}/state/v1/health")
            response.raise_for_status()

            response_dict = response.json()
@@ -352,36 +346,26 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:


def on_setup_logging(
    loglevel: int,
    logfile: str | None,
    format: str,
    colorize: bool,
    **kwargs: Any,
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    # TODO: could unhardcode format and colorize and accept these as options from
    # celery's config

    # reformats the root logger
    root_logger = logging.getLogger()
    root_logger.handlers = []

    # Define the log format
    log_format = (
        "%(levelname)-8s %(asctime)s %(filename)15s:%(lineno)-4d: %(name)s %(message)s"
    )

    # Set up the root handler
    root_handler = logging.StreamHandler()
    root_handler = logging.StreamHandler()  # Set up a handler for the root logger
    root_formatter = ColoredFormatter(
        log_format,
        "%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
        datefmt="%m/%d/%Y %I:%M:%S %p",
    )
    root_handler.setFormatter(root_formatter)
    root_logger.addHandler(root_handler)
    root_logger.addHandler(root_handler)  # Apply the handler to the root logger

    if logfile:
        root_file_handler = logging.FileHandler(logfile)
        root_file_formatter = PlainFormatter(
            log_format,
            "%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
            datefmt="%m/%d/%Y %I:%M:%S %p",
        )
        root_file_handler.setFormatter(root_file_formatter)
@@ -389,23 +373,19 @@ def on_setup_logging(

    root_logger.setLevel(loglevel)

    # Configure the task logger
    task_logger.handlers = []

    task_handler = logging.StreamHandler()
    task_handler.addFilter(TenantContextFilter())
    # reformats celery's task logger
    task_formatter = CeleryTaskColoredFormatter(
        log_format,
        "%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
        datefmt="%m/%d/%Y %I:%M:%S %p",
    )
    task_handler = logging.StreamHandler()  # Set up a handler for the task logger
    task_handler.setFormatter(task_formatter)
    task_logger.addHandler(task_handler)
    task_logger.addHandler(task_handler)  # Apply the handler to the task logger

    if logfile:
        task_file_handler = logging.FileHandler(logfile)
        task_file_handler.addFilter(TenantContextFilter())
        task_file_formatter = CeleryTaskPlainFormatter(
            log_format,
            "%(asctime)s %(filename)30s %(lineno)4s: %(message)s",
            datefmt="%m/%d/%Y %I:%M:%S %p",
        )
        task_file_handler.setFormatter(task_file_formatter)
@@ -414,55 +394,10 @@ def on_setup_logging(
    task_logger.setLevel(loglevel)
    task_logger.propagate = False

    # Hide celery task received and succeeded/failed messages
    # hide celery task received spam
    # e.g. "Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] received"
    strategy.logger.setLevel(logging.WARNING)

    # hide celery task succeeded/failed spam
    # e.g. "Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] succeeded in 0.03137450001668185s: None"
    trace.logger.setLevel(logging.WARNING)


class TenantContextFilter(logging.Filter):
    """Logging filter to inject tenant ID into the logger's name."""

    def filter(self, record: logging.LogRecord) -> bool:
        if not MULTI_TENANT:
            record.name = ""
            return True

        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
        if tenant_id:
            tenant_id = tenant_id.split(TENANT_ID_PREFIX)[-1][:5]
            record.name = f"[t:{tenant_id}]"
        else:
            record.name = ""
        return True


@task_prerun.connect
def set_tenant_id(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple[Any, ...] | None = None,
    kwargs: dict[str, Any] | None = None,
    **other_kwargs: Any,
) -> None:
    """Signal handler to set tenant ID in context var before task starts."""
    tenant_id = (
        kwargs.get("tenant_id", POSTGRES_DEFAULT_SCHEMA)
        if kwargs
        else POSTGRES_DEFAULT_SCHEMA
    )
    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)


@task_postrun.connect
def reset_tenant_id(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple[Any, ...] | None = None,
    kwargs: dict[str, Any] | None = None,
    **other_kwargs: Any,
) -> None:
    """Signal handler to reset tenant ID in context var after task ends."""
    CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
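
A sketch of TenantContextFilter's effect (tenant id and prefix values invented): with MULTI_TENANT on and the contextvar holding "tenant_abc123def", the record name becomes a short tag:

# Sketch only -- not part of this commit.
tenant_id = "tenant_abc123def"
short = tenant_id.split("tenant_")[-1][:5]  # "abc12"
record_name = f"[t:{short}]"                # "[t:abc12]"
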
@@ -13,6 +13,7 @@ from onyx.db.engine import SqlEngine
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST
from shared_configs.configs import MULTI_TENANT

logger = setup_logger(__name__)

@@ -43,18 +44,18 @@ class DynamicTenantScheduler(PersistentScheduler):
            self._last_reload is None
            or (now - self._last_reload) > self._reload_interval
        ):
            logger.info("Reload interval reached, initiating task update")
            logger.info("Reload interval reached, initiating tenant task update")
            self._update_tenant_tasks()
            self._last_reload = now
            logger.info("Task update completed, reset reload timer")
            logger.info("Tenant task update completed, reset reload timer")
        return retval

    def _update_tenant_tasks(self) -> None:
        logger.info("Starting task update process")
        logger.info("Starting tenant task update process")
        try:
            logger.info("Fetching all IDs")
            logger.info("Fetching all tenant IDs")
            tenant_ids = get_all_tenant_ids()
            logger.info(f"Found {len(tenant_ids)} IDs")
            logger.info(f"Found {len(tenant_ids)} tenants")

            logger.info("Fetching tasks to schedule")
            tasks_to_schedule = fetch_versioned_implementation(
@@ -69,7 +70,7 @@ class DynamicTenantScheduler(PersistentScheduler):
            for task_name, _ in current_schedule:
                if "-" in task_name:
                    existing_tenants.add(task_name.split("-")[-1])
            logger.info(f"Found {len(existing_tenants)} existing items in schedule")
            logger.info(f"Found {len(existing_tenants)} existing tenants in schedule")

            for tenant_id in tenant_ids:
                if (
@@ -82,7 +83,7 @@ class DynamicTenantScheduler(PersistentScheduler):
                    continue

                if tenant_id not in existing_tenants:
                    logger.info(f"Processing new item: {tenant_id}")
                    logger.info(f"Processing new tenant: {tenant_id}")

                for task in tasks_to_schedule():
                    task_name = f"{task['name']}-{tenant_id}"
@@ -128,10 +129,11 @@ class DynamicTenantScheduler(PersistentScheduler):
                logger.info("Schedule update completed successfully")
            else:
                logger.info("Schedule is up to date, no changes needed")
        except (AttributeError, KeyError) as e:
            logger.exception(f"Failed to process task configuration: {str(e)}")
        except Exception as e:
            logger.exception(f"Unexpected error updating tasks: {str(e)}")

        except (AttributeError, KeyError):
            logger.exception("Failed to process task configuration")
        except Exception:
            logger.exception("Unexpected error updating tenant tasks")

    def _should_update_schedule(
        self, current_schedule: dict, new_schedule: dict
@@ -153,6 +155,10 @@ def on_beat_init(sender: Any, **kwargs: Any) -> None:
    SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME)
    SqlEngine.init_engine(pool_size=2, max_overflow=0)

    # Startup checks are not needed in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.wait_for_redis(sender, **kwargs)
@@ -61,14 +61,13 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME)
    SqlEngine.init_engine(pool_size=4, max_overflow=12)

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa(sender, **kwargs)

    # Less startup checks in multi-tenant case
    # Startup checks are not needed in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa(sender, **kwargs)
    app_base.on_secondary_worker_init(sender, **kwargs)


@@ -62,14 +62,13 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_APP_NAME)
    SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=sender.concurrency)

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa(sender, **kwargs)

    # Less startup checks in multi-tenant case
    # Startup checks are not needed in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa(sender, **kwargs)
    app_base.on_secondary_worker_init(sender, **kwargs)


@@ -60,15 +60,13 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:

    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME)
    SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=8)
    # Startup checks are not needed in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa(sender, **kwargs)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.on_secondary_worker_init(sender, **kwargs)


@@ -84,14 +84,14 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME)
    SqlEngine.init_engine(pool_size=8, max_overflow=0)

    # Startup checks are not needed in multi-tenant case
    if MULTI_TENANT:
        return

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
    app_base.wait_for_vespa(sender, **kwargs)

    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return

    logger.info("Running as the primary celery worker.")

    # This is singleton work that should be done on startup exactly once
@@ -1,6 +1,4 @@
# These are helper objects for tracking the keys we need to write in redis
import json
from typing import Any
from typing import cast

from redis import Redis
@@ -25,25 +23,3 @@ def celery_get_queue_length(queue: str, r: Redis) -> int:
        total_length += cast(int, length)

    return total_length


def celery_find_task(task_id: str, queue: str, r: Redis) -> int:
    """This is a redis specific way to find a task for a particular queue in redis.
    It is priority aware and knows how to look through the multiple redis lists
    used to implement task prioritization.
    This operation is not atomic.

    This is a linear search O(n) ... so be careful using it when the task queues can be large.

    Returns True if the id is in the queue, False if not.
    """
    for priority in range(len(OnyxCeleryPriority)):
        queue_name = f"{queue}{CELERY_SEPARATOR}{priority}" if priority > 0 else queue

        tasks = cast(list[bytes], r.lrange(queue_name, 0, -1))
        for task in tasks:
            task_dict: dict[str, Any] = json.loads(task.decode("utf-8"))
            if task_dict.get("headers", {}).get("id") == task_id:
                return True

    return False
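
A sketch of the priority-aware queue names celery_find_task scans, assuming CELERY_SEPARATOR matches kombu's default priority separator "\x06\x16" (an assumption, not confirmed by this diff):

# Sketch only -- not part of this commit.
queue = "connector_deletion"  # invented queue name
names = [queue] + [f"{queue}\x06\x16{p}" for p in range(1, 4)]
# ["connector_deletion", "connector_deletion\x06\x161", ...]
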
@@ -4,80 +4,55 @@ from typing import Any
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask

# we set expires because it isn't necessary to queue up these tasks
# it's only important that they run relatively regularly

tasks_to_schedule = [
    {
        "name": "check-for-vespa-sync",
        "task": OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.HIGH,
            "expires": 60,
        },
        "options": {"priority": OnyxCeleryPriority.HIGH},
    },
    {
        "name": "check-for-connector-deletion",
        "task": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.HIGH,
            "expires": 60,
        },
        "options": {"priority": OnyxCeleryPriority.HIGH},
    },
    {
        "name": "check-for-indexing",
        "task": OnyxCeleryTask.CHECK_FOR_INDEXING,
        "schedule": timedelta(seconds=15),
        "options": {
            "priority": OnyxCeleryPriority.HIGH,
            "expires": 60,
        },
        "options": {"priority": OnyxCeleryPriority.HIGH},
    },
    {
        "name": "check-for-prune",
        "task": OnyxCeleryTask.CHECK_FOR_PRUNING,
        "schedule": timedelta(seconds=15),
        "options": {
            "priority": OnyxCeleryPriority.HIGH,
            "expires": 60,
        },
        "options": {"priority": OnyxCeleryPriority.HIGH},
    },
    {
        "name": "kombu-message-cleanup",
        "task": OnyxCeleryTask.KOMBU_MESSAGE_CLEANUP_TASK,
        "schedule": timedelta(seconds=3600),
        "options": {
            "priority": OnyxCeleryPriority.LOWEST,
            "expires": 60,
        },
        "options": {"priority": OnyxCeleryPriority.LOWEST},
    },
    {
        "name": "monitor-vespa-sync",
        "task": OnyxCeleryTask.MONITOR_VESPA_SYNC,
        "schedule": timedelta(seconds=5),
        "options": {
            "priority": OnyxCeleryPriority.HIGH,
            "expires": 60,
        },
        "options": {"priority": OnyxCeleryPriority.HIGH},
    },
    {
        "name": "check-for-doc-permissions-sync",
        "task": OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,
        "schedule": timedelta(seconds=30),
        "options": {
            "priority": OnyxCeleryPriority.HIGH,
            "expires": 60,
        },
        "options": {"priority": OnyxCeleryPriority.HIGH},
    },
    {
        "name": "check-for-external-group-sync",
        "task": OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,
        "schedule": timedelta(seconds=20),
        "options": {
            "priority": OnyxCeleryPriority.HIGH,
            "expires": 60,
        },
        "options": {"priority": OnyxCeleryPriority.HIGH},
    },
]
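
For context, what the "expires" option in one side of this hunk does under standard Celery semantics: a queued run that has not started within 60 seconds is discarded rather than executed late, which keeps these frequent beat tasks from piling up behind a busy worker. A sketch with illustrative values:

# Sketch only -- not part of this commit.
from datetime import timedelta

entry = {
    "name": "check-for-indexing",
    "task": "check_for_indexing",  # illustrative task name string
    "schedule": timedelta(seconds=15),
    "options": {"priority": 0, "expires": 60},  # 0 stands in for OnyxCeleryPriority.HIGH
}
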
@@ -76,7 +76,7 @@ def check_for_connector_deletion_task(self: Task, *, tenant_id: str | None) -> N
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception:
        task_logger.exception("Unexpected exception during connector deletion check")
        task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
    finally:
        if lock_beat.owned():
            lock_beat.release()
@@ -131,14 +131,14 @@ def try_generate_document_cc_pair_cleanup_tasks(
    redis_connector_index = redis_connector.new_index(search_settings.id)
    if redis_connector_index.fenced:
        raise TaskDependencyError(
            "Connector deletion - Delayed (indexing in progress): "
            f"Connector deletion - Delayed (indexing in progress): "
            f"cc_pair={cc_pair_id} "
            f"search_settings={search_settings.id}"
        )

    if redis_connector.prune.fenced:
        raise TaskDependencyError(
            "Connector deletion - Delayed (pruning in progress): "
            f"Connector deletion - Delayed (pruning in progress): "
            f"cc_pair={cc_pair_id}"
        )

@@ -175,7 +175,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
    # return 0

    task_logger.info(
        "RedisConnectorDeletion.generate_tasks finished. "
        f"RedisConnectorDeletion.generate_tasks finished. "
        f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
    )
@@ -1,9 +1,7 @@
|
||||
import time
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from http import HTTPStatus
|
||||
from time import sleep
|
||||
from typing import Any
|
||||
|
||||
import redis
|
||||
import sentry_sdk
|
||||
@@ -17,7 +15,6 @@ from redis.lock import Lock as RedisLock
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.background.celery.celery_redis import celery_find_task
|
||||
from onyx.background.indexing.job_client import SimpleJobClient
|
||||
from onyx.background.indexing.run_indexing import run_indexing_entrypoint
|
||||
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
|
||||
@@ -165,19 +162,11 @@ def get_unfenced_index_attempt_ids(db_session: Session, r: redis.Redis) -> list[
|
||||
bind=True,
|
||||
)
|
||||
def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
|
||||
"""a lightweight task used to kick off indexing tasks.
|
||||
Occcasionally does some validation of existing state to clear up error conditions"""
|
||||
time_start = time.monotonic()
|
||||
|
||||
tasks_created = 0
|
||||
locked = False
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
r = get_redis_client(tenant_id=tenant_id)
|
||||
|
||||
# we need to use celery's redis client to access its redis data
|
||||
# (which lives on a different db number)
|
||||
# redis_client_celery: Redis = self.app.broker_connection().channel().client # type: ignore
|
||||
|
||||
lock_beat: RedisLock = redis_client.lock(
|
||||
lock_beat: RedisLock = r.lock(
|
||||
OnyxRedisLocks.CHECK_INDEXING_BEAT_LOCK,
|
||||
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
|
||||
)
|
||||
@@ -282,7 +271,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
|
||||
search_settings_instance,
|
||||
reindex,
|
||||
db_session,
|
||||
redis_client,
|
||||
r,
|
||||
tenant_id,
|
||||
)
|
||||
if attempt_id:
|
||||
@@ -297,9 +286,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
|
||||
# Fail any index attempts in the DB that don't have fences
|
||||
# This shouldn't ever happen!
|
||||
with get_session_with_tenant(tenant_id) as db_session:
|
||||
unfenced_attempt_ids = get_unfenced_index_attempt_ids(
|
||||
db_session, redis_client
|
||||
)
|
||||
unfenced_attempt_ids = get_unfenced_index_attempt_ids(db_session, r)
|
||||
for attempt_id in unfenced_attempt_ids:
|
||||
lock_beat.reacquire()
|
||||
|
||||
@@ -317,31 +304,12 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
|
||||
mark_attempt_failed(
|
||||
attempt.id, db_session, failure_reason=failure_reason
|
||||
)
|
||||
|
||||
# rkuo: The following code logically appears to work, but the celery inspect code may be unstable
|
||||
# turning off for the moment to see if it helps cloud stability
|
||||
|
||||
# we want to run this less frequently than the overall task
|
||||
# if not redis_client.exists(OnyxRedisSignals.VALIDATE_INDEXING_FENCES):
|
||||
# # clear any indexing fences that don't have associated celery tasks in progress
|
||||
# # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
|
||||
# # or be currently executing
|
||||
# try:
|
||||
# task_logger.info("Validating indexing fences...")
|
||||
# validate_indexing_fences(
|
||||
# tenant_id, self.app, redis_client, redis_client_celery, lock_beat
|
||||
# )
|
||||
# except Exception:
|
||||
# task_logger.exception("Exception while validating indexing fences")
|
||||
|
||||
# redis_client.set(OnyxRedisSignals.VALIDATE_INDEXING_FENCES, 1, ex=60)
|
||||
|
||||
except SoftTimeLimitExceeded:
|
||||
task_logger.info(
|
||||
"Soft time limit exceeded, task is being terminated gracefully."
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception("Unexpected exception during indexing check")
|
||||
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
|
||||
finally:
|
||||
if locked:
|
||||
if lock_beat.owned():
|
||||
@@ -352,190 +320,9 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
|
||||
f"tenant={tenant_id}"
|
||||
)
|
||||
|
||||
time_elapsed = time.monotonic() - time_start
|
||||
task_logger.info(f"check_for_indexing finished: elapsed={time_elapsed:.2f}")
|
||||
return tasks_created
|
||||
|
||||
|
||||
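The check_* tasks above all share the same Redis beat-lock pattern; a minimal sketch under assumed names (lock key and timeout are illustrative, redis-py API as documented):

from redis import Redis
from redis.lock import Lock as RedisLock


def run_guarded_check(r: Redis) -> None:
    # one beat tick: skip entirely if another instance already holds the lock
    lock_beat: RedisLock = r.lock("da_lock:check_indexing_example", timeout=60)
    if not lock_beat.acquire(blocking=False):
        return
    try:
        for _ in range(100):
            lock_beat.reacquire()  # extend the TTL so long loops don't lose the lock
            ...  # per-item work
    finally:
        if lock_beat.owned():  # the lock can expire mid-run; only release if still ours
            lock_beat.release()
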
def validate_indexing_fences(
    tenant_id: str | None,
    celery_app: Celery,
    r: Redis,
    r_celery: Redis,
    lock_beat: RedisLock,
) -> None:
    reserved_indexing_tasks: set[str] = set()
    active_indexing_tasks: set[str] = set()
    indexing_worker_names: list[str] = []

    # filter for and create an indexing specific inspect object
    inspect = celery_app.control.inspect()
    workers: dict[str, Any] = inspect.ping()  # type: ignore
    if not workers:
        raise ValueError("No workers found!")

    for worker_name in list(workers.keys()):
        if "indexing" in worker_name:
            indexing_worker_names.append(worker_name)

    if len(indexing_worker_names) == 0:
        raise ValueError("No indexing workers found!")

    inspect_indexing = celery_app.control.inspect(destination=indexing_worker_names)

    # NOTE: each dict entry is a map of worker name to a list of tasks
    # we want sets for reserved task and active task id's to optimize
    # subsequent validation lookups

    # get the list of reserved tasks
    reserved_tasks: dict[str, list] | None = inspect_indexing.reserved()  # type: ignore
    if reserved_tasks is None:
        raise ValueError("inspect_indexing.reserved() returned None!")

    for _, task_list in reserved_tasks.items():
        for task in task_list:
            reserved_indexing_tasks.add(task["id"])

    # get the list of active tasks
    active_tasks: dict[str, list] | None = inspect_indexing.active()  # type: ignore
    if active_tasks is None:
        raise ValueError("inspect_indexing.active() returned None!")

    for _, task_list in active_tasks.items():
        for task in task_list:
            active_indexing_tasks.add(task["id"])

    # validate all existing indexing jobs
    for key_bytes in r.scan_iter(RedisConnectorIndex.FENCE_PREFIX + "*"):
        lock_beat.reacquire()
        with get_session_with_tenant(tenant_id) as db_session:
            validate_indexing_fence(
                tenant_id,
                key_bytes,
                reserved_indexing_tasks,
                active_indexing_tasks,
                r_celery,
                db_session,
            )
    return

def validate_indexing_fence(
    tenant_id: str | None,
    key_bytes: bytes,
    reserved_tasks: set[str],
    active_tasks: set[str],
    r_celery: Redis,
    db_session: Session,
) -> None:
    """Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.
    This can happen if the indexing worker hard crashes or is terminated.
    Being in this bad state means the fence will never clear without help, so this function
    gives the help.

    How this works:
    1. Active signal is renewed with a 5 minute TTL
    1.1. When the fence is created
    1.2. When the task is seen in the redis queue
    1.3. When the task is seen in the reserved or active list for a worker
    2. The TTL allows us to get through the transitions on fence startup
    and when the task starts executing.

    More TTL clarification: it is seemingly impossible to exactly query Celery for
    whether a task is in the queue or currently executing.
    1. An unknown task id is always returned as state PENDING.
    2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task
    and the time it actually starts on the worker.
    """
    # if the fence doesn't exist, there's nothing to do
    fence_key = key_bytes.decode("utf-8")
    composite_id = RedisConnector.get_id_from_fence_key(fence_key)
    if composite_id is None:
        task_logger.warning(
            f"validate_indexing_fence - could not parse composite_id from {fence_key}"
        )
        return

    # parse out metadata and initialize the helper class with it
    parts = composite_id.split("/")
    if len(parts) != 2:
        return

    cc_pair_id = int(parts[0])
    search_settings_id = int(parts[1])

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    redis_connector_index = redis_connector.new_index(search_settings_id)
    if not redis_connector_index.fenced:
        return

    payload = redis_connector_index.payload
    if not payload:
        return

    # OK, there's actually something for us to validate

    if payload.celery_task_id is None:
        # the fence is just barely set up.
        if redis_connector_index.active():
            return

        # it would be odd to get here as there isn't that much that can go wrong during
        # initial fence setup, but it's still worth making sure we can recover
        logger.info(
            f"validate_indexing_fence - Resetting fence in basic state without any activity: fence={fence_key}"
        )
        redis_connector_index.reset()
        return

    found = celery_find_task(
        payload.celery_task_id, OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
    )
    if found:
        # the celery task exists in the redis queue
        redis_connector_index.set_active()
        return

    if payload.celery_task_id in reserved_tasks:
        # the celery task was prefetched and is reserved within the indexing worker
        redis_connector_index.set_active()
        return

    if payload.celery_task_id in active_tasks:
        # the celery task is active (aka currently executing)
        redis_connector_index.set_active()
        return

    # we may want to enable this check if using the active task list somehow isn't good enough
    # if redis_connector_index.generator_locked():
    #     logger.info(f"{payload.celery_task_id} is currently executing.")

    # we didn't find any direct indication that associated celery tasks exist, but they still might be there
    # due to gaps in our ability to check states during transitions
    # Rely on the active signal (which has a duration that allows us to bridge those gaps)
    if redis_connector_index.active():
        return

    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
    logger.warning(
        f"validate_indexing_fence - Resetting fence because no associated celery tasks were found: fence={fence_key}"
    )
    if payload.index_attempt_id:
        try:
            mark_attempt_failed(
                payload.index_attempt_id,
                db_session,
                "validate_indexing_fence - Canceling index attempt due to missing celery tasks",
            )
        except Exception:
            logger.exception(
                "validate_indexing_fence - Exception while marking index attempt as failed."
            )

    redis_connector_index.reset()
    return

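Condensed, the validation above reduces to a single predicate. The following is a hedged restatement of the logic in the diff, with assumed helper shapes, not the production module:

def fence_is_orphaned(
    payload,  # fence payload with .celery_task_id, as in the diff
    index,    # RedisConnectorIndex-like helper exposing .active()
    found_in_queue: bool,
    reserved: set[str],
    active: set[str],
) -> bool:
    if payload.celery_task_id is None:
        # fence just created; the 5-minute active signal bridges this window
        return not index.active()
    if found_in_queue or payload.celery_task_id in reserved or payload.celery_task_id in active:
        return False  # a live celery task still backs the fence
    # no direct evidence of a task; trust the active signal before declaring a crash
    return not index.active()
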
def _should_index(
    cc_pair: ConnectorCredentialPair,
    last_index: IndexAttempt | None,
@@ -682,7 +469,6 @@ def try_creating_indexing_task(
        celery_task_id=None,
    )

    redis_connector_index.set_active()
    redis_connector_index.set_fence(payload)

    # create the index attempt for tracking purposes
@@ -716,14 +502,13 @@ def try_creating_indexing_task(
            raise RuntimeError("send_task for connector_indexing_proxy_task failed.")

        # now fill out the fence with the rest of the data
        redis_connector_index.set_active()

        payload.index_attempt_id = index_attempt_id
        payload.celery_task_id = result.id
        redis_connector_index.set_fence(payload)
    except Exception:
        task_logger.exception(
            f"try_creating_indexing_task - Unexpected exception: "
            f"tenant={tenant_id} "
            f"cc_pair={cc_pair.id} "
            f"search_settings={search_settings.id}"
        )
@@ -755,6 +540,7 @@ def connector_indexing_proxy_task(
    """celery tasks are forked, but forking is unstable. This proxies work to a spawned task."""
    task_logger.info(
        f"Indexing watchdog - starting: attempt={index_attempt_id} "
        f"tenant={tenant_id} "
        f"cc_pair={cc_pair_id} "
        f"search_settings={search_settings_id}"
    )
@@ -777,14 +563,15 @@ def connector_indexing_proxy_task(
    if not job:
        task_logger.info(
            f"Indexing watchdog - spawn failed: attempt={index_attempt_id} "
            f"tenant={tenant_id} "
            f"cc_pair={cc_pair_id} "
            f"search_settings={search_settings_id}"
        )
        return

    task_logger.info(
        f"Indexing proxy - spawn succeeded: attempt={index_attempt_id} "
        f"Indexing watchdog - spawn succeeded: attempt={index_attempt_id} "
        f"tenant={tenant_id} "
        f"cc_pair={cc_pair_id} "
        f"search_settings={search_settings_id}"
    )
@@ -799,6 +586,7 @@ def connector_indexing_proxy_task(
            task_logger.warning(
                "Indexing watchdog - termination signal detected: "
                f"attempt={index_attempt_id} "
                f"tenant={tenant_id} "
                f"cc_pair={cc_pair_id} "
                f"search_settings={search_settings_id}"
            )
@@ -858,7 +646,7 @@ def connector_indexing_proxy_task(
        if job.process:
            exit_code = job.process.exitcode

        # seeing odd behavior where spawned tasks usually return exit code 1 in the cloud,
        # seeing non-deterministic behavior where spawned tasks occasionally return exit code 1
        # even though logging clearly indicates that they completed successfully
        # to work around this, we ignore the job error state if the completion signal is OK
        status_int = redis_connector_index.get_completion()
@@ -893,6 +681,7 @@ def connector_indexing_proxy_task(

    task_logger.info(
        f"Indexing watchdog - finished: attempt={index_attempt_id} "
        f"tenant={tenant_id} "
        f"cc_pair={cc_pair_id} "
        f"search_settings={search_settings_id}"
    )
@@ -1088,7 +877,6 @@ def connector_indexing_task(
        f"search_settings={search_settings_id}"
    )

    # This is where the heavy/real work happens
    run_indexing_entrypoint(
        index_attempt_id,
        tenant_id,
@@ -1118,6 +906,7 @@ def connector_indexing_task(

    logger.info(
        f"Indexing spawned task finished: attempt={index_attempt_id} "
        f"tenant={tenant_id} "
        f"cc_pair={cc_pair_id} "
        f"search_settings={search_settings_id}"
    )

@@ -122,7 +122,7 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> None:
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception:
        task_logger.exception("Unexpected exception during pruning check")
        task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
    finally:
        if lock_beat.owned():
            lock_beat.release()
@@ -308,7 +308,7 @@ def connector_pruning_generator_task(
            doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)

            task_logger.info(
                "Pruning set collected: "
                f"Pruning set collected: "
                f"cc_pair={cc_pair_id} "
                f"connector_source={cc_pair.connector.source} "
                f"docs_to_remove={len(doc_ids_to_remove)}"
@@ -324,7 +324,7 @@ def connector_pruning_generator_task(
            return None

        task_logger.info(
            "RedisConnector.prune.generate_tasks finished. "
            f"RedisConnector.prune.generate_tasks finished. "
            f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
        )


@@ -60,7 +60,7 @@ def document_by_cc_pair_cleanup_task(
    connector / credential pair from the access list
    (6) delete all relevant entries from postgres
    """
    task_logger.debug(f"Task start: doc={document_id}")
    task_logger.debug(f"Task start: tenant={tenant_id} doc={document_id}")

    try:
        with get_session_with_tenant(tenant_id) as db_session:
@@ -129,13 +129,16 @@ def document_by_cc_pair_cleanup_task(
            db_session.commit()

            task_logger.info(
                f"tenant={tenant_id} "
                f"doc={document_id} "
                f"action={action} "
                f"refcount={count} "
                f"chunks={chunks_affected}"
            )
    except SoftTimeLimitExceeded:
        task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
        task_logger.info(
            f"SoftTimeLimitExceeded exception. tenant={tenant_id} doc={document_id}"
        )
        return False
    except Exception as ex:
        if isinstance(ex, RetryError):
@@ -154,12 +157,15 @@ def document_by_cc_pair_cleanup_task(
                if e.response.status_code == HTTPStatus.BAD_REQUEST:
                    task_logger.exception(
                        f"Non-retryable HTTPStatusError: "
                        f"tenant={tenant_id} "
                        f"doc={document_id} "
                        f"status={e.response.status_code}"
                    )
                return False

        task_logger.exception(f"Unexpected exception: doc={document_id}")
        task_logger.exception(
            f"Unexpected exception: tenant={tenant_id} doc={document_id}"
        )

        if self.request.retries < DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES:
            # Still retrying. Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
@@ -170,7 +176,7 @@ def document_by_cc_pair_cleanup_task(
        # eventually gets fixed out of band via stale document reconciliation
        task_logger.warning(
            f"Max celery task retries reached. Marking doc as dirty for reconciliation: "
            f"doc={document_id}"
            f"tenant={tenant_id} doc={document_id}"
        )
        with get_session_with_tenant(tenant_id) as db_session:
            # delete the cc pair relationship now and let reconciliation clean it up

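The backoff comment above (2^4 to 2^6, i.e. 16, 32, 64 seconds) corresponds to a countdown derived from the retry counter; a minimal sketch assuming Celery's standard bind/retry API (task name and constant are placeholders):

from celery import shared_task

MAX_RETRIES = 3  # stands in for DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES


@shared_task(bind=True, max_retries=MAX_RETRIES)
def cleanup_doc(self, document_id: str) -> bool:
    try:
        ...  # the actual cleanup work
        return True
    except Exception as e:
        # retries counts from 0, so countdown = 16, 32, 64 before giving up
        countdown = 2 ** (self.request.retries + 4)
        raise self.retry(exc=e, countdown=countdown)
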
@@ -1,4 +1,3 @@
import time
import traceback
from datetime import datetime
from datetime import timezone
@@ -90,11 +89,10 @@ logger = setup_logger()
def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> None:
    """Runs periodically to check if any document needs syncing.
    Generates sets of tasks for Celery if syncing is needed."""
    time_start = time.monotonic()

    r = get_redis_client(tenant_id=tenant_id)

    lock_beat: RedisLock = r.lock(
    lock_beat = r.lock(
        OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
    )
@@ -158,15 +156,11 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> None:
            "Soft time limit exceeded, task is being terminated gracefully."
        )
    except Exception:
        task_logger.exception("Unexpected exception during vespa metadata sync")
        task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
    finally:
        if lock_beat.owned():
            lock_beat.release()

    time_elapsed = time.monotonic() - time_start
    task_logger.info(f"check_for_vespa_sync_task finished: elapsed={time_elapsed:.2f}")
    return


def try_generate_stale_document_sync_tasks(
    celery_app: Celery,
@@ -736,7 +730,6 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:

    Returns True if the task actually did work, False if it exited early to prevent overlap
    """
    time_start = time.monotonic()
    r = get_redis_client(tenant_id=tenant_id)

    lock_beat: RedisLock = r.lock(
@@ -831,8 +824,6 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
        if lock_beat.owned():
            lock_beat.release()

    time_elapsed = time.monotonic() - time_start
    task_logger.info(f"monitor_vespa_sync finished: elapsed={time_elapsed:.2f}")
    return True


@@ -882,9 +873,13 @@ def vespa_metadata_sync_task(
            # the sync might repeat again later
            mark_document_as_synced(document_id, db_session)

            task_logger.info(f"doc={document_id} action=sync chunks={chunks_affected}")
            task_logger.info(
                f"tenant={tenant_id} doc={document_id} action=sync chunks={chunks_affected}"
            )
    except SoftTimeLimitExceeded:
        task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
        task_logger.info(
            f"SoftTimeLimitExceeded exception. tenant={tenant_id} doc={document_id}"
        )
    except Exception as ex:
        if isinstance(ex, RetryError):
            task_logger.warning(
@@ -902,13 +897,14 @@ def vespa_metadata_sync_task(
                if e.response.status_code == HTTPStatus.BAD_REQUEST:
                    task_logger.exception(
                        f"Non-retryable HTTPStatusError: "
                        f"tenant={tenant_id} "
                        f"doc={document_id} "
                        f"status={e.response.status_code}"
                    )
                return False

        task_logger.exception(
            f"Unexpected exception during vespa metadata sync: doc={document_id}"
            f"Unexpected exception: tenant={tenant_id} doc={document_id}"
        )

        # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64

@@ -65,7 +65,7 @@ class CitationProcessor:
            # Handle code blocks without language tags
            if "`" in self.curr_segment:
                if self.curr_segment.endswith("`"):
                    pass
                    return
                elif "```" in self.curr_segment:
                    piece_that_comes_after = self.curr_segment.split("```")[1][0]
                    if piece_that_comes_after == "\n" and in_code_block(self.llm_out):

@@ -1,7 +1,6 @@
import json
import os
import urllib.parse
from typing import cast

from onyx.configs.constants import AuthType
from onyx.configs.constants import DocumentIndexType
@@ -145,7 +144,6 @@ POSTGRES_PASSWORD = urllib.parse.quote_plus(
POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost"
POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432"
POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres"
AWS_REGION = os.environ.get("AWS_REGION") or "us-east-2"

POSTGRES_API_SERVER_POOL_SIZE = int(
    os.environ.get("POSTGRES_API_SERVER_POOL_SIZE") or 40
@@ -176,9 +174,6 @@ try:
except ValueError:
    POSTGRES_IDLE_SESSIONS_TIMEOUT = POSTGRES_IDLE_SESSIONS_TIMEOUT_DEFAULT

USE_IAM_AUTH = os.getenv("USE_IAM_AUTH", "False").lower() == "true"


REDIS_SSL = os.getenv("REDIS_SSL", "").lower() == "true"
REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
@@ -488,21 +483,6 @@ SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")

PARSE_WITH_TRAFILATURA = os.environ.get("PARSE_WITH_TRAFILATURA", "").lower() == "true"

# allow for custom error messages for different errors returned by litellm
# for example, can specify: {"Violated content safety policy": "EVIL REQUEST!!!"}
# to make it so that if an LLM call returns an error containing "Violated content safety policy"
# the end user will see "EVIL REQUEST!!!" instead of the default error message.
_LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS = os.environ.get(
    "LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS", ""
)
LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS: dict[str, str] | None = None
try:
    LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS = cast(
        dict[str, str], json.loads(_LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS)
    )
except json.JSONDecodeError:
    pass

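To illustrate how the mapping above is intended to behave once parsed (the error text here is invented; the mapping value comes from the comment in the diff):

import json

# e.g. LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS='{"Violated content safety policy": "EVIL REQUEST!!!"}'
raw = '{"Violated content safety policy": "EVIL REQUEST!!!"}'
mappings = json.loads(raw)

error_msg = "litellm.BadRequestError: Violated content safety policy"
for pattern, custom_msg in mappings.items():
    if pattern in error_msg:
        error_msg = custom_msg  # the end user sees the override instead
        break
print(error_msg)  # -> EVIL REQUEST!!!
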
#####
# Enterprise Edition Configs
#####

@@ -63,10 +63,6 @@ LANGUAGE_CHAT_NAMING_HINT = (
    or "The name of the conversation must be in the same language as the user query."
)

# Number of prompts each persona should have
NUM_PERSONA_PROMPTS = 4
NUM_PERSONA_PROMPT_GENERATION_CHUNKS = 5

# Agentic search takes significantly more tokens and therefore has much higher cost.
# This configuration allows users to get a search-only experience with instant results
# and no involvement from the LLM.

@@ -49,7 +49,6 @@ POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child"
POSTGRES_PERMISSIONS_APP_NAME = "permissions"
POSTGRES_UNKNOWN_APP_NAME = "unknown"

SSL_CERT_FILE = "bundle.pem"
# API Keys
DANSWER_API_KEY_PREFIX = "API_KEY__"
DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = "onyxapikey.ai"
@@ -275,10 +274,6 @@ class OnyxRedisLocks:
    SLACK_BOT_HEARTBEAT_PREFIX = "da_heartbeat:slack_bot"


class OnyxRedisSignals:
    VALIDATE_INDEXING_FENCES = "signal:validate_indexing_fences"


class OnyxCeleryPriority(int, Enum):
    HIGHEST = 0
    HIGH = auto()

@@ -316,23 +316,6 @@ def update_chat_session(
    return chat_session


def delete_all_chat_sessions_for_user(
    user: User | None, db_session: Session, hard_delete: bool = HARD_DELETE_CHATS
) -> None:
    user_id = user.id if user is not None else None

    query = db_session.query(ChatSession).filter(
        ChatSession.user_id == user_id, ChatSession.onyxbot_flow.is_(False)
    )

    if hard_delete:
        query.delete(synchronize_session=False)
    else:
        query.update({ChatSession.deleted: True}, synchronize_session=False)

    db_session.commit()


def delete_chat_session(
    user_id: UUID | None,
    chat_session_id: UUID,

@@ -1,7 +1,5 @@
import contextlib
import os
import re
import ssl
import threading
import time
from collections.abc import AsyncGenerator
@@ -12,8 +10,6 @@ from datetime import datetime
from typing import Any
from typing import ContextManager

import asyncpg  # type: ignore
import boto3
import jwt
from fastapi import HTTPException
from fastapi import Request
@@ -27,7 +23,6 @@ from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker

from onyx.configs.app_configs import AWS_REGION
from onyx.configs.app_configs import LOG_POSTGRES_CONN_COUNTS
from onyx.configs.app_configs import LOG_POSTGRES_LATENCY
from onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
@@ -42,7 +37,6 @@ from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USER
from onyx.configs.app_configs import USER_AUTH_SECRET
from onyx.configs.constants import POSTGRES_UNKNOWN_APP_NAME
from onyx.configs.constants import SSL_CERT_FILE
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -55,87 +49,28 @@ logger = setup_logger()
SYNC_DB_API = "psycopg2"
ASYNC_DB_API = "asyncpg"

USE_IAM_AUTH = os.getenv("USE_IAM_AUTH", "False").lower() == "true"
# global so we don't create more than one engine per process
# outside of being best practice, this is needed so we can properly pool
# connections and not create a new pool on every request

# Global so we don't create more than one engine per process
_ASYNC_ENGINE: AsyncEngine | None = None
SessionFactory: sessionmaker[Session] | None = None


def create_ssl_context_if_iam() -> ssl.SSLContext | None:
    """Create an SSL context if IAM authentication is enabled, else return None."""
    if USE_IAM_AUTH:
        return ssl.create_default_context(cafile=SSL_CERT_FILE)
    return None


ssl_context = create_ssl_context_if_iam()


def get_iam_auth_token(
    host: str, port: str, user: str, region: str = "us-east-2"
) -> str:
    """
    Generate an IAM authentication token using boto3.
    """
    client = boto3.client("rds", region_name=region)
    token = client.generate_db_auth_token(
        DBHostname=host, Port=int(port), DBUsername=user
    )
    return token


def configure_psycopg2_iam_auth(
    cparams: dict[str, Any], host: str, port: str, user: str, region: str
) -> None:
    """
    Configure cparams for psycopg2 with IAM token and SSL.
    """
    token = get_iam_auth_token(host, port, user, region)
    cparams["password"] = token
    cparams["sslmode"] = "require"
    cparams["sslrootcert"] = SSL_CERT_FILE


def build_connection_string(
    *,
    db_api: str = ASYNC_DB_API,
    user: str = POSTGRES_USER,
    password: str = POSTGRES_PASSWORD,
    host: str = POSTGRES_HOST,
    port: str = POSTGRES_PORT,
    db: str = POSTGRES_DB,
    app_name: str | None = None,
    use_iam: bool = USE_IAM_AUTH,
    region: str = "us-west-2",
) -> str:
    if use_iam:
        base_conn_str = f"postgresql+{db_api}://{user}@{host}:{port}/{db}"
    else:
        base_conn_str = f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"

    # For asyncpg, do not include application_name in the connection string
    if app_name and db_api != "asyncpg":
        if "?" in base_conn_str:
            return f"{base_conn_str}&application_name={app_name}"
        else:
            return f"{base_conn_str}?application_name={app_name}"
    return base_conn_str

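A usage sketch for the IAM-aware build_connection_string above; host and user values are placeholders:

# With use_iam=True the password is left out of the URL; a short-lived RDS auth
# token is injected per-connection by the "do_connect" hooks shown elsewhere in the diff.
sync_url = build_connection_string(
    db_api="psycopg2",
    user="onyx_app",             # placeholder
    host="db.example.internal",  # placeholder
    port="5432",
    db="postgres",
    app_name="api_server",
    use_iam=True,
)
# -> postgresql+psycopg2://onyx_app@db.example.internal:5432/postgres?application_name=api_server
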
if LOG_POSTGRES_LATENCY:
    # Function to log before query execution
    @event.listens_for(Engine, "before_cursor_execute")
    def before_cursor_execute(  # type: ignore
        conn, cursor, statement, parameters, context, executemany
    ):
        conn.info["query_start_time"] = time.time()

    # Function to log after query execution
    @event.listens_for(Engine, "after_cursor_execute")
    def after_cursor_execute(  # type: ignore
        conn, cursor, statement, parameters, context, executemany
    ):
        total_time = time.time() - conn.info["query_start_time"]
        # don't spam TOO hard
        if total_time > 0.1:
            logger.debug(
                f"Query Complete: {statement}\n\nTotal Time: {total_time:.4f} seconds"
@@ -143,6 +78,7 @@ if LOG_POSTGRES_LATENCY:


if LOG_POSTGRES_CONN_COUNTS:
    # Global counter for connection checkouts and checkins
    checkout_count = 0
    checkin_count = 0

@@ -169,13 +105,21 @@ if LOG_POSTGRES_CONN_COUNTS:
        logger.debug(f"Total connection checkins: {checkin_count}")


"""END DEBUGGING LOGGING"""


def get_db_current_time(db_session: Session) -> datetime:
    """Get the current time from Postgres representing the start of the transaction
    Within the same transaction this value will not update
    This datetime object returned should be timezone aware, default Postgres timezone is UTC
    """
    result = db_session.execute(text("SELECT NOW()")).scalar()
    if result is None:
        raise ValueError("Database did not return a time")
    return result


# Regular expression to validate schema names to prevent SQL injection
SCHEMA_NAME_REGEX = re.compile(r"^[a-zA-Z0-9_-]+$")


@@ -184,9 +128,16 @@ def is_valid_schema_name(name: str) -> bool:


class SqlEngine:
    """Class to manage a global SQLAlchemy engine (needed for proper resource control).
    Will eventually subsume most of the standalone functions in this file.
    Sync only for now.
    """

    _engine: Engine | None = None
    _lock: threading.Lock = threading.Lock()
    _app_name: str = POSTGRES_UNKNOWN_APP_NAME

    # Default parameters for engine creation
    DEFAULT_ENGINE_KWARGS = {
        "pool_size": 20,
        "max_overflow": 5,
@@ -194,27 +145,33 @@ class SqlEngine:
        "pool_recycle": POSTGRES_POOL_RECYCLE,
    }

    def __init__(self) -> None:
        pass

    @classmethod
    def _init_engine(cls, **engine_kwargs: Any) -> Engine:
        """Private helper method to create and return an Engine."""
        connection_string = build_connection_string(
            db_api=SYNC_DB_API, app_name=cls._app_name + "_sync", use_iam=USE_IAM_AUTH
            db_api=SYNC_DB_API, app_name=cls._app_name + "_sync"
        )
        merged_kwargs = {**cls.DEFAULT_ENGINE_KWARGS, **engine_kwargs}
        engine = create_engine(connection_string, **merged_kwargs)

        if USE_IAM_AUTH:
            event.listen(engine, "do_connect", provide_iam_token)

        return engine
        return create_engine(connection_string, **merged_kwargs)

    @classmethod
    def init_engine(cls, **engine_kwargs: Any) -> None:
        """Allow the caller to init the engine with extra params. Different clients
        such as the API server and different Celery workers and tasks
        need different settings.
        """
        with cls._lock:
            if not cls._engine:
                cls._engine = cls._init_engine(**engine_kwargs)

    @classmethod
    def get_engine(cls) -> Engine:
        """Gets the SQLAlchemy engine. Will init a default engine if init hasn't
        already been called. You probably want to init first!
        """
        if not cls._engine:
            with cls._lock:
                if not cls._engine:
@@ -223,10 +180,12 @@ class SqlEngine:

    @classmethod
    def set_app_name(cls, app_name: str) -> None:
        """Class method to set the app name."""
        cls._app_name = app_name

    @classmethod
    def get_app_name(cls) -> str:
        """Class method to get current app name."""
        if not cls._app_name:
            return ""
        return cls._app_name

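Intended usage of the SqlEngine singleton above is roughly the following; the app name and pool numbers are illustrative:

from sqlalchemy import text

SqlEngine.set_app_name("celery_worker_indexing")     # before first engine use
SqlEngine.init_engine(pool_size=8, max_overflow=0)   # per-process pool tuning

engine = SqlEngine.get_engine()  # same instance on every later call
with engine.connect() as conn:
    conn.execute(text("SELECT 1"))
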
@@ -258,71 +217,56 @@ def get_all_tenant_ids() -> list[str] | list[None]:
        for tenant in tenant_ids
        if tenant is None or tenant.startswith(TENANT_ID_PREFIX)
    ]

    return valid_tenants


def build_connection_string(
    *,
    db_api: str = ASYNC_DB_API,
    user: str = POSTGRES_USER,
    password: str = POSTGRES_PASSWORD,
    host: str = POSTGRES_HOST,
    port: str = POSTGRES_PORT,
    db: str = POSTGRES_DB,
    app_name: str | None = None,
) -> str:
    if app_name:
        return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}?application_name={app_name}"
    return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"


def get_sqlalchemy_engine() -> Engine:
    return SqlEngine.get_engine()


async def get_async_connection() -> Any:
    """
    Custom connection function for async engine when using IAM auth.
    """
    host = POSTGRES_HOST
    port = POSTGRES_PORT
    user = POSTGRES_USER
    db = POSTGRES_DB
    token = get_iam_auth_token(host, port, user, AWS_REGION)

    # asyncpg requires 'ssl="require"' if SSL needed
    return await asyncpg.connect(
        user=user, password=token, host=host, port=int(port), database=db, ssl="require"
    )


def get_sqlalchemy_async_engine() -> AsyncEngine:
    global _ASYNC_ENGINE
    if _ASYNC_ENGINE is None:
        app_name = SqlEngine.get_app_name() + "_async"
        connection_string = build_connection_string(
            db_api=ASYNC_DB_API,
            use_iam=USE_IAM_AUTH,
        )

        connect_args: dict[str, Any] = {}
        if app_name:
            connect_args["server_settings"] = {"application_name": app_name}

        connect_args["ssl"] = ssl_context

        # Underlying asyncpg cannot accept application_name directly in the connection string
        # https://github.com/MagicStack/asyncpg/issues/798
        connection_string = build_connection_string()
        _ASYNC_ENGINE = create_async_engine(
            connection_string,
            connect_args=connect_args,
            connect_args={
                "server_settings": {
                    "application_name": SqlEngine.get_app_name() + "_async"
                }
            },
            # async engine is only used by API server, so we can use those values
            # here as well
            pool_size=POSTGRES_API_SERVER_POOL_SIZE,
            max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,
            pool_pre_ping=POSTGRES_POOL_PRE_PING,
            pool_recycle=POSTGRES_POOL_RECYCLE,
        )

        if USE_IAM_AUTH:

            @event.listens_for(_ASYNC_ENGINE.sync_engine, "do_connect")
            def provide_iam_token_async(
                dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
            ) -> None:
                # For async engine using asyncpg, we still need to set the IAM token here.
                host = POSTGRES_HOST
                port = POSTGRES_PORT
                user = POSTGRES_USER
                token = get_iam_auth_token(host, port, user, AWS_REGION)
                cparams["password"] = token
                cparams["ssl"] = ssl_context

    return _ASYNC_ENGINE


# Dependency to get the current tenant ID
# If no token is present, uses the default schema for this use case
def get_current_tenant_id(request: Request) -> str:
    """Dependency that extracts the tenant ID from the JWT token in the request and sets the context variable."""
    if not MULTI_TENANT:
        tenant_id = POSTGRES_DEFAULT_SCHEMA
        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
@@ -331,6 +275,7 @@ def get_current_tenant_id(request: Request) -> str:
    token = request.cookies.get("fastapiusersauth")
    if not token:
        current_value = CURRENT_TENANT_ID_CONTEXTVAR.get()
        # If no token is present, use the default schema or handle accordingly
        return current_value

    try:
@@ -344,6 +289,7 @@ def get_current_tenant_id(request: Request) -> str:
        if not is_valid_schema_name(tenant_id):
            raise HTTPException(status_code=400, detail="Invalid tenant ID format")
        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

        return tenant_id
    except jwt.InvalidTokenError:
        return CURRENT_TENANT_ID_CONTEXTVAR.get()
@@ -370,6 +316,7 @@ async def get_async_session_with_tenant(

    async with async_session_factory() as session:
        try:
            # Set the search_path to the tenant's schema
            await session.execute(text(f'SET search_path = "{tenant_id}"'))
            if POSTGRES_IDLE_SESSIONS_TIMEOUT:
                await session.execute(
@@ -379,6 +326,8 @@ async def get_async_session_with_tenant(
                )
        except Exception:
            logger.exception("Error setting search_path.")
            # You can choose to re-raise the exception or handle it
            # Here, we'll re-raise to prevent proceeding with an incorrect session
            raise
        else:
            yield session
@@ -386,6 +335,9 @@ async def get_async_session_with_tenant(

@contextmanager
def get_session_with_default_tenant() -> Generator[Session, None, None]:
    """
    Get a database session using the current tenant ID from the context variable.
    """
    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
    with get_session_with_tenant(tenant_id) as session:
        yield session
@@ -397,6 +349,7 @@ def get_session_with_tenant(
) -> Generator[Session, None, None]:
    """
    Generate a database session for a specific tenant.

    This function:
    1. Sets the database schema to the specified tenant's schema.
    2. Preserves the tenant ID across the session.
@@ -404,20 +357,27 @@ def get_session_with_tenant(
    4. Uses the default schema if no tenant ID is provided.
    """
    engine = get_sqlalchemy_engine()

    # Store the previous tenant ID
    previous_tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or POSTGRES_DEFAULT_SCHEMA

    if tenant_id is None:
        tenant_id = POSTGRES_DEFAULT_SCHEMA

    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    event.listen(engine, "checkout", set_search_path_on_checkout)

    if not is_valid_schema_name(tenant_id):
        raise HTTPException(status_code=400, detail="Invalid tenant ID")

    try:
        # Establish a raw connection
        with engine.connect() as connection:
            # Access the raw DBAPI connection and set the search_path
            dbapi_connection = connection.connection

            # Set the search_path outside of any transaction
            cursor = dbapi_connection.cursor()
            try:
                cursor.execute(f'SET search_path = "{tenant_id}"')
@@ -430,17 +390,21 @@ def get_session_with_tenant(
            finally:
                cursor.close()

            # Bind the session to the connection
            with Session(bind=connection, expire_on_commit=False) as session:
                try:
                    yield session
                finally:
                    # Reset search_path to default after the session is used
                    if MULTI_TENANT:
                        cursor = dbapi_connection.cursor()
                        try:
                            cursor.execute('SET search_path TO "$user", public')
                        finally:
                            cursor.close()

    finally:
        # Restore the previous tenant ID
        CURRENT_TENANT_ID_CONTEXTVAR.set(previous_tenant_id)

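A usage sketch for get_session_with_tenant above; the tenant schema name is a placeholder:

from sqlalchemy import text

# Pins search_path to the tenant's schema for the life of the session and
# restores the previous tenant contextvar on exit.
with get_session_with_tenant("tenant_abc123") as session:
    doc_count = session.execute(text("SELECT COUNT(*) FROM document")).scalar()
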
@@ -460,9 +424,12 @@ def get_session_generator_with_tenant() -> Generator[Session, None, None]:


def get_session() -> Generator[Session, None, None]:
    """Generate a database session with the appropriate tenant schema set."""
    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
    if tenant_id == POSTGRES_DEFAULT_SCHEMA and MULTI_TENANT:
        raise BasicAuthenticationError(detail="User must authenticate")
        raise BasicAuthenticationError(
            detail="User must authenticate",
        )

    engine = get_sqlalchemy_engine()

@@ -470,17 +437,20 @@ def get_session() -> Generator[Session, None, None]:
        if MULTI_TENANT:
            if not is_valid_schema_name(tenant_id):
                raise HTTPException(status_code=400, detail="Invalid tenant ID")
            # Set the search_path to the tenant's schema
            session.execute(text(f'SET search_path = "{tenant_id}"'))
        yield session


async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
    """Generate an async database session with the appropriate tenant schema set."""
    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
    engine = get_sqlalchemy_async_engine()
    async with AsyncSession(engine, expire_on_commit=False) as async_session:
        if MULTI_TENANT:
            if not is_valid_schema_name(tenant_id):
                raise HTTPException(status_code=400, detail="Invalid tenant ID")
            # Set the search_path to the tenant's schema
            await async_session.execute(text(f'SET search_path = "{tenant_id}"'))
        yield async_session

@@ -491,6 +461,7 @@ def get_session_context_manager() -> ContextManager[Session]:


def get_session_factory() -> sessionmaker[Session]:
    """Get a session factory."""
    global SessionFactory
    if SessionFactory is None:
        SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
@@ -518,13 +489,3 @@ async def warm_up_connections(
        await async_conn.execute(text("SELECT 1"))
    for async_conn in async_connections:
        await async_conn.close()


def provide_iam_token(dialect: Any, conn_rec: Any, cargs: Any, cparams: Any) -> None:
    if USE_IAM_AUTH:
        host = POSTGRES_HOST
        port = POSTGRES_PORT
        user = POSTGRES_USER
        region = os.getenv("AWS_REGION", "us-east-2")
        # Configure for psycopg2 with IAM token
        configure_psycopg2_iam_auth(cparams, host, port, user, region)

@@ -5,8 +5,6 @@ from typing import Literal
from typing import NotRequired
from typing import Optional
from uuid import uuid4

from pydantic import BaseModel
from typing_extensions import TypedDict  # noreorder
from uuid import UUID

@@ -1010,7 +1008,7 @@ class ChatSession(Base):
        "ChatFolder", back_populates="chat_sessions"
    )
    messages: Mapped[list["ChatMessage"]] = relationship(
        "ChatMessage", back_populates="chat_session", cascade="all, delete-orphan"
        "ChatMessage", back_populates="chat_session"
    )
    persona: Mapped["Persona"] = relationship("Persona")

@@ -1078,8 +1076,6 @@ class ChatMessage(Base):
        "SearchDoc",
        secondary=ChatMessage__SearchDoc.__table__,
        back_populates="chat_messages",
        cascade="all, delete-orphan",
        single_parent=True,
    )

    tool_call: Mapped["ToolCall"] = relationship(
@@ -1348,11 +1344,6 @@ class StarterMessage(TypedDict):
    message: str


class StarterMessageModel(BaseModel):
    name: str
    message: str


class Persona(Base):
    __tablename__ = "persona"


@@ -543,10 +543,6 @@ def upsert_persona(
        if tools is not None:
            existing_persona.tools = tools or []

        # We should only update display priority if it is not already set
        if existing_persona.display_priority is None:
            existing_persona.display_priority = display_priority

        persona = existing_persona

    else:

@@ -369,19 +369,6 @@ class AdminCapable(abc.ABC):
        raise NotImplementedError


class RandomCapable(abc.ABC):
    """Class must implement random document retrieval capability"""

    @abc.abstractmethod
    def random_retrieval(
        self,
        filters: IndexFilters,
        num_to_retrieve: int = 10,
    ) -> list[InferenceChunkUncleaned]:
        """Retrieve random chunks matching the filters"""
        raise NotImplementedError


class BaseIndex(
    Verifiable,
    Indexable,
@@ -389,7 +376,6 @@ class BaseIndex(
    Deletable,
    AdminCapable,
    IdRetrievalCapable,
    RandomCapable,
    abc.ABC,
):
    """

@@ -218,10 +218,4 @@ schema DANSWER_CHUNK_NAME {
            expression: bm25(content) + (5 * bm25(title))
        }
    }

    rank-profile random_ {
        first-phase {
            expression: random.match
        }
    }
}

@@ -2,7 +2,6 @@ import concurrent.futures
import io
import logging
import os
import random
import re
import time
import urllib
@@ -535,7 +534,7 @@ class VespaIndex(DocumentIndex):
        if self.secondary_index_name:
            index_names.append(self.secondary_index_name)

        with get_vespa_http_client(http2=False) as http_client:
        with get_vespa_http_client() as http_client:
            for index_name in index_names:
                params = httpx.QueryParams(
                    {
@@ -546,12 +545,8 @@ class VespaIndex(DocumentIndex):

                while True:
                    try:
                        vespa_url = (
                            f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}"
                        )
                        logger.debug(f'update_single PUT on URL "{vespa_url}"')
                        resp = http_client.put(
                            vespa_url,
                            f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}",
                            params=params,
                            headers={"Content-Type": "application/json"},
                            json=update_dict,
@@ -623,7 +618,7 @@ class VespaIndex(DocumentIndex):
        if self.secondary_index_name:
            index_names.append(self.secondary_index_name)

        with get_vespa_http_client(http2=False) as http_client:
        with get_vespa_http_client() as http_client:
            for index_name in index_names:
                params = httpx.QueryParams(
                    {
@@ -634,12 +629,8 @@ class VespaIndex(DocumentIndex):

                while True:
                    try:
                        vespa_url = (
                            f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}"
                        )
                        logger.debug(f'delete_single DELETE on URL "{vespa_url}"')
                        resp = http_client.delete(
                            vespa_url,
                            f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}",
                            params=params,
                        )
                        resp.raise_for_status()
@@ -912,32 +903,6 @@ class VespaIndex(DocumentIndex):

        logger.info("Batch deletion completed")

    def random_retrieval(
        self,
        filters: IndexFilters,
        num_to_retrieve: int = 10,
    ) -> list[InferenceChunkUncleaned]:
        """Retrieve random chunks matching the filters using Vespa's random ranking

        This method is currently used for random chunk retrieval in the context of
        assistant starter message creation (passed as sample context for usage by the assistant).
        """
        vespa_where_clauses = build_vespa_filters(filters, remove_trailing_and=True)

        yql = YQL_BASE.format(index_name=self.index_name) + vespa_where_clauses

        random_seed = random.randint(0, 1000000)

        params: dict[str, str | int | float] = {
            "yql": yql,
            "hits": num_to_retrieve,
            "timeout": VESPA_TIMEOUT,
            "ranking.profile": "random_",
            "ranking.properties.random.seed": random_seed,
        }

        return query_vespa(params)

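A usage sketch for random_retrieval above, as it would be called when sampling context for starter-message generation; the index instance and filter values are placeholders:

# vespa_index is assumed to be an already-constructed VespaIndex
filters = IndexFilters(document_set=["engineering_docs"], access_control_list=None)
sample_chunks = vespa_index.random_retrieval(filters=filters, num_to_retrieve=10)
# a fresh random seed per call keeps repeated samples from returning identical hits
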
class _VespaDeleteRequest:
    def __init__(self, document_id: str, index_name: str) -> None:

@@ -55,9 +55,7 @@ def remove_invalid_unicode_chars(text: str) -> str:
    return _illegal_xml_chars_RE.sub("", text)


def get_vespa_http_client(
    no_timeout: bool = False, http2: bool = False
) -> httpx.Client:
def get_vespa_http_client(no_timeout: bool = False) -> httpx.Client:
    """
    Configure and return an HTTP client for communicating with Vespa,
    including authentication if needed.
@@ -69,5 +67,5 @@ def get_vespa_http_client(
        else None,
        verify=False if not MANAGED_VESPA else True,
        timeout=None if no_timeout else VESPA_REQUEST_TIMEOUT,
        http2=http2,
        http2=True,
    )

@@ -19,12 +19,7 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()


def build_vespa_filters(
    filters: IndexFilters,
    *,
    include_hidden: bool = False,
    remove_trailing_and: bool = False,  # Set to True when using as a complete Vespa query
) -> str:
def build_vespa_filters(filters: IndexFilters, include_hidden: bool = False) -> str:
    def _build_or_filters(key: str, vals: list[str] | None) -> str:
        if vals is None:
            return ""
@@ -83,9 +78,6 @@ def build_vespa_filters(

    filter_str += _build_time_filter(filters.time_cutoff)

    if remove_trailing_and and filter_str.endswith(" and "):
        filter_str = filter_str[:-5]  # We remove the trailing " and "

    return filter_str

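The remove_trailing_and handling above exists because clauses are joined with " and " so further conditions can be appended; when the string terminates the query, the dangling connective must be stripped. A behavior sketch with an invented clause:

filter_str = '(document_set contains "eng") and (hidden = false) and '
if filter_str.endswith(" and "):
    filter_str = filter_str[:-5]  # mirrors the [:-5] slice in the diff
# -> '(document_set contains "eng") and (hidden = false)'
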
@@ -453,9 +453,7 @@ class DefaultMultiLLM(LLM):
        if LOG_DANSWER_MODEL_INTERACTIONS:
            self.log_model_configs()

        if (
            DISABLE_LITELLM_STREAMING or self.config.model_name == "o1-2024-12-17"
        ):  # TODO: remove once litellm supports streaming
        if DISABLE_LITELLM_STREAMING:
            yield self.invoke(prompt, tools, tool_choice, structured_response_format)
            return


@@ -29,7 +29,6 @@ OPENAI_PROVIDER_NAME = "openai"
OPEN_AI_MODEL_NAMES = [
    "o1-mini",
    "o1-preview",
    "o1-2024-12-17",
    "gpt-4",
    "gpt-4o",
    "gpt-4o-mini",

@@ -28,7 +28,6 @@ from litellm.exceptions import RateLimitError  # type: ignore
from litellm.exceptions import Timeout  # type: ignore
from litellm.exceptions import UnprocessableEntityError  # type: ignore

from onyx.configs.app_configs import LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS
from onyx.configs.constants import MessageType
from onyx.configs.model_configs import GEN_AI_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
@@ -46,19 +45,10 @@ logger = setup_logger()


def litellm_exception_to_error_msg(
    e: Exception,
    llm: LLM,
    fallback_to_error_msg: bool = False,
    custom_error_msg_mappings: dict[str, str]
    | None = LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS,
    e: Exception, llm: LLM, fallback_to_error_msg: bool = False
) -> str:
    error_msg = str(e)

    if custom_error_msg_mappings:
        for error_msg_pattern, custom_error_msg in custom_error_msg_mappings.items():
            if error_msg_pattern in error_msg:
                return custom_error_msg

    if isinstance(e, BadRequestError):
        error_msg = "Bad request: The server couldn't process your request. Please check your input."
    elif isinstance(e, AuthenticationError):

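The lookup order in litellm_exception_to_error_msg above is: custom substring mappings first, then the built-in per-exception-type messages. A hedged restatement with a stand-in exception type:

def to_error_msg(e: Exception, custom: dict[str, str] | None) -> str:
    # a substring match against the raw error wins over any generic message
    error_msg = str(e)
    if custom:
        for pattern, custom_msg in custom.items():
            if pattern in error_msg:
                return custom_msg
    if isinstance(e, ValueError):  # stands in for litellm's BadRequestError branch
        return "Bad request: The server couldn't process your request."
    return error_msg
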
@@ -1,46 +0,0 @@
|
||||
PERSONA_CATEGORY_GENERATION_PROMPT = """
|
||||
Based on the assistant's name, description, and instructions, generate a list of {num_categories}
|
||||
**unique and diverse** categories that represent different types of starter messages a user
|
||||
might send to initiate a conversation with this chatbot assistant.
|
||||
|
||||
**Ensure that the categories are varied and cover a wide range of topics related to the assistant's capabilities.**
|
||||
|
||||
Provide the categories as a JSON array of strings **without any code fences or additional text**.
|
||||
|
||||
**Context about the assistant:**
|
||||
- **Name**: {name}
|
||||
- **Description**: {description}
|
||||
- **Instructions**: {instructions}
|
||||
""".strip()
|
||||
|
||||
PERSONA_STARTER_MESSAGE_CREATION_PROMPT = """
|
||||
Create a starter message that a **user** might send to initiate a conversation with a chatbot assistant.
|
||||
|
||||
**Category**: {category}
|
||||
|
||||
Your response should include two parts:
|
||||
|
||||
1. **Title**: A short, engaging title that reflects the user's intent
|
||||
(e.g., 'Need Travel Advice', 'Question About Coding', 'Looking for Book Recommendations').
|
||||
|
||||
2. **Message**: The actual message that the user would send to the assistant.
|
||||
This should be natural, engaging, and encourage a helpful response from the assistant.
|
||||
**Avoid overly specific details; keep the message general and broadly applicable.**
|
||||
|
||||
For example:
|
||||
- Instead of "I've just adopted a 6-month-old Labrador puppy who's pulling on the leash,"
|
||||
write "I'm having trouble training my new puppy to walk nicely on a leash."
|
||||
|
||||
Ensure each part is clearly labeled and separated as shown above.
|
||||
Do not provide any additional text or explanation and be extremely concise
|
||||
|
||||
**Context about the assistant:**
|
||||
- **Name**: {name}
|
||||
- **Description**: {description}
|
||||
- **Instructions**: {instructions}
|
||||
""".strip()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(PERSONA_CATEGORY_GENERATION_PROMPT)
|
||||
print(PERSONA_STARTER_MESSAGE_CREATION_PROMPT)
|
||||
@@ -31,10 +31,6 @@ class RedisConnectorIndex:

    TERMINATE_PREFIX = PREFIX + "_terminate"  # connectorindexing_terminate

    # used to signal the overall workflow is still active
    # it's difficult to prevent
    ACTIVE_PREFIX = PREFIX + "_active"

    def __init__(
        self,
        tenant_id: str | None,
@@ -58,7 +54,6 @@ class RedisConnectorIndex:
            f"{self.GENERATOR_LOCK_PREFIX}_{id}/{search_settings_id}"
        )
        self.terminate_key = f"{self.TERMINATE_PREFIX}_{id}/{search_settings_id}"
        self.active_key = f"{self.ACTIVE_PREFIX}_{id}/{search_settings_id}"

    @classmethod
    def fence_key_with_ids(cls, cc_pair_id: int, search_settings_id: int) -> str:
@@ -112,26 +107,6 @@ class RedisConnectorIndex:
        # 10 minute TTL is good.
        self.redis.set(f"{self.terminate_key}_{celery_task_id}", 0, ex=600)

    def set_active(self) -> None:
        """This sets a signal to keep the indexing flow from getting cleaned up within
        the expiration time.

        The slack in timing is needed because simply checking the celery queue
        and task status can race with task creation and cleanup."""
        self.redis.set(self.active_key, 0, ex=300)

    def active(self) -> bool:
        if self.redis.exists(self.active_key):
            return True

        return False

    def generator_locked(self) -> bool:
        if self.redis.exists(self.generator_lock_key):
            return True

        return False

    def set_generator_complete(self, payload: int | None) -> None:
        if not payload:
            self.redis.delete(self.generator_complete_key)
@@ -163,7 +138,6 @@ class RedisConnectorIndex:
        return status

    def reset(self) -> None:
        self.redis.delete(self.active_key)
        self.redis.delete(self.generator_lock_key)
        self.redis.delete(self.generator_progress_key)
        self.redis.delete(self.generator_complete_key)
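The active key above is a plain TTL heartbeat: refresh it while work is ongoing, and let Redis expiry mark the flow inactive if the worker dies. A standalone sketch of the same pattern with redis-py (the key name here is invented; Onyx derives its keys from connector and search-settings ids):

import redis

r = redis.Redis()

def set_active() -> None:
    # Refresh the signal; it expires 300s after the last refresh, so a
    # crashed worker stops looking "active" without any explicit cleanup.
    r.set("exampleconnector_active", 0, ex=300)

def is_active() -> bool:
    return bool(r.exists("exampleconnector_active"))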
@@ -1,271 +0,0 @@
import json
import re
from typing import Any
from typing import cast
from typing import Dict
from typing import List

from litellm import get_supported_openai_params
from sqlalchemy.orm import Session

from onyx.configs.chat_configs import NUM_PERSONA_PROMPT_GENERATION_CHUNKS
from onyx.configs.chat_configs import NUM_PERSONA_PROMPTS
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.postprocessing.postprocessing import cleanup_chunks
from onyx.context.search.preprocessing.access_filters import (
    build_access_filters_for_user,
)
from onyx.db.document_set import get_document_sets_by_ids
from onyx.db.models import StarterMessageModel as StarterMessage
from onyx.db.models import User
from onyx.document_index.document_index_utils import get_both_index_names
from onyx.document_index.factory import get_default_document_index
from onyx.llm.factory import get_default_llms
from onyx.prompts.starter_messages import PERSONA_CATEGORY_GENERATION_PROMPT
from onyx.prompts.starter_messages import PERSONA_STARTER_MESSAGE_CREATION_PROMPT
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import FunctionCall
from onyx.utils.threadpool_concurrency import run_functions_in_parallel

logger = setup_logger()


def get_random_chunks_from_doc_sets(
    doc_sets: List[str], db_session: Session, user: User | None = None
) -> List[InferenceChunk]:
    """
    Retrieves random chunks from the specified document sets.
    """
    curr_ind_name, sec_ind_name = get_both_index_names(db_session)
    document_index = get_default_document_index(curr_ind_name, sec_ind_name)

    acl_filters = build_access_filters_for_user(user, db_session)
    filters = IndexFilters(document_set=doc_sets, access_control_list=acl_filters)

    chunks = document_index.random_retrieval(
        filters=filters, num_to_retrieve=NUM_PERSONA_PROMPT_GENERATION_CHUNKS
    )
    return cleanup_chunks(chunks)


def parse_categories(content: str) -> List[str]:
    """
    Parses the JSON array of categories from the LLM response.
    """
    # Clean the response to remove code fences and extra whitespace
    content = content.strip().strip("```").strip()
    if content.startswith("json"):
        content = content[4:].strip()

    try:
        categories = json.loads(content)
        if not isinstance(categories, list):
            logger.error("Categories are not a list.")
            return []
        return categories
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse categories: {e}")
        return []


def generate_start_message_prompts(
    name: str,
    description: str,
    instructions: str,
    categories: List[str],
    chunk_contents: str,
    supports_structured_output: bool,
    fast_llm: Any,
) -> List[FunctionCall]:
    """
    Generates the list of FunctionCall objects for starter message generation.
    """
    functions = []
    for category in categories:
        # Create a prompt specific to the category
        start_message_generation_prompt = (
            PERSONA_STARTER_MESSAGE_CREATION_PROMPT.format(
                name=name,
                description=description,
                instructions=instructions,
                category=category,
            )
        )

        if chunk_contents:
            start_message_generation_prompt += (
                "\n\nExample content this assistant has access to:\n"
                "'''\n"
                f"{chunk_contents}"
                "\n'''"
            )

        if supports_structured_output:
            functions.append(
                FunctionCall(
                    fast_llm.invoke,
                    (start_message_generation_prompt, None, None, StarterMessage),
                )
            )
        else:
            functions.append(
                FunctionCall(
                    fast_llm.invoke,
                    (start_message_generation_prompt,),
                )
            )
    return functions


def parse_unstructured_output(output: str) -> Dict[str, str]:
    """
    Parses the assistant's unstructured output into a dictionary with keys:
    - 'name' (Title)
    - 'message' (Message)
    """

    # Debug output
    logger.debug(f"LLM Output for starter message creation: {output}")

    # Patterns to match
    title_pattern = r"(?i)^\**Title\**\s*:\s*(.+)"
    message_pattern = r"(?i)^\**Message\**\s*:\s*(.+)"

    # Initialize the response dictionary
    response_dict = {}

    # Split the output into lines
    lines = output.strip().split("\n")

    # Variables to keep track of the current key being processed
    current_key = None
    current_value_lines = []

    for line in lines:
        # Check for title
        title_match = re.match(title_pattern, line.strip())
        if title_match:
            # Save previous key-value pair if any
            if current_key and current_value_lines:
                response_dict[current_key] = " ".join(current_value_lines).strip()
                current_value_lines = []
            current_key = "name"
            current_value_lines.append(title_match.group(1).strip())
            continue

        # Check for message
        message_match = re.match(message_pattern, line.strip())
        if message_match:
            if current_key and current_value_lines:
                response_dict[current_key] = " ".join(current_value_lines).strip()
                current_value_lines = []
            current_key = "message"
            current_value_lines.append(message_match.group(1).strip())
            continue

        # If the line doesn't match a new key, append it to the current value
        if current_key:
            current_value_lines.append(line.strip())

    # Add the last key-value pair
    if current_key and current_value_lines:
        response_dict[current_key] = " ".join(current_value_lines).strip()

    # Validate that the necessary keys are present
    if not all(k in response_dict for k in ["name", "message"]):
        raise ValueError("Failed to parse the assistant's response.")

    return response_dict


def generate_starter_messages(
    name: str,
    description: str,
    instructions: str,
    document_set_ids: List[int],
    db_session: Session,
    user: User | None,
) -> List[StarterMessage]:
    """
    Generates starter messages by first obtaining categories and then generating
    a message for each category.
    On failure, returns an empty list (or a partial list if only some messages
    are generated successfully).
    """
    _, fast_llm = get_default_llms(temperature=0.5)

    provider = fast_llm.config.model_provider
    model = fast_llm.config.model_name

    params = get_supported_openai_params(model=model, custom_llm_provider=provider)
    supports_structured_output = (
        isinstance(params, list) and "response_format" in params
    )

    # Generate categories
    category_generation_prompt = PERSONA_CATEGORY_GENERATION_PROMPT.format(
        name=name,
        description=description,
        instructions=instructions,
        num_categories=NUM_PERSONA_PROMPTS,
    )

    category_response = fast_llm.invoke(category_generation_prompt)
    categories = parse_categories(cast(str, category_response.content))

    if not categories:
        logger.error("No categories were generated.")
        return []

    # Fetch example content if document sets are provided
    if document_set_ids:
        document_sets = get_document_sets_by_ids(
            document_set_ids=document_set_ids,
            db_session=db_session,
        )

        chunks = get_random_chunks_from_doc_sets(
            doc_sets=[doc_set.name for doc_set in document_sets],
            db_session=db_session,
            user=user,
        )

        # Add example content context
        chunk_contents = "\n".join(chunk.content.strip() for chunk in chunks)
    else:
        chunk_contents = ""

    # Generate prompts for starter messages
    functions = generate_start_message_prompts(
        name,
        description,
        instructions,
        categories,
        chunk_contents,
        supports_structured_output,
        fast_llm,
    )

    # Run LLM calls in parallel
    if not functions:
        logger.error("No functions to execute for starter message generation.")
        return []

    results = run_functions_in_parallel(function_calls=functions)
    prompts = []

    for response in results.values():
        try:
            if supports_structured_output:
                response_dict = json.loads(response.content)
            else:
                response_dict = parse_unstructured_output(response.content)
            starter_message = StarterMessage(
                name=response_dict["name"],
                message=response_dict["message"],
            )
            prompts.append(starter_message)
        except (json.JSONDecodeError, ValueError) as e:
            logger.error(f"Failed to parse starter message: {e}")
            continue

    return prompts
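To make the unstructured parsing path concrete, here is an invented LLM reply in the labeled Title/Message format the prompt requests, fed through parse_unstructured_output from the deleted module above:

sample = (
    "**Title**: Need Travel Advice\n"
    "**Message**: I'm planning a week-long trip and would love suggestions\n"
    "for must-see places."
)
parsed = parse_unstructured_output(sample)
# parsed == {
#     "name": "Need Travel Advice",
#     "message": "I'm planning a week-long trip and would love suggestions for must-see places.",
# }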
@@ -48,7 +48,6 @@ def load_personas_from_yaml(
        data = yaml.safe_load(file)

    all_personas = data.get("personas", [])

    for persona in all_personas:
        doc_set_names = persona["document_sets"]
        doc_sets: list[DocumentSetDBModel] = [
@@ -128,7 +127,6 @@ def load_personas_from_yaml(
            display_priority=(
                existing_persona.display_priority
                if existing_persona is not None
                and persona.get("display_priority") is None
                else persona.get("display_priority")
            ),
            is_visible=(
@@ -39,7 +39,7 @@ personas:
    document_sets: []
    icon_shape: 23013
    icon_color: "#6FB1FF"
    display_priority: 0
    display_priority: 1
    is_visible: true
    starter_messages:
      - name: "Give me an overview of what's here"
@@ -64,7 +64,7 @@ personas:
    document_sets: []
    icon_shape: 50910
    icon_color: "#FF6F6F"
    display_priority: 1
    display_priority: 0
    is_visible: true
    starter_messages:
      - name: "Summarize a document"
@@ -19,7 +19,6 @@ from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import NotificationType
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.models import StarterMessageModel as StarterMessage
from onyx.db.models import User
from onyx.db.notification import create_notification
from onyx.db.persona import create_assistant_category
@@ -37,11 +36,7 @@ from onyx.db.persona import update_persona_shared_users
from onyx.db.persona import update_persona_visibility
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import ChatFileType
from onyx.secondary_llm_flows.starter_message_creation import (
    generate_starter_messages,
)
from onyx.server.features.persona.models import CreatePersonaRequest
from onyx.server.features.persona.models import GenerateStarterMessageRequest
from onyx.server.features.persona.models import ImageGenerationToolStatus
from onyx.server.features.persona.models import PersonaCategoryCreate
from onyx.server.features.persona.models import PersonaCategoryResponse
@@ -382,26 +377,3 @@ def build_final_template_prompt(
            retrieval_disabled=retrieval_disabled,
        )
    )


@basic_router.post("/assistant-prompt-refresh")
def build_assistant_prompts(
    generate_persona_prompt_request: GenerateStarterMessageRequest,
    db_session: Session = Depends(get_session),
    user: User | None = Depends(current_user),
) -> list[StarterMessage]:
    try:
        logger.info(
            "Generating starter messages for user: %s", user.id if user else "Anonymous"
        )
        return generate_starter_messages(
            name=generate_persona_prompt_request.name,
            description=generate_persona_prompt_request.description,
            instructions=generate_persona_prompt_request.instructions,
            document_set_ids=generate_persona_prompt_request.document_set_ids,
            db_session=db_session,
            user=user,
        )
    except Exception as e:
        logger.exception("Failed to generate starter messages")
        raise HTTPException(status_code=500, detail=str(e))
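For reference, a hypothetical client call against this endpoint (removed by this diff) could look like the following; the base URL, route prefix, and payload values are assumptions for illustration only:

import requests

resp = requests.post(
    "http://localhost:8080/persona/assistant-prompt-refresh",
    json={
        "name": "Support Helper",
        "description": "Answers product support questions",
        "instructions": "Be concise and cite documentation where possible",
        "document_set_ids": [],
    },
)
resp.raise_for_status()
starter_messages = resp.json()  # list of {"name": ..., "message": ...} objects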
@@ -17,14 +17,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()


# More minimal request for generating a persona prompt
class GenerateStarterMessageRequest(BaseModel):
    name: str
    description: str
    instructions: str
    document_set_ids: list[int]


class CreatePersonaRequest(BaseModel):
    name: str
    description: str
@@ -35,7 +35,6 @@ from onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS
from onyx.db.chat import add_chats_to_session_from_slack_thread
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import delete_all_chat_sessions_for_user
from onyx.db.chat import delete_chat_session
from onyx.db.chat import duplicate_chat_session_for_user_from_slack
from onyx.db.chat import get_chat_message
@@ -281,17 +280,6 @@ def patch_chat_session(
    return None


@router.delete("/delete-all-chat-sessions")
def delete_all_chat_sessions(
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    try:
        delete_all_chat_sessions_for_user(user=user, db_session=db_session)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))


@router.delete("/delete-chat-session/{session_id}")
def delete_chat_session_by_id(
    session_id: UUID,
@@ -11,7 +11,6 @@ from onyx.chat.models import RetrievalDocs
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import SearchFeedbackType
from onyx.configs.constants import SessionType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkContext
from onyx.context.search.models import RerankingDetails
@@ -152,10 +151,6 @@ class ChatSessionUpdateRequest(BaseModel):
    sharing_status: ChatSessionSharedStatus


class DeleteAllSessionsRequest(BaseModel):
    session_type: SessionType


class RenameChatSessionResponse(BaseModel):
    new_name: str  # This is only really useful if the name is generated
@@ -25,6 +25,11 @@ class ToolCallSummary(BaseModel__v1):
    tool_call_request: AIMessage
    tool_call_result: ToolMessage

    # This is a workaround to allow arbitrary types in the model
    # TODO: Remove this once we have a better solution
    class Config:
        arbitrary_types_allowed = True


def tool_call_tokens(
    tool_call_summary: ToolCallSummary, llm_tokenizer: BaseTokenizer
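For background, arbitrary_types_allowed is the Pydantic v1 escape hatch for fields whose types Pydantic has no validator for (such as LangChain message classes). A minimal standalone sketch with an invented stand-in class:

from pydantic.v1 import BaseModel  # pydantic v2's bundled v1 API; plain pydantic on 1.x

class FakeAIMessage:  # not a Pydantic model and not a known primitive
    def __init__(self, content: str) -> None:
        self.content = content

class ToolCallSummaryExample(BaseModel):
    request: FakeAIMessage

    class Config:
        # Without this flag, Pydantic v1 raises at class-definition time
        # because it cannot build a validator for FakeAIMessage.
        arbitrary_types_allowed = True

summary = ToolCallSummaryExample(request=FakeAIMessage("call the tool"))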
@@ -22,7 +22,6 @@ from onyx.utils.variable_functionality import (
from onyx.utils.variable_functionality import noop_fallback
from shared_configs.configs import MULTI_TENANT


_DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.onyx.app/anonymous_telemetry"
_CACHED_UUID: str | None = None
_CACHED_INSTANCE_DOMAIN: str | None = None
@@ -118,12 +117,9 @@ def mt_cloud_telemetry(
    event: MilestoneRecordType,
    properties: dict | None = None,
) -> None:
    print(f"mt_cloud_telemetry {distinct_id} {event} {properties}")
    if not MULTI_TENANT:
        print("mt_cloud_telemetry not MULTI_TENANT")
        return

    print("mt_cloud_telemetry MULTI_TENANT")
    # The MIT version should not need to include any Posthog code.
    # This is only for Onyx MT Cloud; this path should never be hit elsewhere,
    # since there is no reason for other orgs to run the multi-tenant version of Onyx.
@@ -141,11 +137,8 @@ def create_milestone_and_report(
    properties: dict | None,
    db_session: Session,
) -> None:
    print(f"create_milestone_and_report {user} {event_type} {db_session}")
    _, is_new = create_milestone_if_not_exists(user, event_type, db_session)
    print(f"create_milestone_and_report {is_new}")
    if is_new:
        print("create_milestone_and_report is_new")
        mt_cloud_telemetry(
            distinct_id=distinct_id,
            event=event_type,
@@ -26,10 +26,15 @@ huggingface-hub==0.20.1
jira==3.5.1
jsonref==1.1.0
trafilatura==1.12.2
langchain==0.1.17
langchain-core==0.1.50
langchain-text-splitters==0.0.1
litellm==1.55.4
langchain==0.3.7
langchain-core==0.3.24
langchain-openai==0.2.9
langchain-text-splitters==0.3.2
langchainhub==0.1.21
langgraph==0.2.59
langgraph-checkpoint==2.0.5
langgraph-sdk==0.1.44
litellm==1.53.1
lxml==5.3.0
lxml_html_clean==0.2.2
llama-index==0.9.45
@@ -12,5 +12,5 @@ torch==2.2.0
transformers==4.39.2
uvicorn==0.21.1
voyageai==0.2.3
litellm==1.55.4
litellm==1.54.1
sentry-sdk[fastapi,celery,starlette]==2.14.0
@@ -219,7 +219,6 @@ def test_slack_permission_sync(
    assert private_message not in onyx_doc_message_strings


@pytest.mark.xfail(reason="flaky", strict=False)
def test_slack_group_permission_sync(
    reset: None,
    vespa_client: vespa_fixture,
@@ -376,26 +376,6 @@ def process_text(
            "The code demonstrates variable assignment.",
            [],
        ),
        (
            "Long JSON string in code block",
            [
                "```json\n{",
                '"name": "John Doe",',
                '"age": 30,',
                '"city": "New York",',
                '"hobbies": ["reading", "swimming", "cycling"],',
                '"education": {',
                ' "degree": "Bachelor\'s",',
                ' "major": "Computer Science",',
                ' "university": "Example University"',
                "}",
                "}\n```",
            ],
            '```json\n{"name": "John Doe","age": 30,"city": "New York","hobbies": '
            '["reading", "swimming", "cycling"],"education": { '
            '"degree": "Bachelor\'s", "major": "Computer Science", "university": "Example University"}}\n```',
            [],
        ),
        (
            "Citation as a single token",
            [
@@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-beat
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.20
          imagePullPolicy: IfNotPresent
          command:
            [

@@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-worker-heavy
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.20
          imagePullPolicy: IfNotPresent
          command:
            [

@@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-worker-indexing
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.20
          imagePullPolicy: IfNotPresent
          command:
            [

@@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-worker-light
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.20
          imagePullPolicy: IfNotPresent
          command:
            [

@@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-worker-primary
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.21
          image: onyxdotapp/onyx-backend-cloud:v0.14.0-cloud.beta.20
          imagePullPolicy: IfNotPresent
          command:
            [
@@ -92,7 +92,6 @@ services:
      - LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-}
      - LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-}
      - CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}
      - LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=${LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS:-}

      # Analytics Configs
      - SENTRY_DSN=${SENTRY_DSN:-}
@@ -104,13 +103,6 @@ services:
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
      - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
      # Seeding configuration
      - USE_IAM_AUTH=${USE_IAM_AUTH:-}
      - AWS_REGION=${AWS_REGION-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the lines below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
@@ -231,13 +223,6 @@ services:

      # Enterprise Edition stuff
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
      - USE_IAM_AUTH=${USE_IAM_AUTH:-}
      - AWS_REGION=${AWS_REGION-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the lines below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:

@@ -84,7 +84,6 @@ services:
      # (time spent on finding the right docs + time spent fetching summaries from disk)
      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
      - CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}
      - LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=${LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS:-}

      # Chat Configs
      - HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}
@@ -92,13 +91,6 @@ services:
      # Enterprise Edition only
      - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION=${AWS_REGION-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the lines below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
@@ -200,13 +192,6 @@ services:
      # Enterprise Edition only
      - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION=${AWS_REGION-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the lines below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:

@@ -22,13 +22,6 @@ services:
      - VESPA_HOST=index
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION=${AWS_REGION-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the lines below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
@@ -59,13 +52,6 @@ services:
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION=${AWS_REGION-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the lines below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:

@@ -23,13 +23,6 @@ services:
      - VESPA_HOST=index
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION=${AWS_REGION-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the lines below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
@@ -64,13 +57,6 @@ services:
      - REDIS_HOST=cache
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION=${AWS_REGION-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the lines below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
@@ -237,7 +223,7 @@ services:
    volumes:
      - ../data/certbot/conf:/etc/letsencrypt
      - ../data/certbot/www:/var/www/certbot
    logging::wq
    logging:
      driver: json-file
      options:
        max-size: "50m"
@@ -259,6 +245,3 @@ volumes:
  # Created by the container itself
  model_cache_huggingface:
  indexing_huggingface_model_cache:
@@ -60,12 +60,3 @@ spec:
          envFrom:
            - configMapRef:
                name: env-configmap
          # Uncomment if you are using IAM auth for Postgres
          # volumeMounts:
          #   - name: bundle-pem
          #     mountPath: "/app/certs"
          #     readOnly: true
      # volumes:
      #   - name: bundle-pem
      #     secret:
      #       secretName: bundle-pem-secret

@@ -43,7 +43,6 @@ spec:
      # - name: my-ca-cert-volume
      #   mountPath: /etc/ssl/certs/custom-ca.crt
      #   subPath: my-ca.crt

      # Optional volume for CA certificate
      # volumes:
      # - name: my-ca-cert-volume
@@ -52,13 +51,3 @@ spec:
      #   items:
      #     - key: my-ca.crt
      #       path: my-ca.crt

      # Uncomment if you are using IAM auth for Postgres
      # volumeMounts:
      #   - name: bundle-pem
      #     mountPath: "/app/certs"
      #     readOnly: true
      # volumes:
      #   - name: bundle-pem
      #     secret:
      #       secretName: bundle-pem-secret
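To connect the USE_IAM_AUTH / AWS_* variables in the compose files above with the commented bundle-pem mounts here: with IAM auth enabled, the backend swaps the static Postgres password for a short-lived RDS auth token and verifies the server against the mounted CA bundle. A hedged sketch of the token generation with boto3 (hostname and names are invented; the real Onyx helper may differ):

import boto3

def make_rds_iam_token(host: str, port: int, user: str, region: str) -> str:
    client = boto3.client("rds", region_name=region)
    # Returns a presigned token, valid ~15 minutes, used in place of a password
    # together with sslrootcert pointed at the mounted bundle.pem.
    return client.generate_db_auth_token(
        DBHostname=host, Port=port, DBUsername=user, Region=region
    )

token = make_rds_iam_token(
    "mydb.abc123.us-east-1.rds.amazonaws.com", 5432, "postgres", "us-east-1"
)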
@@ -1,6 +0,0 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect x="1.33325" y="1.3335" width="6.33333" height="6.33333" fill="#F25022"/>
<rect x="8.33325" y="1.3335" width="6.33333" height="6.33333" fill="#80BA01"/>
<rect x="8.33325" y="8.3335" width="6.33333" height="6.33333" fill="#FFB902"/>
<rect x="1.33325" y="8.3335" width="6.33333" height="6.33333" fill="#02A4EF"/>
</svg>
@@ -1 +0,0 @@
<svg viewBox="0 0 512 512" xmlns="http://www.w3.org/2000/svg" fill-rule="evenodd" clip-rule="evenodd" stroke-linejoin="round" stroke-miterlimit="2"><path d="M189.08 303.228H94.587l.044-94.446h94.497l-.048 94.446z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M283.528 397.674h-94.493l.044-94.446h94.496l-.047 94.446z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M283.575 303.228H189.08l.046-94.446h94.496l-.047 94.446z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M378.07 303.228h-94.495l.044-94.446h94.498l-.047 94.446zM189.128 208.779H94.633l.044-94.448h94.498l-.047 94.448zM378.115 208.779h-94.494l.045-94.448h94.496l-.047 94.448zM94.587 303.227H.093l.044-96.017h94.496l-.046 96.017z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M94.633 208.779H.138l.046-94.448H94.68l-.047 94.448z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M94.68 115.902H.185L.23 19.885h94.498l-.047 96.017zM472.657 114.331h-94.495l.044-94.446h94.497l-.046 94.446zM94.54 399.244H.046l.044-97.588h94.497l-.047 97.588z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M94.495 492.123H0l.044-94.446H94.54l-.045 94.446zM472.563 303.228H378.07l.044-94.446h94.496l-.047 94.446zM472.61 208.779h-94.495l.044-94.448h94.498l-.047 94.448z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M472.517 397.674h-94.494l.044-94.446h94.497l-.047 94.446z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M472.47 492.121h-94.493l.044-96.017h94.496l-.047 96.017z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M228.375 303.22h-96.061l.046-94.446h96.067l-.052 94.446z" fill="#ff7000" fill-rule="nonzero"/><path d="M322.827 397.666h-94.495l.044-96.018h94.498l-.047 96.018z" fill="#ff4900" fill-rule="nonzero"/><path d="M324.444 303.22h-97.636l.046-94.446h97.638l-.048 94.446z" fill="#ff7000" fill-rule="nonzero"/><path d="M418.938 303.22h-96.064l.045-94.446h96.066l-.047 94.446z" fill="#ff7000" fill-rule="nonzero"/><path d="M228.423 208.77H132.36l.045-94.445h96.066l-.05 94.446zM418.985 208.77H322.92l.044-94.445h96.069l-.048 94.446z" fill="#ffa300" fill-rule="nonzero"/><path d="M133.883 304.79H39.392l.044-96.017h94.496l-.049 96.017z" fill="#ff7000" fill-rule="nonzero"/><path d="M133.929 208.77H39.437l.044-95.445h94.496l-.048 95.445z" fill="#ffa300" fill-rule="nonzero"/><path d="M133.976 114.325H39.484l.044-94.448h94.497l-.05 94.448zM511.954 115.325h-94.493l.044-95.448h94.497l-.048 95.448z" fill="#ffce00" fill-rule="nonzero"/><path d="M133.836 399.667H39.345l.044-96.447h94.496l-.049 96.447z" fill="#ff4900" fill-rule="nonzero"/><path d="M133.79 492.117H39.3l.044-94.448h94.496l-.049 94.448z" fill="#ff0107" fill-rule="nonzero"/><path d="M511.862 303.22h-94.495l.046-94.446h94.496l-.047 94.446z" fill="#ff7000" fill-rule="nonzero"/><path d="M511.907 208.77h-94.493l.044-94.445h94.496l-.047 94.446z" fill="#ffa300" fill-rule="nonzero"/><path d="M511.815 398.666h-94.493l.044-95.447h94.496l-.047 95.447z" fill="#ff4900" fill-rule="nonzero"/><path d="M511.77 492.117h-94.496l.046-94.448h94.496l-.047 94.448z" fill="#ff0107" fill-rule="nonzero"/></svg>
@@ -75,8 +75,7 @@ export default function Page() {
      },
      {} as Record<SourceCategory, SourceMetadata[]>
    );
  }, [sources, filterSources, searchTerm]);

  }, [sources, searchTerm]);
  const handleKeyPress = (e: React.KeyboardEvent<HTMLInputElement>) => {
    if (e.key === "Enter") {
      const filteredCategories = Object.entries(categorizedSources).filter(