Compare commits


4 Commits

Author    SHA1        Message                          Date
pablonyx  3ce02ccc01  k                                2025-02-16 12:35:52 -08:00
pablonyx  d78c8e2e05  k                                2025-02-16 12:35:18 -08:00
pablonyx  501ad93153  remove playwright for now        2025-02-16 12:33:53 -08:00
pablonyx  f4686440ae  temporarily silence playwright   2025-02-16 12:32:44 -08:00
54 changed files with 271 additions and 777 deletions

View File

@@ -28,11 +28,11 @@ RUN apt-get update && \
curl \
zip \
ca-certificates \
libgnutls30 \
libblkid1 \
libmount1 \
libsmartcols1 \
libuuid1 \
libgnutls30=3.7.9-2+deb12u3 \
libblkid1=2.38.1-5+deb12u1 \
libmount1=2.38.1-5+deb12u1 \
libsmartcols1=2.38.1-5+deb12u1 \
libuuid1=2.38.1-5+deb12u1 \
libxmlsec1-dev \
pkg-config \
gcc \

View File

@@ -5,14 +5,14 @@ from langgraph.graph import StateGraph
from onyx.agents.agent_search.basic.states import BasicInput
from onyx.agents.agent_search.basic.states import BasicOutput
from onyx.agents.agent_search.basic.states import BasicState
from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
prepare_tool_input,
)
from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -33,13 +33,13 @@ def basic_graph_builder() -> StateGraph:
)
graph.add_node(
node="choose_tool",
action=choose_tool,
node="llm_tool_choice",
action=llm_tool_choice,
)
graph.add_node(
node="call_tool",
action=call_tool,
node="tool_call",
action=tool_call,
)
graph.add_node(
@@ -51,12 +51,12 @@ def basic_graph_builder() -> StateGraph:
graph.add_edge(start_key=START, end_key="prepare_tool_input")
graph.add_edge(start_key="prepare_tool_input", end_key="choose_tool")
graph.add_edge(start_key="prepare_tool_input", end_key="llm_tool_choice")
graph.add_conditional_edges("choose_tool", should_continue, ["call_tool", END])
graph.add_conditional_edges("llm_tool_choice", should_continue, ["tool_call", END])
graph.add_edge(
start_key="call_tool",
start_key="tool_call",
end_key="basic_use_tool_response",
)
@@ -73,7 +73,7 @@ def should_continue(state: BasicState) -> str:
# If there are no tool calls, basic graph already streamed the answer
END
if state.tool_choice is None
else "call_tool"
else "tool_call"
)
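A note on the rename pattern in this file: LangGraph node keys are plain strings, so a rename like the choose_tool/llm_tool_choice and call_tool/tool_call swap above must be applied in lockstep to every add_node, add_edge, add_conditional_edges, and routing-function return value. A minimal runnable sketch of that coupling, assuming only that langgraph is installed (the state and node actions are stand-ins, not the Onyx ones):

from typing import TypedDict

from langgraph.graph import END, START, StateGraph


class State(TypedDict):
    done: bool


def choose_tool(state: State) -> State:
    # stand-in for the real tool-choice node
    return {"done": False}


def call_tool(state: State) -> State:
    # stand-in for the real tool-call node
    return {"done": True}


def should_continue(state: State) -> str:
    # returned strings must match node keys exactly, or routing breaks
    return END if state["done"] else "call_tool"


graph = StateGraph(State)
graph.add_node("choose_tool", choose_tool)
graph.add_node("call_tool", call_tool)
graph.add_edge(START, "choose_tool")
graph.add_conditional_edges("choose_tool", should_continue, ["call_tool", END])
graph.add_edge("call_tool", END)
compiled = graph.compile()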

View File

@@ -31,14 +31,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_CHECK
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -87,11 +85,9 @@ def check_sub_answer(
agent_error: AgentErrorLog | None = None
response: BaseMessage | None = None
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBANSWER_CHECK,
fast_llm.invoke,
response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK,
)
quality_str: str = cast(str, response.content)
@@ -100,7 +96,7 @@ def check_sub_answer(
)
log_result = f"Answer quality: {quality_str}"
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
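This hunk shows the timeout refactor applied throughout the agent nodes in this compare: the external run_with_timeout watchdog around llm.invoke is dropped in favor of the client-side timeout_override, so only LLMTimeoutError needs handling instead of (LLMTimeoutError, TimeoutError). A self-contained sketch of the two shapes; FakeLLM and the timeout values are illustrative, LLMTimeoutError and timeout_override mirror the diff, and the old shape is sketched with concurrent.futures rather than the repo's run_with_timeout helper:

import time
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import TimeoutError as FuturesTimeoutError


class LLMTimeoutError(Exception):
    pass


class FakeLLM:
    def invoke(self, prompt: str, timeout_override: int | None = None) -> str:
        # a real client would abort the request after timeout_override seconds
        # and raise LLMTimeoutError itself
        time.sleep(0.01)
        return f"answer to: {prompt}"


llm = FakeLLM()

# New shape: trust the client's own timeout; only the client error can surface.
try:
    response = llm.invoke(prompt="q", timeout_override=8)
except LLMTimeoutError:
    response = None  # recorded as an AgentErrorLog in the real nodes

# Old shape: an external watchdog enforces a wall-clock bound on top of the
# client's connect timeout, so a generic TimeoutError must be handled too.
with ThreadPoolExecutor(max_workers=1) as pool:
    future = pool.submit(llm.invoke, prompt="q", timeout_override=2)
    try:
        response = future.result(timeout=8)
    except (LLMTimeoutError, FuturesTimeoutError):
        response = None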

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import merge_message_runs
@@ -46,13 +47,11 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import NO_RECOVERED_DOCS
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -111,14 +110,15 @@ def generate_sub_answer(
config=fast_llm.config,
)
response: list[str | list[str | dict[str, Any]]] = []
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
response: list[str] = []
def stream_sub_answer() -> list[str]:
agent_error: AgentErrorLog | None = None
try:
for message in fast_llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
@@ -142,15 +142,8 @@ def generate_sub_answer(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
return response
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION,
stream_sub_answer,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
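generate_sub_answer and the other generate_* nodes below share the same streaming loop shape: iterate the LLM stream, time each dispatch to the stream writer, and accumulate token content. A minimal runnable sketch of that loop with a stubbed stream; Chunk and fake_stream are stand-ins, and the real node dispatches an AgentAnswerPiece where the comment sits:

from collections.abc import Iterator
from datetime import datetime


class Chunk:
    def __init__(self, content: str) -> None:
        self.content = content


def fake_stream(prompt: str) -> Iterator[Chunk]:
    # stand-in for fast_llm.stream(prompt=..., timeout_override=...)
    for token in ["a ", "sub-", "answer"]:
        yield Chunk(token)


response: list[str] = []
dispatch_timings: list[float] = []
for message in fake_stream("sub-question"):
    content = message.content
    start_stream_token = datetime.now()
    # the real node writes an AgentAnswerPiece to the stream writer here
    end_stream_token = datetime.now()
    dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
    response.append(content)

print("".join(response), dispatch_timings)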

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import HumanMessage
@@ -59,15 +60,11 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
@@ -80,7 +77,6 @@ from onyx.prompts.agent_search import (
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
_llm_node_error_strings = LLMNodeErrorStrings(
@@ -234,11 +230,7 @@ def generate_initial_answer(
sub_questions = all_sub_questions # Replace the original assignment
model = (
graph_config.tooling.fast_llm
if AGENT_ANSWER_GENERATION_BY_FAST_LLM
else graph_config.tooling.primary_llm
)
model = graph_config.tooling.fast_llm
doc_context = format_docs(answer_generation_documents.context_documents)
doc_context = trim_prompt_piece(
@@ -268,16 +260,15 @@ def generate_initial_answer(
)
]
streamed_tokens: list[str] = [""]
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
def stream_initial_answer() -> list[str]:
response: list[str] = []
try:
for message in model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
@@ -301,16 +292,9 @@ def generate_initial_answer(
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
return response
streamed_tokens.append(content)
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
stream_initial_answer,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,

View File

@@ -36,10 +36,7 @@ from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
@@ -50,7 +47,6 @@ from onyx.prompts.agent_search import (
INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -135,12 +131,10 @@ def decompose_orig_question(
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
dispatch_separated,
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
),
dispatch_subquestion(0, writer),
sep_callback=dispatch_subquestion_sep(0, writer),
@@ -160,7 +154,7 @@ def decompose_orig_question(
)
write_custom_event("stream_finished", stop_event, writer)
except (LLMTimeoutError, TimeoutError) as e:
except LLMTimeoutError as e:
logger.error("LLM Timeout Error - decompose orig question")
raise e # fail loudly on this critical step
except LLMRateLimitError as e:

View File

@@ -25,7 +25,7 @@ logger = setup_logger()
def route_initial_tool_choice(
state: MainState, config: RunnableConfig
) -> Literal["call_tool", "start_agent_search", "logging_node"]:
) -> Literal["tool_call", "start_agent_search", "logging_node"]:
"""
LangGraph edge to route to agent search.
"""
@@ -38,7 +38,7 @@ def route_initial_tool_choice(
):
return "start_agent_search"
else:
return "call_tool"
return "tool_call"
else:
return "logging_node"

View File

@@ -43,14 +43,14 @@ from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.refinement.consolidate_sub_answers.graph_builder import (
answer_refined_query_graph_builder,
)
from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
prepare_tool_input,
)
from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
from onyx.utils.logger import setup_logger
@@ -77,13 +77,13 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
# Choose the initial tool
graph.add_node(
node="initial_tool_choice",
action=choose_tool,
action=llm_tool_choice,
)
# Call the tool, if required
graph.add_node(
node="call_tool",
action=call_tool,
node="tool_call",
action=tool_call,
)
# Use the tool response
@@ -168,11 +168,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
graph.add_conditional_edges(
"initial_tool_choice",
route_initial_tool_choice,
["call_tool", "start_agent_search", "logging_node"],
["tool_call", "start_agent_search", "logging_node"],
)
graph.add_edge(
start_key="call_tool",
start_key="tool_call",
end_key="basic_use_tool_response",
)
graph.add_edge(

View File

@@ -33,15 +33,13 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import RefinedAnswerImprovement
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_COMPARE_ANSWERS
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -107,14 +105,11 @@ def compare_answers(
refined_answer_improvement: bool | None = None
# no need to stream this
try:
resp = run_with_timeout(
AGENT_TIMEOUT_LLM_COMPARE_ANSWERS,
model.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS,
resp = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,

View File

@@ -44,10 +44,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
@@ -56,7 +53,6 @@ from onyx.prompts.agent_search import (
)
from onyx.tools.models import ToolCallKickoff
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -138,17 +134,15 @@ def create_refined_sub_questions(
agent_error: AgentErrorLog | None = None
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
dispatch_separated,
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
),
dispatch_subquestion(1, writer),
sep_callback=dispatch_subquestion_sep(1, writer),
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,

View File

@@ -22,17 +22,11 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.constants import NUM_EXPLORATORY_DOCS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
@@ -90,42 +84,30 @@ def extract_entities_terms(
]
fast_llm = graph_config.tooling.fast_llm
# Grader
llm_response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
cleaned_response = (
str(llm_response.content).replace("```json\n", "").replace("\n```", "")
)
first_bracket = cleaned_response.find("{")
last_bracket = cleaned_response.rfind("}")
cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
try:
llm_response = run_with_timeout(
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
fast_llm.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
entity_extraction_result = EntityExtractionResult.model_validate_json(
cleaned_response
)
cleaned_response = (
str(llm_response.content).replace("```json\n", "").replace("\n```", "")
)
first_bracket = cleaned_response.find("{")
last_bracket = cleaned_response.rfind("}")
cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
try:
entity_extraction_result = EntityExtractionResult.model_validate_json(
cleaned_response
)
except ValueError:
logger.error(
"Failed to parse LLM response as JSON in Entity-Term Extraction"
)
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
)
except (LLMTimeoutError, TimeoutError):
logger.error("LLM Timeout Error - extract entities terms")
except ValueError:
logger.error("Failed to parse LLM response as JSON in Entity-Term Extraction")
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
)
except LLMRateLimitError:
logger.error("LLM Rate Limit Error - extract entities terms")
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
retrieved_entities_relationships=EntityRelationshipTermExtraction(
entities=[],
relationships=[],
terms=[],
),
)
return EntityTermExtractionUpdate(
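The extract_entities_terms hunk above reworks the fenced-JSON cleanup: strip the markdown code fence, slice to the outermost braces, then validate with Pydantic. A self-contained sketch of that cleanup; EntityExtractionResult here is a stand-in model, not the real Onyx one:

from pydantic import BaseModel


class EntityExtractionResult(BaseModel):
    entities: list[str] = []
    relationships: list[str] = []
    terms: list[str] = []


raw = '```json\n{"entities": ["Onyx"], "relationships": [], "terms": ["search"]}\n```'

# strip the markdown fence, then keep only the outermost {...} span so stray
# prose around the JSON cannot break parsing
cleaned = raw.replace("```json\n", "").replace("\n```", "")
cleaned = cleaned[cleaned.find("{") : cleaned.rfind("}") + 1]

try:
    result = EntityExtractionResult.model_validate_json(cleaned)
except ValueError:  # pydantic's ValidationError subclasses ValueError
    result = EntityExtractionResult()  # fall back to an empty extraction
print(result)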

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import HumanMessage
@@ -65,21 +66,14 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
@@ -98,7 +92,6 @@ from onyx.prompts.agent_search import (
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -260,12 +253,7 @@ def generate_validate_refined_answer(
else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
)
model = (
graph_config.tooling.fast_llm
if AGENT_ANSWER_GENERATION_BY_FAST_LLM
else graph_config.tooling.primary_llm
)
model = graph_config.tooling.fast_llm
relevant_docs_str = format_docs(answer_generation_documents.context_documents)
relevant_docs_str = trim_prompt_piece(
model.config,
@@ -296,13 +284,13 @@ def generate_validate_refined_answer(
)
]
streamed_tokens: list[str] = [""]
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
def stream_refined_answer() -> list[str]:
try:
for message in model.stream(
msg, timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
@@ -327,15 +315,8 @@ def generate_validate_refined_answer(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
return streamed_tokens
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
stream_refined_answer,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
@@ -402,20 +383,16 @@ def generate_validate_refined_answer(
)
]
validation_model = graph_config.tooling.fast_llm
try:
validation_response = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
validation_model.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
validation_response = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
)
refined_answer_quality = binary_string_test_after_answer_separator(
text=cast(str, validation_response.content),
positive_value=AGENT_POSITIVE_VALUE_STR,
separator=AGENT_ANSWER_SEPARATOR,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
refined_answer_quality = True
logger.error("LLM Timeout Error - validate refined answer")

View File

@@ -34,16 +34,14 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
QUERY_REWRITING_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -71,7 +69,7 @@ def expand_queries(
node_start_time = datetime.now()
question = state.question
model = graph_config.tooling.fast_llm
llm = graph_config.tooling.fast_llm
sub_question_id = state.sub_question_id
if sub_question_id is None:
level, question_num = 0, 0
@@ -90,12 +88,10 @@ def expand_queries(
rewritten_queries = []
try:
llm_response_list = run_with_timeout(
AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION,
dispatch_separated,
model.stream(
llm_response_list = dispatch_separated(
llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
),
dispatch_subquery(level, question_num, writer),
)
@@ -105,7 +101,7 @@ def expand_queries(
rewritten_queries = llm_response.split("\n")
log_result = f"Number of expanded queries: {len(rewritten_queries)}"
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,

View File

@@ -55,7 +55,6 @@ def rerank_documents(
# Note that these are passed in values from the API and are overrides which are typically None
rerank_settings = graph_config.inputs.search_request.rerank_settings
allow_agent_reranking = graph_config.behavior.allow_agent_reranking
if rerank_settings is None:
with get_session_context_manager() as db_session:
@@ -63,31 +62,23 @@ def rerank_documents(
if not search_settings.disable_rerank_for_streaming:
rerank_settings = RerankingDetails.from_db_model(search_settings)
# Initial default: no reranking. Will be overwritten below if reranking is warranted
reranked_documents = verified_documents
if should_rerank(rerank_settings) and len(verified_documents) > 0:
if len(verified_documents) > 1:
if not allow_agent_reranking:
logger.info("Use of local rerank model without GPU, skipping reranking")
# No reranking, stay with verified_documents as default
else:
# Reranking is warranted, use the rerank_sections function
reranked_documents = rerank_sections(
query_str=question,
# if runnable, then rerank_settings is not None
rerank_settings=cast(RerankingDetails, rerank_settings),
sections_to_rerank=verified_documents,
)
reranked_documents = rerank_sections(
query_str=question,
# if runnable, then rerank_settings is not None
rerank_settings=cast(RerankingDetails, rerank_settings),
sections_to_rerank=verified_documents,
)
else:
logger.warning(
f"{len(verified_documents)} verified document(s) found, skipping reranking"
)
# No reranking, stay with verified_documents as default
reranked_documents = verified_documents
else:
logger.warning("No reranking settings found, using unranked documents")
# No reranking, stay with verified_documents as default
reranked_documents = verified_documents
if AGENT_RERANKING_STATS:
fit_scores = get_fit_scores(verified_documents, reranked_documents)
else:

View File

@@ -25,15 +25,13 @@ from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrin
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
DOCUMENT_VERIFICATION_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -88,11 +86,8 @@ def verify_documents(
] # default is to treat document as relevant
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION,
fast_llm.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION,
response = fast_llm.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
)
assert isinstance(response.content, str)
@@ -101,7 +96,7 @@ def verify_documents(
):
verified_documents = []
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
# In this case, we decide to continue and don't raise an error, as there is
# little harm in letting some docs through that are less relevant.
logger.error("LLM Timeout Error - verify documents")

View File

@@ -67,7 +67,6 @@ class GraphSearchConfig(BaseModel):
# Whether to allow creation of refinement questions (and entity extraction, etc.)
allow_refinement: bool = True
skip_gen_ai_answer_generation: bool = False
allow_agent_reranking: bool = False
class GraphConfig(BaseModel):

View File

@@ -25,7 +25,7 @@ logger = setup_logger()
# and a function that handles extracting the necessary fields
# from the state and config
# TODO: fan-out to multiple tool call nodes? Make this configurable?
def choose_tool(
def llm_tool_choice(
state: ToolChoiceState,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,

View File

@@ -28,7 +28,7 @@ def emit_packet(packet: AnswerPacket, writer: StreamWriter) -> None:
write_custom_event("basic_response", packet, writer)
def call_tool(
def tool_call(
state: ToolChoiceUpdate,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,

View File

@@ -43,9 +43,8 @@ from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -81,7 +80,6 @@ from onyx.tools.tool_implementations.search.search_tool import SearchResponseSum
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
logger = setup_logger()
@@ -397,13 +395,11 @@ def summarize_history(
)
try:
history_response = run_with_timeout(
AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
llm.invoke,
history_response = llm.invoke(
history_context_prompt,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
logger.error("LLM Timeout Error - summarize history")
return (
history # this is what is done at this point anyway, so we default to this

View File

@@ -94,7 +94,6 @@ from onyx.db.models import User
from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import get_redis_client
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
from onyx.utils.telemetry import optional_telemetry
@@ -108,6 +107,11 @@ from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
class BasicAuthenticationError(HTTPException):
def __init__(self, detail: str):
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
def is_user_admin(user: User | None) -> bool:
if AUTH_TYPE == AuthType.DISABLED:
return True

View File

@@ -105,7 +105,6 @@ def document_by_cc_pair_cleanup_task(
tenant_id=tenant_id,
chunk_count=chunk_count,
)
delete_documents_complete__no_commit(
db_session=db_session,
document_ids=[document_id],

View File

@@ -27,10 +27,8 @@ from onyx.file_store.utils import InMemoryChatFile
from onyx.llm.interfaces import LLM
from onyx.tools.force import ForceUseTool
from onyx.tools.tool import Tool
from onyx.tools.tool_implementations.search.search_tool import QUERY_FIELD
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.gpu_utils import gpu_status_request
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -82,26 +80,6 @@ class Answer:
and not skip_explicit_tool_calling
)
rerank_settings = search_request.rerank_settings
using_cloud_reranking = (
rerank_settings is not None
and rerank_settings.rerank_provider_type is not None
)
allow_agent_reranking = gpu_status_request() or using_cloud_reranking
# TODO: this is a hack to force the query to be used for the search tool
# this should be removed once we fully unify graph inputs (i.e.
# remove SearchQuery entirely)
if (
force_use_tool.force_use
and search_tool
and force_use_tool.args
and force_use_tool.tool_name == search_tool.name
and QUERY_FIELD in force_use_tool.args
):
search_request.query = force_use_tool.args[QUERY_FIELD]
self.graph_inputs = GraphInputs(
search_request=search_request,
prompt_builder=prompt_builder,
@@ -116,6 +94,7 @@ class Answer:
force_use_tool=force_use_tool,
using_tool_calling_llm=using_tool_calling_llm,
)
assert db_session, "db_session must be provided for agentic persistence"
self.graph_persistence = GraphPersistence(
db_session=db_session,
chat_session_id=chat_session_id,
@@ -125,7 +104,6 @@ class Answer:
use_agentic_search=use_agentic_search,
skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
allow_refinement=True,
allow_agent_reranking=allow_agent_reranking,
)
self.graph_config = GraphConfig(
inputs=self.graph_inputs,

View File

@@ -7,7 +7,7 @@ from typing import cast
from sqlalchemy.orm import Session
from onyx.agents.agent_search.orchestration.nodes.call_tool import ToolCallException
from onyx.agents.agent_search.orchestration.nodes.tool_call import ToolCallException
from onyx.chat.answer import Answer
from onyx.chat.chat_utils import create_chat_chain
from onyx.chat.chat_utils import create_temporary_persona

View File

@@ -31,9 +31,22 @@ AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
)
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = 30 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = 10 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = 4 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = 1 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = 3 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = 12 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = 6 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = 8 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8 # in seconds
AGENT_RETRIEVAL_STATS = (
not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
@@ -165,172 +178,80 @@ AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
) # 2000
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = 10 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION
)
AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = 30 # in seconds
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION
)
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION
) # 25
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
) # 3
AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = 4 # in seconds
AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION
) # 30
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = 5 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION = 30 # in seconds
AGENT_TIMEOUT_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION
) # 8
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION = 5 # in seconds
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
) # 12
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = 3 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION = 30 # in seconds
AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION
) # 25
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = 5 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = 25 # in seconds
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
) # 25
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = 5 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = 30 # in seconds
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
) # 8
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
)
AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK = 8 # in seconds
AGENT_TIMEOUT_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK
)
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION
) # 6
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = 3 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = 8 # in seconds
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION
) # 1
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = 1 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = 3 # in seconds
AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION
) # 4
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = 5 # in seconds
AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
) # 8
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
)
AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS = 8 # in seconds
AGENT_TIMEOUT_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS
)
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION
)
AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = 8 # in seconds
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION")
or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
) # 8
GRAPH_VERSION_NAME: str = "a"
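Every timeout in this file follows the same override pattern: int(os.environ.get(NAME) or DEFAULT). Using `or` rather than a get() default means a variable that is set but empty still falls back to the default. A minimal sketch with one of the settings above:

import os

AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8  # in seconds

AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
)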

View File

@@ -628,7 +628,7 @@ def create_new_chat_message(
commit: bool = True,
reserved_message_id: int | None = None,
overridden_model: str | None = None,
refined_answer_improvement: bool | None = None,
refined_answer_improvement: bool = True,
) -> ChatMessage:
if reserved_message_id is not None:
# Edit existing message

View File

@@ -409,6 +409,10 @@ class DefaultMultiLLM(LLM):
self._record_call(processed_prompt)
try:
print(
"model is",
f"{self.config.model_provider}/{self.config.deployment_name or self.config.model_name}",
)
return litellm.completion(
mock_response=MOCK_LLM_RESPONSE,
# model choice

View File

@@ -61,10 +61,10 @@ def _create_indexable_chunks(
doc_updated_at=None,
primary_owners=[],
secondary_owners=[],
chunk_count=preprocessed_doc["chunk_ind"] + 1,
chunk_count=1,
)
ids_to_documents[document.id] = document
if preprocessed_doc["chunk_ind"] == 0:
ids_to_documents[document.id] = document
chunk = DocMetadataAwareIndexChunk(
chunk_id=preprocessed_doc["chunk_ind"],
@@ -92,7 +92,6 @@ def _create_indexable_chunks(
boost=DEFAULT_BOOST,
large_chunk_id=None,
)
chunks.append(chunk)
return list(ids_to_documents.values()), chunks
@@ -193,7 +192,6 @@ def seed_initial_documents(
last_successful_index_time=last_index_time,
seeding_flow=True,
)
cc_pair_id = cast(int, result.data)
processed_docs = fetch_versioned_implementation(
"onyx.seeding.load_docs",
@@ -251,5 +249,4 @@ def seed_initial_documents(
.values(chunk_count=doc.chunk_count)
)
db_session.commit()
kv_store.store(KV_DOCUMENTS_SEEDED_KEY, True)

View File

@@ -213,6 +213,8 @@ def get_chat_session(
# we need the tool call objs anyways, so just fetch them in a single call
prefetch_tool_calls=True,
)
for message in session_messages:
translate_db_message_to_chat_message_detail(message)
return ChatSessionDetailResponse(
chat_session_id=session_id,

View File

@@ -58,7 +58,6 @@ SEARCH_RESPONSE_SUMMARY_ID = "search_response_summary"
SEARCH_DOC_CONTENT_ID = "search_doc_content"
SECTION_RELEVANCE_LIST_ID = "section_relevance_list"
SEARCH_EVALUATION_ID = "llm_doc_eval"
QUERY_FIELD = "query"
class SearchResponseSummary(SearchQueryInfo):
@@ -180,12 +179,12 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
"parameters": {
"type": "object",
"properties": {
QUERY_FIELD: {
"query": {
"type": "string",
"description": "What to search for",
},
},
"required": [QUERY_FIELD],
"required": ["query"],
},
},
}
@@ -224,7 +223,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
rephrased_query = history_based_query_rephrase(
query=query, history=history, llm=llm
)
return {QUERY_FIELD: rephrased_query}
return {"query": rephrased_query}
"""Actual tool execution"""
@@ -280,7 +279,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
def run(
self, override_kwargs: SearchToolOverrideKwargs | None = None, **llm_kwargs: Any
) -> Generator[ToolResponse, None, None]:
query = cast(str, llm_kwargs[QUERY_FIELD])
query = cast(str, llm_kwargs["query"])
force_no_rerank = False
alternate_db_session = None
retrieved_sections_callback = None
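The SearchTool hunk above inlines the literal "query" where the QUERY_FIELD constant used to be. For context, the dict it builds is an OpenAI-style function schema; a sketch of its shape, where the outer function wrapper and name are illustrative and the parameters block mirrors the diff:

search_tool_definition = {
    "type": "function",
    "function": {
        "name": "run_search",  # illustrative name
        "description": "Search the connected document sources.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "What to search for",
                },
            },
            "required": ["query"],
        },
    },
}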

View File

@@ -1,4 +1,3 @@
import threading
import uuid
from collections.abc import Callable
from concurrent.futures import as_completed
@@ -14,10 +13,6 @@ logger = setup_logger()
R = TypeVar("R")
# WARNING: it is not currently well understood whether we lose access to contextvars when functions are
# executed through this wrapper. Do NOT try to acquire a db session in a function run through this unless
# you have heavily tested that multi-tenancy is respected. If/when we know for sure that it is or
# is not safe, update this comment.
def run_functions_tuples_in_parallel(
functions_with_args: list[tuple[Callable, tuple]],
allow_failures: bool = False,
@@ -83,10 +78,6 @@ class FunctionCall(Generic[R]):
return self.func(*self.args, **self.kwargs)
# WARNING: it is not currently well understood whether we lose access to contextvars when functions are
# executed through this wrapper. Do NOT try to acquire a db session in a function run through this unless
# you have heavily tested that multi-tenancy is respected. If/when we know for sure that it is or
# is not safe, update this comment.
def run_functions_in_parallel(
function_calls: list[FunctionCall],
allow_failures: bool = False,
@@ -118,49 +109,3 @@ def run_functions_in_parallel(
raise
return results
class TimeoutThread(threading.Thread):
def __init__(
self, timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any
):
super().__init__()
self.timeout = timeout
self.func = func
self.args = args
self.kwargs = kwargs
self.exception: Exception | None = None
def run(self) -> None:
try:
self.result = self.func(*self.args, **self.kwargs)
except Exception as e:
self.exception = e
def end(self) -> None:
raise TimeoutError(
f"Function {self.func.__name__} timed out after {self.timeout} seconds"
)
# WARNING: it is not currently well understood whether we lose access to contextvars when functions are
# executed through this wrapper. Do NOT try to acquire a db session in a function run through this unless
# you have heavily tested that multi-tenancy is respected. If/when we know for sure that it is or
# is not safe, update this comment.
def run_with_timeout(
timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any
) -> R:
"""
Executes a function with a timeout. If the function doesn't complete within the specified
timeout, raises TimeoutError.
"""
task = TimeoutThread(timeout, func, *args, **kwargs)
task.start()
task.join(timeout)
if task.exception is not None:
raise task.exception
if task.is_alive():
task.end()
return task.result
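One caveat the removed TimeoutThread helper carries: Thread.join(timeout) only stops waiting, it does not stop the worker, so the wrapped function keeps running in the background after TimeoutError reaches the caller. A small runnable sketch of that behavior using plain threading, not the removed helper itself:

import threading
import time


def slow() -> None:
    time.sleep(1.0)
    print("worker still ran to completion in the background")


t = threading.Thread(target=slow)
t.start()
t.join(timeout=0.1)  # gives up waiting; the thread itself is not cancelled
print("timed out, worker alive:", t.is_alive())
t.join()  # the interpreter still has to wait for the thread eventually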

View File

@@ -11,7 +11,6 @@ from langchain_core.messages import HumanMessage
from langchain_core.messages import SystemMessage
from langchain_core.messages import ToolCall
from langchain_core.messages import ToolCallChunk
from pytest_mock import MockerFixture
from sqlalchemy.orm import Session
from onyx.chat.answer import Answer
@@ -26,7 +25,6 @@ from onyx.chat.models import StreamStopReason
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message
from onyx.context.search.models import RerankingDetails
from onyx.context.search.models import SearchRequest
from onyx.llm.interfaces import LLM
from onyx.tools.force import ForceUseTool
@@ -37,7 +35,6 @@ from onyx.tools.tool_implementations.search.search_tool import SEARCH_DOC_CONTEN
from onyx.tools.tool_implementations.search_like_tool_utils import (
FINAL_CONTEXT_DOCUMENTS_ID,
)
from shared_configs.enums import RerankerProvider
from tests.unit.onyx.chat.conftest import DEFAULT_SEARCH_ARGS
from tests.unit.onyx.chat.conftest import QUERY
@@ -47,20 +44,6 @@ def answer_instance(
mock_llm: LLM,
answer_style_config: AnswerStyleConfig,
prompt_config: PromptConfig,
mocker: MockerFixture,
) -> Answer:
mocker.patch(
"onyx.chat.answer.gpu_status_request",
return_value=True,
)
return _answer_fixture_impl(mock_llm, answer_style_config, prompt_config)
def _answer_fixture_impl(
mock_llm: LLM,
answer_style_config: AnswerStyleConfig,
prompt_config: PromptConfig,
rerank_settings: RerankingDetails | None = None,
) -> Answer:
return Answer(
prompt_builder=AnswerPromptBuilder(
@@ -81,13 +64,13 @@ def _answer_fixture_impl(
llm=mock_llm,
fast_llm=mock_llm,
force_use_tool=ForceUseTool(force_use=False, tool_name="", args=None),
search_request=SearchRequest(query=QUERY, rerank_settings=rerank_settings),
search_request=SearchRequest(query=QUERY),
chat_session_id=UUID("123e4567-e89b-12d3-a456-426614174000"),
current_agent_message_id=0,
)
def test_basic_answer(answer_instance: Answer, mocker: MockerFixture) -> None:
def test_basic_answer(answer_instance: Answer) -> None:
mock_llm = cast(Mock, answer_instance.graph_config.tooling.primary_llm)
mock_llm.stream.return_value = [
AIMessageChunk(content="This is a "),
@@ -380,49 +363,3 @@ def test_is_cancelled(answer_instance: Answer) -> None:
# Verify LLM calls
mock_llm.stream.assert_called_once()
@pytest.mark.parametrize(
"gpu_enabled,is_local_model",
[
(True, False),
(False, True),
(True, True),
(False, False),
],
)
def test_no_slow_reranking(
gpu_enabled: bool,
is_local_model: bool,
mock_llm: LLM,
answer_style_config: AnswerStyleConfig,
prompt_config: PromptConfig,
mocker: MockerFixture,
) -> None:
mocker.patch(
"onyx.chat.answer.gpu_status_request",
return_value=gpu_enabled,
)
rerank_settings = (
None
if is_local_model
else RerankingDetails(
rerank_model_name="test_model",
rerank_api_url="test_url",
rerank_api_key="test_key",
num_rerank=10,
rerank_provider_type=RerankerProvider.COHERE,
)
)
answer_instance = _answer_fixture_impl(
mock_llm, answer_style_config, prompt_config, rerank_settings=rerank_settings
)
assert (
answer_instance.graph_config.inputs.search_request.rerank_settings
== rerank_settings
)
assert (
answer_instance.graph_config.behavior.allow_agent_reranking == gpu_enabled
or not is_local_model
)

View File

@@ -36,12 +36,7 @@ def test_skip_gen_ai_answer_generation_flag(
mock_search_tool: SearchTool,
answer_style_config: AnswerStyleConfig,
prompt_config: PromptConfig,
mocker: MockerFixture,
) -> None:
mocker.patch(
"onyx.chat.answer.gpu_status_request",
return_value=True,
)
question = config["question"]
skip_gen_ai_answer_generation = config["skip_gen_ai_answer_generation"]

View File

@@ -1,61 +0,0 @@
import time
import pytest
from onyx.utils.threadpool_concurrency import run_with_timeout
def test_run_with_timeout_completes() -> None:
"""Test that a function that completes within timeout works correctly"""
def quick_function(x: int) -> int:
return x * 2
result = run_with_timeout(1.0, quick_function, x=21)
assert result == 42
@pytest.mark.parametrize("slow,timeout", [(1, 0.1), (0.3, 0.2)])
def test_run_with_timeout_raises_on_timeout(slow: float, timeout: float) -> None:
"""Test that a function that exceeds timeout raises TimeoutError"""
def slow_function() -> None:
time.sleep(slow)  # sleep for `slow` seconds
with pytest.raises(TimeoutError) as exc_info:
start = time.time()
run_with_timeout(timeout, slow_function)  # timeout shorter than the sleep
end = time.time()
assert end - start >= timeout
assert end - start < (slow + timeout) / 2
assert f"timed out after {timeout} seconds" in str(exc_info.value)
@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning")
def test_run_with_timeout_propagates_exceptions() -> None:
"""Test that other exceptions from the function are propagated properly"""
def error_function() -> None:
raise ValueError("Test error")
with pytest.raises(ValueError) as exc_info:
run_with_timeout(1.0, error_function)
assert "Test error" in str(exc_info.value)
def test_run_with_timeout_with_args_and_kwargs() -> None:
"""Test that args and kwargs are properly passed to the function"""
def complex_function(x: int, y: int, multiply: bool = False) -> int:
if multiply:
return x * y
return x + y
# Test with just positional args
result1 = run_with_timeout(1.0, complex_function, x=5, y=3)
assert result1 == 8
# Test with positional and keyword args
result2 = run_with_timeout(1.0, complex_function, x=5, y=3, multiply=True)
assert result2 == 15

View File

@@ -1,11 +1,6 @@
"use client";
import {
redirect,
usePathname,
useRouter,
useSearchParams,
} from "next/navigation";
import { redirect, useRouter, useSearchParams } from "next/navigation";
import {
BackendChatSession,
BackendMessage,
@@ -135,7 +130,6 @@ import {
} from "@/lib/browserUtilities";
import { Button } from "@/components/ui/button";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import { MessageChannel } from "node:worker_threads";
const TEMP_USER_MESSAGE_ID = -1;
const TEMP_ASSISTANT_MESSAGE_ID = -2;
@@ -1151,7 +1145,6 @@ export function ChatPage({
regenerationRequest?: RegenerationRequest | null;
overrideFileDescriptors?: FileDescriptor[];
} = {}) => {
navigatingAway.current = false;
let frozenSessionId = currentSessionId();
updateCanContinue(false, frozenSessionId);
@@ -1274,6 +1267,7 @@ export function ChatPage({
let stackTrace: string | null = null;
let sub_questions: SubQuestionDetail[] = [];
let second_level_sub_questions: SubQuestionDetail[] = [];
let is_generating: boolean = false;
let second_level_generating: boolean = false;
let finalMessage: BackendMessage | null = null;
@@ -1297,7 +1291,7 @@ export function ChatPage({
const stack = new CurrentMessageFIFO();
updateCurrentMessageFIFO(stack, {
signal: controller.signal,
signal: controller.signal, // Add this line
message: currMessage,
alternateAssistantId: currentAssistantId,
fileDescriptors: overrideFileDescriptors || currentMessageFiles,
@@ -1718,10 +1712,7 @@ export function ChatPage({
const newUrl = buildChatUrl(searchParams, currChatSessionId, null);
// newUrl is like /chat?chatId=10
// current page is like /chat
if (pathname == "/chat" && !navigatingAway.current) {
router.push(newUrl, { scroll: false });
}
router.push(newUrl, { scroll: false });
}
}
if (
@@ -2095,31 +2086,6 @@ export function ChatPage({
llmOverrideManager.updateImageFilesPresent(imageFileInMessageHistory);
}, [imageFileInMessageHistory]);
const pathname = usePathname();
useEffect(() => {
return () => {
// Cleanup which only runs when the component unmounts (i.e. when you navigate away).
const currentSession = currentSessionId();
const controller = abortControllersRef.current.get(currentSession);
if (controller) {
controller.abort();
navigatingAway.current = true;
setAbortControllers((prev) => {
const newControllers = new Map(prev);
newControllers.delete(currentSession);
return newControllers;
});
}
};
}, [pathname]);
const navigatingAway = useRef(false);
// Keep a ref to abortControllers to ensure we always have the latest value
const abortControllersRef = useRef(abortControllers);
useEffect(() => {
abortControllersRef.current = abortControllers;
}, [abortControllers]);
useSidebarShortcut(router, toggleSidebar);
const [sharedChatSession, setSharedChatSession] =
@@ -2334,7 +2300,7 @@ export function ChatPage({
fixed
left-0
z-40
bg-neutral-200
bg-background-100
h-screen
transition-all
bg-opacity-80
@@ -2591,21 +2557,12 @@ export function ChatPage({
) {
return <></>;
}
const nextMessage =
messageHistory.length > i + 1
? messageHistory[i + 1]
: null;
return (
<div
id={`message-${message.messageId}`}
key={messageReactComponentKey}
>
<HumanMessage
disableSwitchingForStreaming={
(nextMessage &&
nextMessage.is_generating) ||
false
}
stopGenerating={stopGenerating}
content={message.message}
files={message.files}
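
For reference, the abort-on-navigate wiring that this compare strips out of ChatPage, reassembled from the hunks above into one fragment. This is a sketch, not the authoritative source: it sits inside the component body, and `abortControllers`, `setAbortControllers`, and `currentSessionId` are assumed to exist exactly as they appear in the diff.

// Sketch reassembled from the removed hunks; component-body fragment.
const navigatingAway = useRef(false);
const pathname = usePathname();

// Mirror the latest abortControllers map into a ref so the unmount cleanup
// below never closes over a stale value.
const abortControllersRef = useRef(abortControllers);
useEffect(() => {
  abortControllersRef.current = abortControllers;
}, [abortControllers]);

useEffect(() => {
  return () => {
    // Cleanup that runs only on unmount (i.e. when navigating away): abort
    // the in-flight stream for the current session and drop its controller.
    const currentSession = currentSessionId();
    const controller = abortControllersRef.current.get(currentSession);
    if (controller) {
      controller.abort();
      navigatingAway.current = true;
      setAbortControllers((prev) => {
        const newControllers = new Map(prev);
        newControllers.delete(currentSession);
        return newControllers;
      });
    }
  };
}, [pathname]);

With this gone, the `pathname == "/chat" && !navigatingAway.current` guard above is also removed, so `router.push(newUrl, { scroll: false })` runs unconditionally after a session is named.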

View File

@@ -94,7 +94,7 @@ export function AgenticToggle({
Agent Search (BETA)
</h3>
</div>
<p className="text-xs text-neutral-600 dark:text-neutral-700 mb-2">
<p className="text-xs text-neutarl-600 dark:text-neutral-700 mb-2">
Use AI agents to break down questions and run deep iterative
research through promising pathways. Gives more thorough and
accurate responses but takes slightly longer.

View File

@@ -113,7 +113,7 @@ export default function LLMPopover({
<Popover open={isOpen} onOpenChange={setIsOpen}>
<PopoverTrigger asChild>
<button
className="dark:text-[#fff] text-[#000] focus:outline-none"
className="focus:outline-none"
data-testid="llm-popover-trigger"
>
<ChatInputOption

View File

@@ -250,7 +250,7 @@ export async function* sendMessage({
throw new Error(`HTTP error! status: ${response.status}`);
}
yield* handleSSEStream<PacketType>(response, signal);
yield* handleSSEStream<PacketType>(response);
}
export async function nameChatSession(chatSessionId: string) {
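
Caller side of the same change, sketched for orientation: before the revert, an AbortController created per chat session fed its signal through `sendMessage` into `handleSSEStream` (see the next file). The option names below follow the ChatPage hunk above; treat this as hypothetical usage, not the exact call site.

// Hypothetical pre-revert usage: cancelling the controller stops the SSE stream.
const controller = new AbortController();

for await (const packet of sendMessage({
  signal: controller.signal,
  message: currMessage,
  // ...remaining options as in the ChatPage hunk above
})) {
  // handle each streamed packet
}

// later, e.g. when the user navigates away:
// controller.abort();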

View File

@@ -9,12 +9,6 @@ import React, {
useMemo,
useState,
} from "react";
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import ReactMarkdown from "react-markdown";
import { OnyxDocument, FilteredOnyxDocument } from "@/lib/search/interfaces";
import remarkGfm from "remark-gfm";
@@ -314,7 +308,7 @@ export const AgenticMessage = ({
const renderedAlternativeMarkdown = useMemo(() => {
return (
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-base"
className="prose max-w-full text-base"
components={{
...markdownComponents,
code: ({ node, className, children }: any) => {
@@ -341,7 +335,7 @@ export const AgenticMessage = ({
const renderedMarkdown = useMemo(() => {
return (
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-base"
className="prose max-w-full text-base"
components={markdownComponents}
remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[[rehypePrism, { ignoreMissing: true }], rehypeKatex]}
@@ -536,7 +530,6 @@ export const AgenticMessage = ({
{includeMessageSwitcher && (
<div className="-mx-1 mr-auto">
<MessageSwitcher
disableForStreaming={!isComplete}
currentPage={currentMessageInd + 1}
totalPages={otherMessagesCanSwitchTo.length}
handlePrevious={() => {
@@ -623,7 +616,6 @@ export const AgenticMessage = ({
{includeMessageSwitcher && (
<div className="-mx-1 mr-auto">
<MessageSwitcher
disableForStreaming={!isComplete}
currentPage={currentMessageInd + 1}
totalPages={otherMessagesCanSwitchTo.length}
handlePrevious={() => {
@@ -702,52 +694,27 @@ function MessageSwitcher({
totalPages,
handlePrevious,
handleNext,
disableForStreaming,
}: {
currentPage: number;
totalPages: number;
handlePrevious: () => void;
handleNext: () => void;
disableForStreaming?: boolean;
}) {
return (
<div className="flex items-center text-sm space-x-0.5">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<Hoverable
icon={FiChevronLeft}
onClick={currentPage === 1 ? undefined : handlePrevious}
/>
</div>
</TooltipTrigger>
<TooltipContent>
{disableForStreaming ? "Disabled" : "Previous"}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Hoverable
icon={FiChevronLeft}
onClick={currentPage === 1 ? undefined : handlePrevious}
/>
<span className="text-text-darker select-none">
{currentPage} / {totalPages}
{disableForStreaming ? "Complete" : "Generating"}
</span>
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<Hoverable
icon={FiChevronRight}
onClick={currentPage === totalPages ? undefined : handleNext}
/>
</div>
</TooltipTrigger>
<TooltipContent>
{disableForStreaming ? "Disabled" : "Next"}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Hoverable
icon={FiChevronRight}
onClick={currentPage === totalPages ? undefined : handleNext}
/>
</div>
);
}

View File

@@ -383,7 +383,7 @@ export const AIMessage = ({
dangerouslySetInnerHTML={{ __html: htmlContent }}
/>
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-base"
className="prose max-w-full text-base"
components={markdownComponents}
remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[[rehypePrism, { ignoreMissing: true }], rehypeKatex]}
@@ -495,10 +495,7 @@ export const AIMessage = ({
{docs && docs.length > 0 && (
<div
className={`mobile:hidden ${
(query ||
toolCall?.tool_name ===
INTERNET_SEARCH_TOOL_NAME) &&
"mt-2"
query && "mt-2"
} -mx-8 w-full mb-4 flex relative`}
>
<div className="w-full">
@@ -798,67 +795,27 @@ function MessageSwitcher({
totalPages,
handlePrevious,
handleNext,
disableForStreaming,
}: {
currentPage: number;
totalPages: number;
handlePrevious: () => void;
handleNext: () => void;
disableForStreaming?: boolean;
}) {
return (
<div className="flex items-center text-sm space-x-0.5">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<Hoverable
icon={FiChevronLeft}
onClick={
disableForStreaming
? () => null
: currentPage === 1
? undefined
: handlePrevious
}
/>
</div>
</TooltipTrigger>
<TooltipContent>
{disableForStreaming
? "Wait for agent message to complete"
: "Previous"}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Hoverable
icon={FiChevronLeft}
onClick={currentPage === 1 ? undefined : handlePrevious}
/>
<span className="text-text-darker select-none">
{currentPage} / {totalPages}
</span>
<TooltipProvider>
<Tooltip>
<TooltipTrigger>
<div>
<Hoverable
icon={FiChevronRight}
onClick={
disableForStreaming
? () => null
: currentPage === totalPages
? undefined
: handleNext
}
/>
</div>
</TooltipTrigger>
<TooltipContent>
{disableForStreaming
? "Wait for agent message to complete"
: "Next"}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Hoverable
icon={FiChevronRight}
onClick={currentPage === totalPages ? undefined : handleNext}
/>
</div>
);
}
@@ -872,7 +829,6 @@ export const HumanMessage = ({
onMessageSelection,
shared,
stopGenerating = () => null,
disableSwitchingForStreaming = false,
}: {
shared?: boolean;
content: string;
@@ -882,7 +838,6 @@ export const HumanMessage = ({
onEdit?: (editedContent: string) => void;
onMessageSelection?: (messageId: number) => void;
stopGenerating?: () => void;
disableSwitchingForStreaming?: boolean;
}) => {
const textareaRef = useRef<HTMLTextAreaElement>(null);
@@ -1112,7 +1067,6 @@ export const HumanMessage = ({
otherMessagesCanSwitchTo.length > 1 && (
<div className="ml-auto mr-3">
<MessageSwitcher
disableForStreaming={disableSwitchingForStreaming}
currentPage={currentMessageInd + 1}
totalPages={otherMessagesCanSwitchTo.length}
handlePrevious={() => {

View File

@@ -294,7 +294,7 @@ const SubQuestionDisplay: React.FC<{
const renderedMarkdown = useMemo(() => {
return (
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-base"
className="prose max-w-full text-base"
components={markdownComponents}
remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[rehypeKatex]}
@@ -340,7 +340,7 @@ const SubQuestionDisplay: React.FC<{
{subQuestion?.question || temporaryDisplay?.question}
</div>
<ChevronDown
className={`mt-0.5 flex-none text-text-darker transition-transform duration-500 ease-in-out ${
className={`mt-0.5 text-text-darker transition-transform duration-500 ease-in-out ${
toggled ? "" : "-rotate-90"
}`}
size={20}
@@ -632,7 +632,9 @@ const SubQuestionsDisplay: React.FC<SubQuestionsDisplayProps> = ({
}
`}</style>
<div className="relative">
{/* {subQuestions.map((subQuestion, index) => ( */}
{memoizedSubQuestions.map((subQuestion, index) => (
// {dynamicSubQuestions.map((subQuestion, index) => (
<SubQuestionDisplay
currentlyOpen={
currentlyOpenQuestion?.level === subQuestion.level &&

View File

@@ -131,7 +131,7 @@ const StandardAnswersTableRow = ({
/>,
<ReactMarkdown
key={`answer-${standardAnswer.id}`}
className="prose dark:prose-invert"
className="prose"
remarkPlugins={[remarkGfm]}
>
{standardAnswer.answer}

View File

@@ -562,7 +562,6 @@ body {
.prose :where(pre):not(:where([class~="not-prose"], [class~="not-prose"] *)) {
background-color: theme("colors.code-bg");
font-size: theme("fontSize.code-sm");
color: #fff;
}
pre[class*="language-"],
@@ -656,3 +655,16 @@ ul > li > p {
display: inline;
/* Make paragraphs inline to reduce vertical space */
}
.dark strong {
color: white;
}
.prose.dark li,
.prose.dark h1,
.prose.dark h2,
.prose.dark h3,
.prose.dark h4,
.prose.dark h5 {
color: #e5e5e5;
}

View File

@@ -17,7 +17,7 @@ export const Hoverable: React.FC<{
<div className="flex items-center">
<Icon
size={size}
className="dark:text-[#B4B4B4] text-neutral-600 rounded h-fit cursor-pointer"
className="hover:bg-background-chat-hover dark:text-[#B4B4B4] text-neutral-600 rounded h-fit cursor-pointer"
/>
{hoverText && (
<div className="max-w-0 leading-none whitespace-nowrap overflow-hidden transition-all duration-300 ease-in-out group-hover:max-w-xs group-hover:ml-2">

View File

@@ -50,7 +50,7 @@ export function SearchResultIcon({ url }: { url: string }) {
return <SourceIcon sourceType={ValidSources.Web} iconSize={18} />;
}
if (url.includes("docs.onyx.app")) {
return <OnyxIcon size={18} className="dark:text-[#fff] text-[#000]" />;
return <OnyxIcon size={18} />;
}
return (

View File

@@ -23,7 +23,7 @@ export function WebResultIcon({
return (
<>
{hostname == "docs.onyx.app" ? (
<OnyxIcon size={size} className="dark:text-[#fff] text-[#000]" />
<OnyxIcon size={size} />
) : !error ? (
<img
className="my-0 rounded-full py-0"

View File

@@ -432,10 +432,7 @@ export const MarkdownFormField = ({
</div>
{isPreviewOpen ? (
<div className="p-4 border-t border-background-300">
<ReactMarkdown
className="prose dark:prose-invert"
remarkPlugins={[remarkGfm]}
>
<ReactMarkdown className="prose" remarkPlugins={[remarkGfm]}>
{field.value}
</ReactMarkdown>
</div>

View File

@@ -9,7 +9,7 @@ export default function BlurBackground({
<div
onClick={onClick}
className={`
desktop:hidden w-full h-full fixed inset-0 bg-neutral-700 bg-opacity-50 backdrop-blur-sm z-30 transition-opacity duration-300 ease-in-out ${
desktop:hidden w-full h-full fixed inset-0 bg-black bg-opacity-50 backdrop-blur-sm z-30 transition-opacity duration-300 ease-in-out ${
visible
? "opacity-100 pointer-events-auto"
: "opacity-0 pointer-events-none"

View File

@@ -35,7 +35,7 @@ export const MinimalMarkdown: React.FC<MinimalMarkdownProps> = ({
return (
<ReactMarkdown
className={`w-full text-wrap break-word prose dark:prose-invert ${className}`}
className={`w-full text-wrap break-word ${className}`}
components={markdownComponents}
remarkPlugins={[remarkGfm]}
>

View File

@@ -78,7 +78,7 @@ export function getUniqueIcons(docs: OnyxDocument[]): JSX.Element[] {
for (const doc of docs) {
// If it's a web source, we check domain uniqueness
if ((doc.is_internet || doc.source_type === ValidSources.Web) && doc.link) {
if (doc.source_type === ValidSources.Web && doc.link) {
const domain = getDomainFromUrl(doc.link);
if (domain && !seenDomains.has(domain)) {
seenDomains.add(domain);

View File

@@ -47,7 +47,7 @@ export default function LogoWithText({
className="flex gap-x-2 items-center ml-0 cursor-pointer desktop:hidden "
>
{!toggled ? (
<Logo className="desktop:hidden" height={24} width={24} />
<Logo className="desktop:hidden -my-2" height={24} width={24} />
) : (
<LogoComponent
show={toggled}

View File

@@ -23,11 +23,8 @@ import { AllUsersResponse } from "./types";
import { Credential } from "./connectors/credentials";
import { SettingsContext } from "@/components/settings/SettingsProvider";
import { Persona, PersonaLabel } from "@/app/admin/assistants/interfaces";
import {
isAnthropic,
LLMProviderDescriptor,
} from "@/app/admin/configuration/llm/interfaces";
import { LLMProviderDescriptor } from "@/app/admin/configuration/llm/interfaces";
import { isAnthropic } from "@/app/admin/configuration/llm/interfaces";
import { getSourceMetadata } from "./sources";
import { AuthType, NEXT_PUBLIC_CLOUD_ENABLED } from "./constants";
import { useUser } from "@/components/user/UserProvider";

View File

@@ -79,18 +79,12 @@ export async function* handleStream<T extends NonEmptyObject>(
}
export async function* handleSSEStream<T extends PacketType>(
streamingResponse: Response,
signal?: AbortSignal
streamingResponse: Response
): AsyncGenerator<T, void, unknown> {
const reader = streamingResponse.body?.getReader();
const decoder = new TextDecoder();
let buffer = "";
if (signal) {
signal.addEventListener("abort", () => {
console.log("aborting");
reader?.cancel();
});
}
while (true) {
const rawChunk = await reader?.read();
if (!rawChunk) {
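
For comparison, the pre-revert shape of `handleSSEStream` assembled from this hunk. A sketch only: the loop body is elided here exactly as it is in the diff.

// Pre-revert sketch: an optional AbortSignal cancels the reader, which makes
// pending read() calls resolve as done and ends the generator early.
export async function* handleSSEStream<T extends PacketType>(
  streamingResponse: Response,
  signal?: AbortSignal
): AsyncGenerator<T, void, unknown> {
  const reader = streamingResponse.body?.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  if (signal) {
    signal.addEventListener("abort", () => {
      reader?.cancel();
    });
  }

  while (true) {
    const rawChunk = await reader?.read();
    if (!rawChunk) {
      break;
    }
    // ...decode into `buffer` with `decoder` and yield parsed packets
    // (elided; unchanged by this hunk)
  }
}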

View File

@@ -21,6 +21,7 @@ module.exports = {
transitionProperty: {
spacing: "margin, padding",
},
keyframes: {
"subtle-pulse": {
"0%, 100%": { opacity: 0.9 },
@@ -147,6 +148,7 @@ module.exports = {
"text-mobile-sidebar": "var(--text-800)",
"background-search-filter": "var(--neutral-100-border-light)",
"background-search-filter-dropdown": "var(--neutral-100-border-light)",
"tw-prose-bold": "var(--text-800)",
"user-bubble": "var(--off-white)",