fix(LLM config): resolve API Key before fetching models (#10056) to release v3.2 (#10057)

Co-authored-by: Jamison Lahman <jamison@lahman.dev>
feat(federated): full thread replies + direct URL fetch in Slack search (#9940) to release v3.2 (#10050)
2026-04-11 18:02:42 +00:00 · 2026-04-10 00:02:33 -07:00 · 2026-04-09 18:24:02 -07:00 · 2026-04-09 20:50:41 -04:00 · 2026-04-09 18:29:31 +00:00 · 2026-04-09 18:06:35 +00:00
47 changed files with 1466 additions and 1484 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,6 @@ repos:
    rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
    hooks:
      - id: uv-sync
-        args: ["--locked", "--all-extras"]
      - id: uv-lock
      - id: uv-export
        name: uv-export default.txt
@@ -18,7 +17,7 @@ repos:
            "--no-emit-project",
            "--no-default-groups",
            "--no-hashes",
-            "--extra",
+            "--group",
            "backend",
            "-o",
            "backend/requirements/default.txt",
@@ -31,7 +30,7 @@ repos:
            "--no-emit-project",
            "--no-default-groups",
            "--no-hashes",
-            "--extra",
+            "--group",
            "dev",
            "-o",
            "backend/requirements/dev.txt",
@@ -44,7 +43,7 @@ repos:
            "--no-emit-project",
            "--no-default-groups",
            "--no-hashes",
-            "--extra",
+            "--group",
            "ee",
            "-o",
            "backend/requirements/ee.txt",
@@ -57,7 +56,7 @@ repos:
            "--no-emit-project",
            "--no-default-groups",
            "--no-hashes",
-            "--extra",
+            "--group",
            "model_server",
            "-o",
            "backend/requirements/model_server.txt",
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -531,8 +531,7 @@
      "request": "launch",
      "runtimeExecutable": "uv",
      "runtimeArgs": [
-        "sync",
-        "--all-extras"
+        "sync"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -117,7 +117,7 @@ If using PowerShell, the command slightly differs:
 Install the required Python dependencies:

 ```bash
-uv sync --all-extras
+uv sync
 ```

 Install Playwright for Python (headless browser required by the Web Connector):
--- a/backend/onyx/connectors/jira/connector.py
+++ b/backend/onyx/connectors/jira/connector.py
@@ -60,8 +60,10 @@ logger = setup_logger()

 ONE_HOUR = 3600

-_MAX_RESULTS_FETCH_IDS = 5000  # 5000
+_MAX_RESULTS_FETCH_IDS = 5000
 _JIRA_FULL_PAGE_SIZE = 50
+# https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
+_JIRA_BULK_FETCH_LIMIT = 100

 # Constants for Jira field names
 _FIELD_REPORTER = "reporter"
@@ -255,15 +257,13 @@ def _bulk_fetch_request(
    return resp.json()["issues"]


-def bulk_fetch_issues(
-    jira_client: JIRA, issue_ids: list[str], fields: str | None = None
-) -> list[Issue]:
-    # TODO(evan): move away from this jira library if they continue to not support
-    # the endpoints we need. Using private fields is not ideal, but
-    # is likely fine for now since we pin the library version
-
+def _bulk_fetch_batch(
+    jira_client: JIRA, issue_ids: list[str], fields: str | None
+) -> list[dict[str, Any]]:
+    """Fetch a single batch (must be <= _JIRA_BULK_FETCH_LIMIT).
+    On JSONDecodeError, recursively bisects until it succeeds or reaches size 1."""
    try:
-        raw_issues = _bulk_fetch_request(jira_client, issue_ids, fields)
+        return _bulk_fetch_request(jira_client, issue_ids, fields)
    except requests.exceptions.JSONDecodeError:
        if len(issue_ids) <= 1:
            logger.exception(
@@ -277,12 +277,25 @@ def bulk_fetch_issues(
            f"Jira bulk-fetch JSON decode failed for batch of {len(issue_ids)} issues. "
            f"Splitting into sub-batches of {mid} and {len(issue_ids) - mid}."
        )
-        left = bulk_fetch_issues(jira_client, issue_ids[:mid], fields)
-        right = bulk_fetch_issues(jira_client, issue_ids[mid:], fields)
+        left = _bulk_fetch_batch(jira_client, issue_ids[:mid], fields)
+        right = _bulk_fetch_batch(jira_client, issue_ids[mid:], fields)
        return left + right
-    except Exception as e:
-        logger.error(f"Error fetching issues: {e}")
-        raise
+
+
+def bulk_fetch_issues(
+    jira_client: JIRA, issue_ids: list[str], fields: str | None = None
+) -> list[Issue]:
+    # TODO(evan): move away from this jira library if they continue to not support
+    # the endpoints we need. Using private fields is not ideal, but
+    # is likely fine for now since we pin the library version
+
+    raw_issues: list[dict[str, Any]] = []
+    for batch in chunked(issue_ids, _JIRA_BULK_FETCH_LIMIT):
+        try:
+            raw_issues.extend(_bulk_fetch_batch(jira_client, list(batch), fields))
+        except Exception as e:
+            logger.error(f"Error fetching issues: {e}")
+            raise

    return [
        Issue(jira_client._options, jira_client._session, raw=issue)
--- a/backend/onyx/context/search/federated/models.py
+++ b/backend/onyx/context/search/federated/models.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from datetime import datetime
 from typing import TypedDict

@@ -6,6 +7,14 @@ from pydantic import BaseModel
 from onyx.onyxbot.slack.models import ChannelType


+@dataclass(frozen=True)
+class DirectThreadFetch:
+    """Request to fetch a Slack thread directly by channel and timestamp."""
+
+    channel_id: str
+    thread_ts: str
+
+
 class ChannelMetadata(TypedDict):
    """Type definition for cached channel metadata."""

--- a/backend/onyx/context/search/federated/slack_search.py
+++ b/backend/onyx/context/search/federated/slack_search.py
@@ -19,6 +19,7 @@ from onyx.configs.chat_configs import DOC_TIME_DECAY
 from onyx.connectors.models import IndexingDocument
 from onyx.connectors.models import TextSection
 from onyx.context.search.federated.models import ChannelMetadata
+from onyx.context.search.federated.models import DirectThreadFetch
 from onyx.context.search.federated.models import SlackMessage
 from onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES
 from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
@@ -49,7 +50,6 @@ from onyx.server.federated.models import FederatedConnectorDetail
 from onyx.utils.logger import setup_logger
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
 from onyx.utils.timing import log_function_time
-from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE

 logger = setup_logger()

@@ -58,7 +58,6 @@ HIGHLIGHT_END_CHAR = "\ue001"

 CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24  # 24 hours
 USER_PROFILE_CACHE_TTL = 60 * 60 * 24  # 24 hours
-SLACK_THREAD_CONTEXT_WINDOW = 3  # Number of messages before matched message to include
 CHANNEL_METADATA_MAX_RETRIES = 3  # Maximum retry attempts for channel metadata fetching
 CHANNEL_METADATA_RETRY_DELAY = 1  # Initial retry delay in seconds (exponential backoff)

@@ -421,6 +420,94 @@ class SlackQueryResult(BaseModel):
    filtered_channels: list[str]  # Channels filtered out during this query


+def _fetch_thread_from_url(
+    thread_fetch: DirectThreadFetch,
+    access_token: str,
+    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
+) -> SlackQueryResult:
+    """Fetch a thread directly from a Slack URL via conversations.replies."""
+    channel_id = thread_fetch.channel_id
+    thread_ts = thread_fetch.thread_ts
+
+    slack_client = WebClient(token=access_token)
+    try:
+        response = slack_client.conversations_replies(
+            channel=channel_id,
+            ts=thread_ts,
+        )
+        response.validate()
+        messages: list[dict[str, Any]] = response.get("messages", [])
+    except SlackApiError as e:
+        logger.warning(
+            f"Failed to fetch thread from URL (channel={channel_id}, ts={thread_ts}): {e}"
+        )
+        return SlackQueryResult(messages=[], filtered_channels=[])
+
+    if not messages:
+        logger.warning(
+            f"No messages found for URL override (channel={channel_id}, ts={thread_ts})"
+        )
+        return SlackQueryResult(messages=[], filtered_channels=[])
+
+    # Build thread text from all messages
+    thread_text = _build_thread_text(messages, access_token, None, slack_client)
+
+    # Get channel name from metadata cache or API
+    channel_name = "unknown"
+    if channel_metadata_dict and channel_id in channel_metadata_dict:
+        channel_name = channel_metadata_dict[channel_id].get("name", "unknown")
+    else:
+        try:
+            ch_response = slack_client.conversations_info(channel=channel_id)
+            ch_response.validate()
+            channel_info: dict[str, Any] = ch_response.get("channel", {})
+            channel_name = channel_info.get("name", "unknown")
+        except SlackApiError:
+            pass
+
+    # Build the SlackMessage
+    parent_msg = messages[0]
+    message_ts = parent_msg.get("ts", thread_ts)
+    username = parent_msg.get("user", "unknown_user")
+    parent_text = parent_msg.get("text", "")
+    snippet = (
+        parent_text[:50].rstrip() + "..." if len(parent_text) > 50 else parent_text
+    ).replace("\n", " ")
+
+    doc_time = datetime.fromtimestamp(float(message_ts))
+    decay_factor = DOC_TIME_DECAY
+    doc_age_years = (datetime.now() - doc_time).total_seconds() / (365 * 24 * 60 * 60)
+    recency_bias = max(1 / (1 + decay_factor * doc_age_years), 0.75)
+
+    permalink = (
+        f"https://slack.com/archives/{channel_id}/p{message_ts.replace('.', '')}"
+    )
+
+    slack_message = SlackMessage(
+        document_id=f"{channel_id}_{message_ts}",
+        channel_id=channel_id,
+        message_id=message_ts,
+        thread_id=None,  # Prevent double-enrichment in thread context fetch
+        link=permalink,
+        metadata={
+            "channel": channel_name,
+            "time": doc_time.isoformat(),
+        },
+        timestamp=doc_time,
+        recency_bias=recency_bias,
+        semantic_identifier=f"{username} in #{channel_name}: {snippet}",
+        text=thread_text,
+        highlighted_texts=set(),
+        slack_score=100000.0,  # High priority — user explicitly asked for this thread
+    )
+
+    logger.info(
+        f"URL override: fetched thread from channel={channel_id}, ts={thread_ts}, {len(messages)} messages"
+    )
+
+    return SlackQueryResult(messages=[slack_message], filtered_channels=[])
+
+
 def query_slack(
    query_string: str,
    access_token: str,
@@ -432,7 +519,6 @@ def query_slack(
    available_channels: list[str] | None = None,
    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
 ) -> SlackQueryResult:
-
    # Check if query has channel override (user specified channels in query)
    has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")

@@ -662,7 +748,6 @@ def _fetch_thread_context(
    """
    channel_id = message.channel_id
    thread_id = message.thread_id
-    message_id = message.message_id

    # If not a thread, return original text as success
    if thread_id is None:
@@ -695,62 +780,37 @@ def _fetch_thread_context(
    if len(messages) <= 1:
        return ThreadContextResult.success(message.text)

-    # Build thread text from thread starter + context window around matched message
-    thread_text = _build_thread_text(
-        messages, message_id, thread_id, access_token, team_id, slack_client
-    )
+    # Build thread text from thread starter + all replies
+    thread_text = _build_thread_text(messages, access_token, team_id, slack_client)
    return ThreadContextResult.success(thread_text)


 def _build_thread_text(
    messages: list[dict[str, Any]],
-    message_id: str,
-    thread_id: str,
    access_token: str,
    team_id: str | None,
    slack_client: WebClient,
 ) -> str:
-    """Build the thread text from messages."""
+    """Build thread text including all replies.
+
+    Includes the thread parent message followed by all replies in order.
+    """
    msg_text = messages[0].get("text", "")
    msg_sender = messages[0].get("user", "")
    thread_text = f"<@{msg_sender}>: {msg_text}"

+    # All messages after index 0 are replies
+    replies = messages[1:]
+    if not replies:
+        return thread_text
+
+    logger.debug(f"Thread {messages[0].get('ts')}: {len(replies)} replies included")
    thread_text += "\n\nReplies:"
-    if thread_id == message_id:
-        message_id_idx = 0
-    else:
-        message_id_idx = next(
-            (i for i, msg in enumerate(messages) if msg.get("ts") == message_id), 0
-        )
-        if not message_id_idx:
-            return thread_text

-        start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)
-
-        if start_idx > 1:
-            thread_text += "\n..."
-
-        for i in range(start_idx, message_id_idx):
-            msg_text = messages[i].get("text", "")
-            msg_sender = messages[i].get("user", "")
-            thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
-
-        msg_text = messages[message_id_idx].get("text", "")
-        msg_sender = messages[message_id_idx].get("user", "")
-        thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
-
-    # Add following replies
-    len_replies = 0
-    for msg in messages[message_id_idx + 1 :]:
+    for msg in replies:
        msg_text = msg.get("text", "")
        msg_sender = msg.get("user", "")
-        reply = f"\n\n<@{msg_sender}>: {msg_text}"
-        thread_text += reply
-
-        len_replies += len(reply)
-        if len_replies >= DOC_EMBEDDING_CONTEXT_SIZE * 4:
-            thread_text += "\n..."
-            break
+        thread_text += f"\n\n<@{msg_sender}>: {msg_text}"

    # Replace user IDs with names using cached lookups
    userids: set[str] = set(re.findall(r"<@([A-Z0-9]+)>", thread_text))
@@ -976,7 +1036,16 @@ def slack_retrieval(

    # Query slack with entity filtering
    llm = get_default_llm()
-    query_strings = build_slack_queries(query, llm, entities, available_channels)
+    query_items = build_slack_queries(query, llm, entities, available_channels)
+
+    # Partition into direct thread fetches and search query strings
+    direct_fetches: list[DirectThreadFetch] = []
+    query_strings: list[str] = []
+    for item in query_items:
+        if isinstance(item, DirectThreadFetch):
+            direct_fetches.append(item)
+        else:
+            query_strings.append(item)

    # Determine filtering based on entities OR context (bot)
    include_dm = False
@@ -993,8 +1062,16 @@ def slack_retrieval(
                f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
            )

-    # Build search tasks
-    search_tasks = [
+    # Build search tasks — direct thread fetches + keyword searches
+    search_tasks: list[tuple] = [
+        (
+            _fetch_thread_from_url,
+            (fetch, access_token, channel_metadata_dict),
+        )
+        for fetch in direct_fetches
+    ]
+
+    search_tasks.extend(
        (
            query_slack,
            (
@@ -1010,7 +1087,7 @@ def slack_retrieval(
            ),
        )
        for query_string in query_strings
-    ]
+    )

    # If include_dm is True AND we're not already searching all channels,
    # add additional searches without channel filters.
--- a/backend/onyx/context/search/federated/slack_search_utils.py
+++ b/backend/onyx/context/search/federated/slack_search_utils.py
@@ -10,6 +10,7 @@ from pydantic import ValidationError

 from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
 from onyx.context.search.federated.models import ChannelMetadata
+from onyx.context.search.federated.models import DirectThreadFetch
 from onyx.context.search.models import ChunkIndexRequest
 from onyx.federated_connectors.slack.models import SlackEntities
 from onyx.llm.interfaces import LLM
@@ -638,12 +639,38 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
        return [query_text]


+SLACK_URL_PATTERN = re.compile(
+    r"https?://[a-z0-9-]+\.slack\.com/archives/([A-Z0-9]+)/p(\d{16})"
+)
+
+
+def extract_slack_message_urls(
+    query_text: str,
+) -> list[tuple[str, str]]:
+    """Extract Slack message URLs from query text.
+
+    Parses URLs like:
+      https://onyx-company.slack.com/archives/C097NBWMY8Y/p1775491616524769
+
+    Returns list of (channel_id, thread_ts) tuples.
+    The 16-digit timestamp is converted to Slack ts format (with dot).
+    """
+    results = []
+    for match in SLACK_URL_PATTERN.finditer(query_text):
+        channel_id = match.group(1)
+        raw_ts = match.group(2)
+        # Convert p1775491616524769 -> 1775491616.524769
+        thread_ts = f"{raw_ts[:10]}.{raw_ts[10:]}"
+        results.append((channel_id, thread_ts))
+    return results
+
+
 def build_slack_queries(
    query: ChunkIndexRequest,
    llm: LLM,
    entities: dict[str, Any] | None = None,
    available_channels: list[str] | None = None,
-) -> list[str]:
+) -> list[str | DirectThreadFetch]:
    """Build Slack query strings with date filtering and query expansion."""
    default_search_days = 30
    if entities:
@@ -668,6 +695,15 @@ def build_slack_queries(
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
            time_filter = f" after:{cutoff_date.strftime('%Y-%m-%d')}"

+    # Check for Slack message URLs — if found, add direct fetch requests
+    url_fetches: list[DirectThreadFetch] = []
+    slack_urls = extract_slack_message_urls(query.query)
+    for channel_id, thread_ts in slack_urls:
+        url_fetches.append(
+            DirectThreadFetch(channel_id=channel_id, thread_ts=thread_ts)
+        )
+        logger.info(f"Detected Slack URL: channel={channel_id}, ts={thread_ts}")
+
    # ALWAYS extract channel references from the query (not just for recency queries)
    channel_references = extract_channel_references_from_query(query.query)

@@ -684,7 +720,9 @@ def build_slack_queries(

            # If valid channels detected, use ONLY those channels with NO keywords
            # Return query with ONLY time filter + channel filter (no keywords)
-            return [build_channel_override_query(channel_references, time_filter)]
+            return url_fetches + [
+                build_channel_override_query(channel_references, time_filter)
+            ]
        except ValueError as e:
            # If validation fails, log the error and continue with normal flow
            logger.warning(f"Channel reference validation failed: {e}")
@@ -702,7 +740,8 @@ def build_slack_queries(
        rephrased_queries = expand_query_with_llm(query.query, llm)

    # Build final query strings with time filters
-    return [
+    search_queries = [
        rephrased_query.strip() + time_filter
        for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
    ]
+    return url_fetches + search_queries
--- a/backend/onyx/server/manage/llm/api.py
+++ b/backend/onyx/server/manage/llm/api.py
@@ -47,8 +47,6 @@ from onyx.llm.factory import get_llm
 from onyx.llm.factory import get_max_input_tokens_from_llm_provider
 from onyx.llm.utils import get_bedrock_token_limit
 from onyx.llm.utils import get_llm_contextual_cost
-from onyx.llm.utils import get_max_input_tokens
-from onyx.llm.utils import litellm_thinks_model_supports_image_input
 from onyx.llm.utils import test_llm
 from onyx.llm.well_known_providers.auto_update_service import (
    fetch_llm_recommendations_from_github,
@@ -64,8 +62,6 @@ from onyx.server.manage.llm.models import BedrockFinalModelResponse
 from onyx.server.manage.llm.models import BedrockModelsRequest
 from onyx.server.manage.llm.models import BifrostFinalModelResponse
 from onyx.server.manage.llm.models import BifrostModelsRequest
-from onyx.server.manage.llm.models import CustomProviderModelResponse
-from onyx.server.manage.llm.models import CustomProviderModelsRequest
 from onyx.server.manage.llm.models import CustomProviderOption
 from onyx.server.manage.llm.models import DefaultModel
 from onyx.server.manage.llm.models import LitellmFinalModelResponse
@@ -115,6 +111,43 @@ def _mask_string(value: str) -> str:
    return value[:4] + "****" + value[-4:]


+def _resolve_api_key(
+    api_key: str | None,
+    provider_name: str | None,
+    api_base: str | None,
+    db_session: Session,
+) -> str | None:
+    """Return the real API key for model-fetch endpoints.
+
+    When editing an existing provider the form value is masked (e.g.
+    ``sk-a****b1c2``).  If *provider_name* is supplied we can look up
+    the unmasked key from the database so the external request succeeds.
+
+    The stored key is only returned when the request's *api_base*
+    matches the value stored in the database.
+    """
+    if not provider_name:
+        return api_key
+
+    existing_provider = fetch_existing_llm_provider(
+        name=provider_name, db_session=db_session
+    )
+    if existing_provider and existing_provider.api_key:
+        # Normalise both URLs before comparing so trailing-slash
+        # differences don't cause a false mismatch.
+        stored_base = (existing_provider.api_base or "").strip().rstrip("/")
+        request_base = (api_base or "").strip().rstrip("/")
+        if stored_base != request_base:
+            return api_key
+
+        stored_key = existing_provider.api_key.get_value(apply_mask=False)
+        # Only resolve when the incoming value is the masked form of the
+        # stored key — i.e. the user hasn't typed a new key.
+        if api_key and api_key == _mask_string(stored_key):
+            return stored_key
+    return api_key
+
+
 def _sync_fetched_models(
    db_session: Session,
    provider_name: str,
@@ -280,158 +313,6 @@ def fetch_custom_provider_names(
    )


-@admin_router.post("/custom/available-models")
-def fetch_custom_provider_models(
-    request: CustomProviderModelsRequest,
-    _: User = Depends(require_permission(Permission.FULL_ADMIN_PANEL_ACCESS)),
-) -> list[CustomProviderModelResponse]:
-    """Fetch models for a custom provider.
-
-    When ``api_base`` is provided the endpoint hits the provider's
-    OpenAI-compatible ``/v1/models`` (or ``/{api_version}/models``) to
-    discover live models.  Otherwise it falls back to the static list
-    that LiteLLM ships for the given provider slug.
-
-    In both cases the response is enriched with metadata from LiteLLM
-    (display name, max input tokens, vision support) when available.
-    """
-    if request.api_base:
-        return _fetch_custom_models_from_api(
-            provider=request.provider,
-            api_base=request.api_base,
-            api_key=request.api_key,
-            api_version=request.api_version,
-        )
-
-    return _fetch_custom_models_from_litellm(request.provider)
-
-
-def _enrich_custom_model(
-    name: str,
-    provider: str,
-    *,
-    api_display_name: str | None = None,
-    api_max_input_tokens: int | None = None,
-    api_supports_image_input: bool | None = None,
-) -> CustomProviderModelResponse:
-    """Build a ``CustomProviderModelResponse`` enriched with LiteLLM metadata.
-
-    Values explicitly provided by the source API take precedence; LiteLLM
-    metadata is used as a fallback.
-    """
-    from onyx.llm.model_name_parser import parse_litellm_model_name
-
-    # LiteLLM keys are typically "provider/model"
-    litellm_key = f"{provider}/{name}" if not name.startswith(f"{provider}/") else name
-    parsed = parse_litellm_model_name(litellm_key)
-
-    # display_name: prefer API-provided name, then LiteLLM enrichment, then raw name
-    if api_display_name and api_display_name != name:
-        display_name = api_display_name
-    else:
-        display_name = parsed.display_name or name
-
-    # max_input_tokens: prefer API value, then LiteLLM lookup
-    if api_max_input_tokens is not None:
-        max_input_tokens: int | None = api_max_input_tokens
-    else:
-        try:
-            max_input_tokens = get_max_input_tokens(name, provider)
-        except Exception:
-            max_input_tokens = None
-
-    # supports_image_input: prefer API value, then LiteLLM inference
-    if api_supports_image_input is not None:
-        supports_image = api_supports_image_input
-    else:
-        supports_image = litellm_thinks_model_supports_image_input(name, provider)
-
-    return CustomProviderModelResponse(
-        name=name,
-        display_name=display_name,
-        max_input_tokens=max_input_tokens,
-        supports_image_input=supports_image,
-    )
-
-
-def _fetch_custom_models_from_api(
-    provider: str,
-    api_base: str,
-    api_key: str | None,
-    api_version: str | None,
-) -> list[CustomProviderModelResponse]:
-    """Hit an OpenAI-compatible ``/v1/models`` (or versioned variant)."""
-    cleaned = api_base.strip().rstrip("/")
-    if api_version:
-        url = f"{cleaned}/{api_version.strip().strip('/')}/models"
-    elif cleaned.endswith("/v1"):
-        url = f"{cleaned}/models"
-    else:
-        url = f"{cleaned}/v1/models"
-
-    response_json = _get_openai_compatible_models_response(
-        url=url,
-        source_name="Custom provider",
-        api_key=api_key,
-    )
-
-    models = response_json.get("data", [])
-    if not isinstance(models, list) or len(models) == 0:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "No models found from the provider's API.",
-        )
-
-    results: list[CustomProviderModelResponse] = []
-    for model in models:
-        try:
-            model_id = model.get("id", "")
-            if not model_id:
-                continue
-            if is_embedding_model(model_id):
-                continue
-            results.append(
-                _enrich_custom_model(
-                    model_id,
-                    provider,
-                    api_display_name=model.get("name"),
-                    api_max_input_tokens=model.get("context_length"),
-                    api_supports_image_input=infer_vision_support(model_id),
-                )
-            )
-        except Exception as e:
-            logger.warning(
-                "Failed to parse custom provider model entry",
-                extra={"error": str(e), "item": str(model)[:1000]},
-            )
-
-    if not results:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "No compatible models found from the provider's API.",
-        )
-
-    return sorted(results, key=lambda m: m.name.lower())
-
-
-def _fetch_custom_models_from_litellm(
-    provider: str,
-) -> list[CustomProviderModelResponse]:
-    """Fall back to litellm's static ``models_by_provider`` mapping."""
-    import litellm
-
-    model_names = litellm.models_by_provider.get(provider)
-    if model_names is None:
-        raise OnyxError(
-            OnyxErrorCode.NOT_FOUND,
-            f"Unknown provider: {provider}",
-        )
-    return sorted(
-        (_enrich_custom_model(name, provider) for name in model_names),
-        key=lambda m: m.name.lower(),
-    )
-
-
@admin_router.get("/built-in/options")
 def fetch_llm_options(
    _: User = Depends(require_permission(Permission.FULL_ADMIN_PANEL_ACCESS)),
@@ -1330,16 +1211,17 @@ def get_ollama_available_models(
    return sorted_results


-def _get_openrouter_models_response(api_base: str, api_key: str) -> dict:
+def _get_openrouter_models_response(api_base: str, api_key: str | None) -> dict:
    """Perform GET to OpenRouter /models and return parsed JSON."""
    cleaned_api_base = api_base.strip().rstrip("/")
    url = f"{cleaned_api_base}/models"
-    headers = {
-        "Authorization": f"Bearer {api_key}",
+    headers: dict[str, str] = {
        # Optional headers recommended by OpenRouter for attribution
        "HTTP-Referer": "https://onyx.app",
        "X-Title": "Onyx",
    }
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
    try:
        response = httpx.get(url, headers=headers, timeout=10.0)
        response.raise_for_status()
@@ -1362,8 +1244,12 @@ def get_openrouter_available_models(
    Parses id, name (display), context_length, and architecture.input_modalities.
    """

+    api_key = _resolve_api_key(
+        request.api_key, request.provider_name, request.api_base, db_session
+    )
+
    response_json = _get_openrouter_models_response(
-        api_base=request.api_base, api_key=request.api_key
+        api_base=request.api_base, api_key=api_key
    )

    data = response_json.get("data", [])
@@ -1456,13 +1342,18 @@ def get_lm_studio_available_models(

    # If provider_name is given and the api_key hasn't been changed by the user,
    # fall back to the stored API key from the database (the form value is masked).
+    # Only do so when the api_base matches what is stored.
    api_key = request.api_key
    if request.provider_name and not request.api_key_changed:
        existing_provider = fetch_existing_llm_provider(
            name=request.provider_name, db_session=db_session
        )
        if existing_provider and existing_provider.custom_config:
-            api_key = existing_provider.custom_config.get(LM_STUDIO_API_KEY_CONFIG_KEY)
+            stored_base = (existing_provider.api_base or "").strip().rstrip("/")
+            if stored_base == cleaned_api_base:
+                api_key = existing_provider.custom_config.get(
+                    LM_STUDIO_API_KEY_CONFIG_KEY
+                )

    url = f"{cleaned_api_base}/api/v1/models"
    headers: dict[str, str] = {}
@@ -1546,8 +1437,12 @@ def get_litellm_available_models(
    db_session: Session = Depends(get_session),
 ) -> list[LitellmFinalModelResponse]:
    """Fetch available models from Litellm proxy /v1/models endpoint."""
+    api_key = _resolve_api_key(
+        request.api_key, request.provider_name, request.api_base, db_session
+    )
+
    response_json = _get_litellm_models_response(
-        api_key=request.api_key, api_base=request.api_base
+        api_key=api_key, api_base=request.api_base
    )

    models = response_json.get("data", [])
@@ -1604,7 +1499,7 @@ def get_litellm_available_models(
    return sorted_results


-def _get_litellm_models_response(api_key: str, api_base: str) -> dict:
+def _get_litellm_models_response(api_key: str | None, api_base: str) -> dict:
    """Perform GET to Litellm proxy /api/v1/models and return parsed JSON."""
    cleaned_api_base = api_base.strip().rstrip("/")
    url = f"{cleaned_api_base}/v1/models"
@@ -1679,8 +1574,12 @@ def get_bifrost_available_models(
    db_session: Session = Depends(get_session),
 ) -> list[BifrostFinalModelResponse]:
    """Fetch available models from Bifrost gateway /v1/models endpoint."""
+    api_key = _resolve_api_key(
+        request.api_key, request.provider_name, request.api_base, db_session
+    )
+
    response_json = _get_bifrost_models_response(
-        api_base=request.api_base, api_key=request.api_key
+        api_base=request.api_base, api_key=api_key
    )

    models = response_json.get("data", [])
@@ -1769,8 +1668,12 @@ def get_openai_compatible_server_available_models(
    db_session: Session = Depends(get_session),
 ) -> list[OpenAICompatibleFinalModelResponse]:
    """Fetch available models from a generic OpenAI-compatible /v1/models endpoint."""
+    api_key = _resolve_api_key(
+        request.api_key, request.provider_name, request.api_base, db_session
+    )
+
    response_json = _get_openai_compatible_server_response(
-        api_base=request.api_base, api_key=request.api_key
+        api_base=request.api_base, api_key=api_key
    )

    models = response_json.get("data", [])
--- a/backend/onyx/server/manage/llm/models.py
+++ b/backend/onyx/server/manage/llm/models.py
@@ -477,21 +477,6 @@ class BifrostFinalModelResponse(BaseModel):
    supports_reasoning: bool


-# Custom provider dynamic models fetch
-class CustomProviderModelsRequest(BaseModel):
-    provider: str  # LiteLLM provider slug (e.g. "deepseek", "fireworks_ai")
-    api_base: str | None = None  # If set, fetches live models via /v1/models
-    api_key: str | None = None
-    api_version: str | None = None  # If set, used to construct the models URL
-
-
-class CustomProviderModelResponse(BaseModel):
-    name: str
-    display_name: str
-    max_input_tokens: int | None
-    supports_image_input: bool
-
-
 # OpenAI Compatible dynamic models fetch
 class OpenAICompatibleModelsRequest(BaseModel):
    api_base: str
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -1,10 +0,0 @@
-[project]
-name = "onyx-backend"
-version = "0.0.0"
-requires-python = ">=3.11"
-dependencies = [
-    "onyx[backend,dev,ee]",
-]
-
-[tool.uv.sources]
-onyx = { workspace = true }
--- a/backend/requirements/README.md
+++ b/backend/requirements/README.md
@@ -46,11 +46,11 @@ curl -LsSf https://astral.sh/uv/install.sh | sh

 1. Edit `pyproject.toml`
 2. Add/update/remove dependencies in the appropriate section:
-   - `[dependency-groups]` for dev tools
   - `[project.dependencies]` for **shared** dependencies (used by both backend and model_server)
-   - `[project.optional-dependencies.backend]` for backend-only dependencies
-   - `[project.optional-dependencies.model_server]` for model_server-only dependencies (ML packages)
-   - `[project.optional-dependencies.ee]` for EE features
+   - `[dependency-groups.backend]` for backend-only dependencies
+   - `[dependency-groups.dev]` for dev tools
+   - `[dependency-groups.ee]` for EE features
+   - `[dependency-groups.model_server]` for model_server-only dependencies (ML packages)
 3. Commit your changes - pre-commit hooks will automatically regenerate the lock file and requirements

 ### 3. Generating Lock File and Requirements
@@ -64,10 +64,10 @@ To manually regenerate:

 ```bash
 uv lock
-uv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt
+uv export --no-emit-project --no-default-groups --no-hashes --group backend -o backend/requirements/default.txt
 uv export --no-emit-project --no-default-groups --no-hashes --group dev -o backend/requirements/dev.txt
-uv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt
-uv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt
+uv export --no-emit-project --no-default-groups --no-hashes --group ee -o backend/requirements/ee.txt
+uv export --no-emit-project --no-default-groups --no-hashes --group model_server -o backend/requirements/model_server.txt
 ```

 ### 4. Installing Dependencies
@@ -76,30 +76,14 @@ If enabled, all packages are installed automatically by the `uv-sync` pre-commit
 branches or pulling new changes.

 ```bash
-# For everything (most common)
-uv sync --all-extras
+# For development (most common) — installs shared + backend + dev + ee + model_server (all default groups)
+uv sync

-# For backend production (shared + backend dependencies)
-uv sync --extra backend
-
-# For backend development (shared + backend + dev tools)
-uv sync --extra backend --extra dev
-
-# For backend with EE (shared + backend + ee)
-uv sync --extra backend --extra ee
+# For backend production only (shared + backend dependencies)
+uv sync --no-default-groups --group backend

 # For model server (shared + model_server, NO backend deps!)
-uv sync --extra model_server
-```
-
-`uv` aggressively [ignores active virtual environments](https://docs.astral.sh/uv/concepts/projects/config/#project-environment-path) and prefers the root virtual environment.
-When working in workspace packages, be sure to pass `--active` when syncing the virtual environment:
-
-```bash
-cd backend/
-source .venv/bin/activate
-uv sync --active
-uv run --active ...
+uv sync --no-default-groups --group model_server
 ```

 ### 5. Upgrading Dependencies
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt
+#    uv export --no-emit-project --no-default-groups --no-hashes --group backend -o backend/requirements/default.txt
 agent-client-protocol==0.7.1
    # via onyx
 aioboto3==15.1.0
@@ -19,7 +19,6 @@ aiohttp==3.13.4
    #   aiobotocore
    #   discord-py
    #   litellm
-    #   onyx
    #   voyageai
 aioitertools==0.13.0
    # via aiobotocore
@@ -28,7 +27,6 @@ aiolimiter==1.2.1
 aiosignal==1.4.0
    # via aiohttp
 alembic==1.10.4
-    # via onyx
 amqp==5.3.1
    # via kombu
 annotated-doc==0.0.4
@@ -51,13 +49,10 @@ argon2-cffi==23.1.0
 argon2-cffi-bindings==25.1.0
    # via argon2-cffi
 asana==5.0.8
-    # via onyx
 async-timeout==5.0.1 ; python_full_version < '3.11.3'
    # via redis
 asyncpg==0.30.0
-    # via onyx
 atlassian-python-api==3.41.16
-    # via onyx
 attrs==25.4.0
    # via
    #   aiohttp
@@ -68,7 +63,6 @@ attrs==25.4.0
 authlib==1.6.9
    # via fastmcp
 azure-cognitiveservices-speech==1.38.0
-    # via onyx
 babel==2.17.0
    # via courlan
 backoff==2.2.1
@@ -86,7 +80,6 @@ beautifulsoup4==4.12.3
    #   atlassian-python-api
    #   markdownify
    #   markitdown
-    #   onyx
    #   unstructured
 billiard==4.2.3
    # via celery
@@ -94,9 +87,7 @@ boto3==1.39.11
    # via
    #   aiobotocore
    #   cohere
-    #   onyx
 boto3-stubs==1.39.11
-    # via onyx
 botocore==1.39.11
    # via
    #   aiobotocore
@@ -105,7 +96,6 @@ botocore==1.39.11
 botocore-stubs==1.40.74
    # via boto3-stubs
 braintrust==0.3.9
-    # via onyx
 brotli==1.2.0
    # via onyx
 bytecode==0.17.0
@@ -115,7 +105,6 @@ cachetools==6.2.2
 caio==0.9.25
    # via aiofile
 celery==5.5.1
-    # via onyx
 certifi==2025.11.12
    # via
    #   asana
@@ -134,7 +123,6 @@ cffi==2.0.0
    #   pynacl
    #   zstandard
 chardet==5.2.0
-    # via onyx
 charset-normalizer==3.4.4
    # via
    #   htmldate
@@ -146,7 +134,6 @@ charset-normalizer==3.4.4
 chevron==0.14.0
    # via braintrust
 chonkie==1.0.10
-    # via onyx
 claude-agent-sdk==0.1.19
    # via onyx
 click==8.3.1
@@ -201,15 +188,12 @@ cryptography==46.0.6
 cyclopts==4.2.4
    # via fastmcp
 dask==2026.1.1
-    # via
-    #   distributed
-    #   onyx
+    # via distributed
 dataclasses-json==0.6.7
    # via unstructured
 dateparser==1.2.2
    # via htmldate
 ddtrace==3.10.0
-    # via onyx
 decorator==5.2.1
    # via retry
 defusedxml==0.7.1
@@ -223,7 +207,6 @@ deprecated==1.3.1
 discord-py==2.4.0
    # via onyx
 distributed==2026.1.1
-    # via onyx
 distro==1.9.0
    # via
    #   openai
@@ -235,7 +218,6 @@ docstring-parser==0.17.0
 docutils==0.22.3
    # via rich-rst
 dropbox==12.0.2
-    # via onyx
 durationpy==0.10
    # via kubernetes
 email-validator==2.2.0
@@ -251,7 +233,6 @@ et-xmlfile==2.0.0
 events==0.5
    # via opensearch-py
 exa-py==1.15.4
-    # via onyx
 exceptiongroup==1.3.0
    # via
    #   braintrust
@@ -262,23 +243,16 @@ fastapi==0.133.1
    #   fastapi-users
    #   onyx
 fastapi-limiter==0.1.6
-    # via onyx
 fastapi-users==15.0.4
-    # via
-    #   fastapi-users-db-sqlalchemy
-    #   onyx
+    # via fastapi-users-db-sqlalchemy
 fastapi-users-db-sqlalchemy==7.0.0
-    # via onyx
 fastavro==1.12.1
    # via cohere
 fastmcp==3.2.0
-    # via onyx
 fastuuid==0.14.0
    # via litellm
 filelock==3.20.3
-    # via
-    #   huggingface-hub
-    #   onyx
+    # via huggingface-hub
 filetype==1.2.0
    # via unstructured
 flatbuffers==25.9.23
@@ -298,7 +272,6 @@ gitpython==3.1.45
 google-api-core==2.28.1
    # via google-api-python-client
 google-api-python-client==2.86.0
-    # via onyx
 google-auth==2.48.0
    # via
    #   google-api-core
@@ -308,11 +281,8 @@ google-auth==2.48.0
    #   google-genai
    #   kubernetes
 google-auth-httplib2==0.1.0
-    # via
-    #   google-api-python-client
-    #   onyx
+    # via google-api-python-client
 google-auth-oauthlib==1.0.0
-    # via onyx
 google-genai==1.52.0
    # via onyx
 googleapis-common-protos==1.72.0
@@ -340,7 +310,6 @@ htmldate==1.9.1
 httpcore==1.0.9
    # via
    #   httpx
-    #   onyx
    #   unstructured-client
 httplib2==0.31.0
    # via
@@ -357,21 +326,16 @@ httpx==0.28.1
    #   langsmith
    #   litellm
    #   mcp
-    #   onyx
    #   openai
    #   unstructured-client
 httpx-oauth==0.15.1
-    # via onyx
 httpx-sse==0.4.3
    # via
    #   cohere
    #   mcp
 hubspot-api-client==11.1.0
-    # via onyx
 huggingface-hub==0.35.3
-    # via
-    #   onyx
-    #   tokenizers
+    # via tokenizers
 humanfriendly==10.0
    # via coloredlogs
 hyperframe==6.1.0
@@ -390,9 +354,7 @@ importlib-metadata==8.7.0
    #   litellm
    #   opentelemetry-api
 inflection==0.5.1
-    # via
-    #   onyx
-    #   pyairtable
+    # via pyairtable
 iniconfig==2.3.0
    # via pytest
 isodate==0.7.2
@@ -414,7 +376,6 @@ jinja2==3.1.6
    #   distributed
    #   litellm
 jira==3.10.5
-    # via onyx
 jiter==0.12.0
    # via openai
 jmespath==1.0.1
@@ -430,9 +391,7 @@ jsonpatch==1.33
 jsonpointer==3.0.0
    # via jsonpatch
 jsonref==1.1.0
-    # via
-    #   fastmcp
-    #   onyx
+    # via fastmcp
 jsonschema==4.25.1
    # via
    #   litellm
@@ -450,15 +409,12 @@ kombu==5.5.4
 kubernetes==31.0.0
    # via onyx
 langchain-core==1.2.22
-    # via onyx
 langdetect==1.0.9
    # via unstructured
 langfuse==3.10.0
-    # via onyx
 langsmith==0.3.45
    # via langchain-core
 lazy-imports==1.0.1
-    # via onyx
 legacy-cgi==2.6.4 ; python_full_version >= '3.13'
    # via ddtrace
 litellm==1.81.6
@@ -473,7 +429,6 @@ lxml==5.3.0
    #   justext
    #   lxml-html-clean
    #   markitdown
-    #   onyx
    #   python-docx
    #   python-pptx
    #   python3-saml
@@ -488,9 +443,7 @@ magika==0.6.3
 makefun==1.16.0
    # via fastapi-users
 mako==1.2.4
-    # via
-    #   alembic
-    #   onyx
+    # via alembic
 mammoth==1.11.0
    # via markitdown
 markdown-it-py==4.0.0
@@ -498,7 +451,6 @@ markdown-it-py==4.0.0
 markdownify==1.2.2
    # via markitdown
 markitdown==0.1.2
-    # via onyx
 markupsafe==3.0.3
    # via
    #   jinja2
@@ -512,11 +464,9 @@ mcp==1.26.0
    # via
    #   claude-agent-sdk
    #   fastmcp
-    #   onyx
 mdurl==0.1.2
    # via markdown-it-py
 mistune==3.2.0
-    # via onyx
 more-itertools==10.8.0
    # via
    #   jaraco-classes
@@ -525,13 +475,10 @@ more-itertools==10.8.0
 mpmath==1.3.0
    # via sympy
 msal==1.34.0
-    # via
-    #   office365-rest-python-client
-    #   onyx
+    # via office365-rest-python-client
 msgpack==1.1.2
    # via distributed
 msoffcrypto-tool==5.4.2
-    # via onyx
 multidict==6.7.0
    # via
    #   aiobotocore
@@ -548,7 +495,6 @@ mypy-extensions==1.0.0
    #   mypy
    #   typing-inspect
 nest-asyncio==1.6.0
-    # via onyx
 nltk==3.9.4
    # via unstructured
 numpy==2.4.1
@@ -563,10 +509,8 @@ oauthlib==3.2.2
    # via
    #   atlassian-python-api
    #   kubernetes
-    #   onyx
    #   requests-oauthlib
 office365-rest-python-client==2.6.2
-    # via onyx
 olefile==0.47
    # via
    #   msoffcrypto-tool
@@ -582,15 +526,11 @@ openai==2.14.0
 openapi-pydantic==0.5.1
    # via fastmcp
 openinference-instrumentation==0.1.42
-    # via onyx
 openinference-semantic-conventions==0.1.25
    # via openinference-instrumentation
 openpyxl==3.0.10
-    # via
-    #   markitdown
-    #   onyx
+    # via markitdown
 opensearch-py==3.0.0
-    # via onyx
 opentelemetry-api==1.39.1
    # via
    #   ddtrace
@@ -606,7 +546,6 @@ opentelemetry-exporter-otlp-proto-http==1.39.1
    # via langfuse
 opentelemetry-proto==1.39.1
    # via
-    #   onyx
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-http
 opentelemetry-sdk==1.39.1
@@ -640,7 +579,6 @@ parameterized==0.9.0
 partd==1.4.2
    # via dask
 passlib==1.7.4
-    # via onyx
 pathable==0.4.4
    # via jsonschema-path
 pdfminer-six==20251107
@@ -652,9 +590,7 @@ platformdirs==4.5.0
    #   fastmcp
    #   zeep
 playwright==1.55.0
-    # via
-    #   onyx
-    #   pytest-playwright
+    # via pytest-playwright
 pluggy==1.6.0
    # via pytest
 ply==3.11
@@ -684,12 +620,9 @@ protobuf==6.33.5
 psutil==7.1.3
    # via
    #   distributed
-    #   onyx
    #   unstructured
 psycopg2-binary==2.9.9
-    # via onyx
 puremagic==1.28
-    # via onyx
 pwdlib==0.3.0
    # via fastapi-users
 py==1.11.0
@@ -697,7 +630,6 @@ py==1.11.0
 py-key-value-aio==0.4.4
    # via fastmcp
 pyairtable==3.0.1
-    # via onyx
 pyasn1==0.6.3
    # via
    #   pyasn1-modules
@@ -707,7 +639,6 @@ pyasn1-modules==0.4.2
 pycparser==2.23 ; implementation_name != 'PyPy'
    # via cffi
 pycryptodome==3.19.1
-    # via onyx
 pydantic==2.11.7
    # via
    #   agent-client-protocol
@@ -734,7 +665,6 @@ pydantic-settings==2.12.0
 pyee==13.0.0
    # via playwright
 pygithub==2.5.0
-    # via onyx
 pygments==2.20.0
    # via rich
 pyjwt==2.12.0
@@ -745,17 +675,13 @@ pyjwt==2.12.0
    #   pygithub
    #   simple-salesforce
 pympler==1.1
-    # via onyx
 pynacl==1.6.2
    # via pygithub
 pypandoc-binary==1.16.2
-    # via onyx
 pyparsing==3.2.5
    # via httplib2
 pypdf==6.9.2
-    # via
-    #   onyx
-    #   unstructured-client
+    # via unstructured-client
 pyperclip==1.11.0
    # via fastmcp
 pyreadline3==3.5.4 ; sys_platform == 'win32'
@@ -768,9 +694,7 @@ pytest==8.3.5
 pytest-base-url==2.1.0
    # via pytest-playwright
 pytest-mock==3.12.0
-    # via onyx
 pytest-playwright==0.7.0
-    # via onyx
 python-dateutil==2.8.2
    # via
    #   aiobotocore
@@ -781,11 +705,9 @@ python-dateutil==2.8.2
    #   htmldate
    #   hubspot-api-client
    #   kubernetes
-    #   onyx
    #   opensearch-py
    #   pandas
 python-docx==1.1.2
-    # via onyx
 python-dotenv==1.1.1
    # via
    #   braintrust
@@ -793,10 +715,8 @@ python-dotenv==1.1.1
    #   litellm
    #   magika
    #   mcp
-    #   onyx
    #   pydantic-settings
 python-gitlab==5.6.0
-    # via onyx
 python-http-client==3.3.7
    # via sendgrid
 python-iso639==2025.11.16
@@ -807,19 +727,15 @@ python-multipart==0.0.22
    # via
    #   fastapi-users
    #   mcp
-    #   onyx
 python-oxmsg==0.0.2
    # via unstructured
 python-pptx==0.6.23
-    # via
-    #   markitdown
-    #   onyx
+    # via markitdown
 python-slugify==8.0.4
    # via
    #   braintrust
    #   pytest-playwright
 python3-saml==1.15.0
-    # via onyx
 pytz==2025.2
    # via
    #   dateparser
@@ -827,7 +743,6 @@ pytz==2025.2
    #   pandas
    #   zeep
 pywikibot==9.0.0
-    # via onyx
 pywin32==311 ; sys_platform == 'win32'
    # via
    #   mcp
@@ -844,13 +759,9 @@ pyyaml==6.0.3
    #   kubernetes
    #   langchain-core
 rapidfuzz==3.13.0
-    # via
-    #   onyx
-    #   unstructured
+    # via unstructured
 redis==5.0.8
-    # via
-    #   fastapi-limiter
-    #   onyx
+    # via fastapi-limiter
 referencing==0.36.2
    # via
    #   jsonschema
@@ -881,7 +792,6 @@ requests==2.33.0
    #   matrix-client
    #   msal
    #   office365-rest-python-client
-    #   onyx
    #   opensearch-py
    #   opentelemetry-exporter-otlp-proto-http
    #   pyairtable
@@ -907,7 +817,6 @@ requests-oauthlib==1.3.1
    #   google-auth-oauthlib
    #   jira
    #   kubernetes
-    #   onyx
 requests-toolbelt==1.0.0
    # via
    #   jira
@@ -918,7 +827,6 @@ requests-toolbelt==1.0.0
 retry==0.9.2
    # via onyx
 rfc3986==1.5.0
-    # via onyx
 rich==14.2.0
    # via
    #   cyclopts
@@ -938,15 +846,12 @@ s3transfer==0.13.1
 secretstorage==3.5.0 ; sys_platform == 'linux'
    # via keyring
 sendgrid==6.12.5
-    # via onyx
 sentry-sdk==2.14.0
    # via onyx
 shapely==2.0.6
-    # via onyx
 shellingham==1.5.4
    # via typer
 simple-salesforce==1.12.6
-    # via onyx
 six==1.17.0
    # via
    #   asana
@@ -961,7 +866,6 @@ six==1.17.0
    #   python-dateutil
    #   stone
 slack-sdk==3.20.2
-    # via onyx
 smmap==5.0.2
    # via gitdb
 sniffio==1.3.1
@@ -976,7 +880,6 @@ sqlalchemy==2.0.15
    # via
    #   alembic
    #   fastapi-users-db-sqlalchemy
-    #   onyx
 sse-starlette==3.0.3
    # via mcp
 sseclient-py==1.8.0
@@ -985,14 +888,11 @@ starlette==0.49.3
    # via
    #   fastapi
    #   mcp
-    #   onyx
    #   prometheus-fastapi-instrumentator
 stone==3.3.1
    # via dropbox
 stripe==10.12.0
-    # via onyx
 supervisor==4.3.0
-    # via onyx
 sympy==1.14.0
    # via onnxruntime
 tblib==3.2.2
@@ -1005,11 +905,8 @@ tenacity==9.1.2
 text-unidecode==1.3
    # via python-slugify
 tiktoken==0.7.0
-    # via
-    #   litellm
-    #   onyx
+    # via litellm
 timeago==1.0.16
-    # via onyx
 tld==0.13.1
    # via courlan
 tokenizers==0.21.4
@@ -1033,13 +930,11 @@ tqdm==4.67.1
    #   openai
    #   unstructured
 trafilatura==1.12.2
-    # via onyx
 typer==0.20.0
    # via mcp
 types-awscrt==0.28.4
    # via botocore-stubs
 types-openpyxl==3.0.4.7
-    # via onyx
 types-requests==2.32.0.20250328
    # via cohere
 types-s3transfer==0.14.0
@@ -1105,11 +1000,8 @@ tzlocal==5.3.1
 uncalled-for==0.2.0
    # via fastmcp
 unstructured==0.18.27
-    # via onyx
 unstructured-client==0.42.6
-    # via
-    #   onyx
-    #   unstructured
+    # via unstructured
 uritemplate==4.2.0
    # via google-api-python-client
 urllib3==2.6.3
@@ -1121,7 +1013,6 @@ urllib3==2.6.3
    #   htmldate
    #   hubspot-api-client
    #   kubernetes
-    #   onyx
    #   opensearch-py
    #   pyairtable
    #   pygithub
@@ -1171,9 +1062,7 @@ xlrd==2.0.2
 xlsxwriter==3.2.9
    # via python-pptx
 xmlsec==1.3.14
-    # via
-    #   onyx
-    #   python3-saml
+    # via python3-saml
 xmltodict==1.0.2
    # via ddtrace
 yarl==1.22.0
@@ -1187,4 +1076,3 @@ zipp==3.23.0
 zstandard==0.23.0
    # via langsmith
 zulip==0.8.2
-    # via onyx
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --no-emit-project --no-default-groups --no-hashes --extra dev -o backend/requirements/dev.txt
+#    uv export --no-emit-project --no-default-groups --no-hashes --group dev -o backend/requirements/dev.txt
 agent-client-protocol==0.7.1
    # via onyx
 aioboto3==15.1.0
@@ -47,7 +47,6 @@ attrs==25.4.0
    #   jsonschema
    #   referencing
 black==25.1.0
-    # via onyx
 boto3==1.39.11
    # via
    #   aiobotocore
@@ -60,7 +59,6 @@ botocore==1.39.11
 brotli==1.2.0
    # via onyx
 celery-types==0.19.0
-    # via onyx
 certifi==2025.11.12
    # via
    #   httpcore
@@ -122,7 +120,6 @@ execnet==2.1.2
 executing==2.2.1
    # via stack-data
 faker==40.1.2
-    # via onyx
 fastapi==0.133.1
    # via
    #   onyx
@@ -156,7 +153,6 @@ h11==0.16.0
    #   httpcore
    #   uvicorn
 hatchling==1.28.0
-    # via onyx
 hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
    # via huggingface-hub
 httpcore==1.0.9
@@ -187,7 +183,6 @@ importlib-metadata==8.7.0
 iniconfig==2.3.0
    # via pytest
 ipykernel==6.29.5
-    # via onyx
 ipython==9.7.0
    # via ipykernel
 ipython-pygments-lexers==1.1.1
@@ -224,13 +219,11 @@ litellm==1.81.6
 mako==1.2.4
    # via alembic
 manygo==0.2.0
-    # via onyx
 markupsafe==3.0.3
    # via
    #   jinja2
    #   mako
 matplotlib==3.10.8
-    # via onyx
 matplotlib-inline==0.2.1
    # via
    #   ipykernel
@@ -243,12 +236,10 @@ multidict==6.7.0
    #   aiohttp
    #   yarl
 mypy==1.13.0
-    # via onyx
 mypy-extensions==1.0.0
    # via
    #   black
    #   mypy
-    #   onyx
 nest-asyncio==1.6.0
    # via ipykernel
 nodeenv==1.9.1
@@ -264,15 +255,12 @@ oauthlib==3.2.2
    #   kubernetes
    #   requests-oauthlib
 onyx-devtools==0.7.3
-    # via onyx
 openai==2.14.0
    # via
    #   litellm
    #   onyx
 openapi-generator-cli==7.17.0
-    # via
-    #   onyx
-    #   onyx-devtools
+    # via onyx-devtools
 packaging==24.2
    # via
    #   black
@@ -282,7 +270,6 @@ packaging==24.2
    #   matplotlib
    #   pytest
 pandas-stubs==2.3.3.251201
-    # via onyx
 parameterized==0.9.0
    # via cohere
 parso==0.8.5
@@ -305,7 +292,6 @@ pluggy==1.6.0
    #   hatchling
    #   pytest
 pre-commit==3.2.2
-    # via onyx
 prometheus-client==0.23.1
    # via
    #   onyx
@@ -359,22 +345,16 @@ pyparsing==3.2.5
    # via matplotlib
 pytest==8.3.5
    # via
-    #   onyx
    #   pytest-alembic
    #   pytest-asyncio
    #   pytest-dotenv
    #   pytest-repeat
    #   pytest-xdist
 pytest-alembic==0.12.1
-    # via onyx
 pytest-asyncio==1.3.0
-    # via onyx
 pytest-dotenv==0.5.2
-    # via onyx
 pytest-repeat==0.9.4
-    # via onyx
 pytest-xdist==3.8.0
-    # via onyx
 python-dateutil==2.8.2
    # via
    #   aiobotocore
@@ -407,9 +387,7 @@ referencing==0.36.2
 regex==2025.11.3
    # via tiktoken
 release-tag==0.5.2
-    # via onyx
 reorder-python-imports-black==3.14.0
-    # via onyx
 requests==2.33.0
    # via
    #   cohere
@@ -430,7 +408,6 @@ rpds-py==0.29.0
 rsa==4.9.1
    # via google-auth
 ruff==0.12.0
-    # via onyx
 s3transfer==0.13.1
    # via boto3
 sentry-sdk==2.14.0
@@ -484,39 +461,22 @@ traitlets==5.14.3
 trove-classifiers==2025.12.1.14
    # via hatchling
 types-beautifulsoup4==4.12.0.3
-    # via onyx
 types-html5lib==1.1.11.13
-    # via
-    #   onyx
-    #   types-beautifulsoup4
+    # via types-beautifulsoup4
 types-oauthlib==3.2.0.9
-    # via onyx
 types-passlib==1.7.7.20240106
-    # via onyx
 types-pillow==10.2.0.20240822
-    # via onyx
 types-psutil==7.1.3.20251125
-    # via onyx
 types-psycopg2==2.9.21.10
-    # via onyx
 types-python-dateutil==2.8.19.13
-    # via onyx
 types-pytz==2023.3.1.1
-    # via
-    #   onyx
-    #   pandas-stubs
+    # via pandas-stubs
 types-pyyaml==6.0.12.11
-    # via onyx
 types-regex==2023.3.23.1
-    # via onyx
 types-requests==2.32.0.20250328
-    # via
-    #   cohere
-    #   onyx
+    # via cohere
 types-retry==0.9.9.3
-    # via onyx
 types-setuptools==68.0.0.3
-    # via onyx
 typing-extensions==4.15.0
    # via
    #   aiosignal
@@ -574,4 +534,3 @@ yarl==1.22.0
 zipp==3.23.0
    # via importlib-metadata
 zizmor==1.18.0
-    # via onyx
--- a/backend/requirements/ee.txt
+++ b/backend/requirements/ee.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt
+#    uv export --no-emit-project --no-default-groups --no-hashes --group ee -o backend/requirements/ee.txt
 agent-client-protocol==0.7.1
    # via onyx
 aioboto3==15.1.0
@@ -182,7 +182,6 @@ packaging==24.2
 parameterized==0.9.0
    # via cohere
 posthog==3.7.4
-    # via onyx
 prometheus-client==0.23.1
    # via
    #   onyx
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -1,7 +1,6 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt
+#    uv export --no-emit-project --no-default-groups --no-hashes --group model_server -o backend/requirements/model_server.txt
 accelerate==1.6.0
-    # via onyx
 agent-client-protocol==0.7.1
    # via onyx
 aioboto3==15.1.0
@@ -105,7 +104,6 @@ distro==1.9.0
 durationpy==0.10
    # via kubernetes
 einops==0.8.1
-    # via onyx
 fastapi==0.133.1
    # via
    #   onyx
@@ -207,7 +205,6 @@ networkx==3.5
 numpy==2.4.1
    # via
    #   accelerate
-    #   onyx
    #   scikit-learn
    #   scipy
    #   transformers
@@ -363,7 +360,6 @@ s3transfer==0.13.1
 safetensors==0.5.3
    # via
    #   accelerate
-    #   onyx
    #   transformers
 scikit-learn==1.7.2
    # via sentence-transformers
@@ -372,7 +368,6 @@ scipy==1.16.3
    #   scikit-learn
    #   sentence-transformers
 sentence-transformers==4.0.2
-    # via onyx
 sentry-sdk==2.14.0
    # via onyx
 setuptools==80.9.0 ; python_full_version >= '3.12'
@@ -411,7 +406,6 @@ tokenizers==0.21.4
 torch==2.9.1
    # via
    #   accelerate
-    #   onyx
    #   sentence-transformers
 tqdm==4.67.1
    # via
@@ -420,9 +414,7 @@ tqdm==4.67.1
    #   sentence-transformers
    #   transformers
 transformers==4.53.0
-    # via
-    #   onyx
-    #   sentence-transformers
+    # via sentence-transformers
 triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
    # via torch
 types-requests==2.32.0.20250328
--- a/backend/tests/unit/onyx/connectors/jira/test_jira_bulk_fetch.py
+++ b/backend/tests/unit/onyx/connectors/jira/test_jira_bulk_fetch.py
@@ -6,6 +6,7 @@ import requests
 from jira import JIRA
 from jira.resources import Issue

+from onyx.connectors.jira.connector import _JIRA_BULK_FETCH_LIMIT
 from onyx.connectors.jira.connector import bulk_fetch_issues


@@ -145,3 +146,29 @@ def test_bulk_fetch_recursive_splitting_raises_on_bad_issue() -> None:

    with pytest.raises(requests.exceptions.JSONDecodeError):
        bulk_fetch_issues(client, ["1", "2", bad_id, "3", "4", "5"])
+
+
+def test_bulk_fetch_respects_api_batch_limit() -> None:
+    """Requests to the bulkfetch endpoint never exceed _JIRA_BULK_FETCH_LIMIT IDs."""
+    client = _mock_jira_client()
+    total_issues = _JIRA_BULK_FETCH_LIMIT * 3 + 7
+    all_ids = [str(i) for i in range(total_issues)]
+
+    batch_sizes: list[int] = []
+
+    def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock:  # noqa: ARG001
+        ids = json["issueIdsOrKeys"]
+        batch_sizes.append(len(ids))
+        resp = MagicMock()
+        resp.json.return_value = {"issues": [_make_raw_issue(i) for i in ids]}
+        return resp
+
+    client._session.post.side_effect = _post_side_effect
+
+    result = bulk_fetch_issues(client, all_ids)
+
+    assert len(result) == total_issues
+    # keeping this hardcoded because it's the documented limit
+    # https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issues/
+    assert all(size <= 100 for size in batch_sizes)
+    assert len(batch_sizes) == 4
--- a/backend/tests/unit/onyx/context/search/federated/test_build_thread_text.py
+++ b/backend/tests/unit/onyx/context/search/federated/test_build_thread_text.py
@@ -0,0 +1,67 @@
+"""Tests for _build_thread_text function."""
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from onyx.context.search.federated.slack_search import _build_thread_text
+
+
+def _make_msg(user: str, text: str, ts: str) -> dict[str, str]:
+    return {"user": user, "text": text, "ts": ts}
+
+
+class TestBuildThreadText:
+    """Verify _build_thread_text includes full thread replies up to cap."""
+
+    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
+    def test_includes_all_replies(self, mock_profiles: MagicMock) -> None:
+        """All replies within cap are included in output."""
+        mock_profiles.return_value = {}
+        messages = [
+            _make_msg("U1", "parent msg", "1000.0"),
+            _make_msg("U2", "reply 1", "1001.0"),
+            _make_msg("U3", "reply 2", "1002.0"),
+            _make_msg("U4", "reply 3", "1003.0"),
+        ]
+        result = _build_thread_text(messages, "token", "T123", MagicMock())
+        assert "parent msg" in result
+        assert "reply 1" in result
+        assert "reply 2" in result
+        assert "reply 3" in result
+        assert "..." not in result
+
+    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
+    def test_non_thread_returns_parent_only(self, mock_profiles: MagicMock) -> None:
+        """Single message (no replies) returns just the parent text."""
+        mock_profiles.return_value = {}
+        messages = [_make_msg("U1", "just a message", "1000.0")]
+        result = _build_thread_text(messages, "token", "T123", MagicMock())
+        assert "just a message" in result
+        assert "Replies:" not in result
+
+    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
+    def test_parent_always_first(self, mock_profiles: MagicMock) -> None:
+        """Thread parent message is always the first line of output."""
+        mock_profiles.return_value = {}
+        messages = [
+            _make_msg("U1", "I am the parent", "1000.0"),
+            _make_msg("U2", "I am a reply", "1001.0"),
+        ]
+        result = _build_thread_text(messages, "token", "T123", MagicMock())
+        parent_pos = result.index("I am the parent")
+        reply_pos = result.index("I am a reply")
+        assert parent_pos < reply_pos
+
+    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
+    def test_user_profiles_resolved(self, mock_profiles: MagicMock) -> None:
+        """User IDs in thread text are replaced with display names."""
+        mock_profiles.return_value = {"U1": "Alice", "U2": "Bob"}
+        messages = [
+            _make_msg("U1", "hello", "1000.0"),
+            _make_msg("U2", "world", "1001.0"),
+        ]
+        result = _build_thread_text(messages, "token", "T123", MagicMock())
+        assert "Alice" in result
+        assert "Bob" in result
+        assert "<@U1>" not in result
+        assert "<@U2>" not in result
--- a/backend/tests/unit/onyx/context/search/federated/test_url_override.py
+++ b/backend/tests/unit/onyx/context/search/federated/test_url_override.py
@@ -0,0 +1,108 @@
+"""Tests for Slack URL parsing and direct thread fetch via URL override."""
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from onyx.context.search.federated.models import DirectThreadFetch
+from onyx.context.search.federated.slack_search import _fetch_thread_from_url
+from onyx.context.search.federated.slack_search_utils import extract_slack_message_urls
+
+
+class TestExtractSlackMessageUrls:
+    """Verify URL parsing extracts channel_id and timestamp correctly."""
+
+    def test_standard_url(self) -> None:
+        query = "summarize https://mycompany.slack.com/archives/C097NBWMY8Y/p1775491616524769"
+        results = extract_slack_message_urls(query)
+        assert len(results) == 1
+        assert results[0] == ("C097NBWMY8Y", "1775491616.524769")
+
+    def test_multiple_urls(self) -> None:
+        query = (
+            "compare https://co.slack.com/archives/C111/p1234567890123456 "
+            "and https://co.slack.com/archives/C222/p9876543210987654"
+        )
+        results = extract_slack_message_urls(query)
+        assert len(results) == 2
+        assert results[0] == ("C111", "1234567890.123456")
+        assert results[1] == ("C222", "9876543210.987654")
+
+    def test_no_urls(self) -> None:
+        query = "what happened in #general last week?"
+        results = extract_slack_message_urls(query)
+        assert len(results) == 0
+
+    def test_non_slack_url_ignored(self) -> None:
+        query = "check https://google.com/archives/C111/p1234567890123456"
+        results = extract_slack_message_urls(query)
+        assert len(results) == 0
+
+    def test_timestamp_conversion(self) -> None:
+        """p prefix removed, dot inserted after 10th digit."""
+        query = "https://x.slack.com/archives/CABC123/p1775491616524769"
+        results = extract_slack_message_urls(query)
+        channel_id, ts = results[0]
+        assert channel_id == "CABC123"
+        assert ts == "1775491616.524769"
+        assert not ts.startswith("p")
+        assert "." in ts
+
+
+class TestFetchThreadFromUrl:
+    """Verify _fetch_thread_from_url calls conversations.replies and returns SlackMessage."""
+
+    @patch("onyx.context.search.federated.slack_search._build_thread_text")
+    @patch("onyx.context.search.federated.slack_search.WebClient")
+    def test_successful_fetch(
+        self, mock_webclient_cls: MagicMock, mock_build_thread: MagicMock
+    ) -> None:
+        mock_client = MagicMock()
+        mock_webclient_cls.return_value = mock_client
+
+        # Mock conversations_replies
+        mock_response = MagicMock()
+        mock_response.get.return_value = [
+            {"user": "U1", "text": "parent", "ts": "1775491616.524769"},
+            {"user": "U2", "text": "reply 1", "ts": "1775491617.000000"},
+            {"user": "U3", "text": "reply 2", "ts": "1775491618.000000"},
+        ]
+        mock_client.conversations_replies.return_value = mock_response
+
+        # Mock channel info
+        mock_ch_response = MagicMock()
+        mock_ch_response.get.return_value = {"name": "general"}
+        mock_client.conversations_info.return_value = mock_ch_response
+
+        mock_build_thread.return_value = (
+            "U1: parent\n\nReplies:\n\nU2: reply 1\n\nU3: reply 2"
+        )
+
+        fetch = DirectThreadFetch(
+            channel_id="C097NBWMY8Y", thread_ts="1775491616.524769"
+        )
+        result = _fetch_thread_from_url(fetch, "xoxp-token")
+
+        assert len(result.messages) == 1
+        msg = result.messages[0]
+        assert msg.channel_id == "C097NBWMY8Y"
+        assert msg.thread_id is None  # Prevents double-enrichment
+        assert msg.slack_score == 100000.0
+        assert "parent" in msg.text
+        mock_client.conversations_replies.assert_called_once_with(
+            channel="C097NBWMY8Y", ts="1775491616.524769"
+        )
+
+    @patch("onyx.context.search.federated.slack_search.WebClient")
+    def test_api_error_returns_empty(self, mock_webclient_cls: MagicMock) -> None:
+        from slack_sdk.errors import SlackApiError
+
+        mock_client = MagicMock()
+        mock_webclient_cls.return_value = mock_client
+        mock_client.conversations_replies.side_effect = SlackApiError(
+            message="channel_not_found",
+            response=MagicMock(status_code=404),
+        )
+
+        fetch = DirectThreadFetch(channel_id="CBAD", thread_ts="1234567890.123456")
+        result = _fetch_thread_from_url(fetch, "xoxp-token")
+        assert len(result.messages) == 0
--- a/backend/tests/unit/onyx/server/manage/llm/test_fetch_models_api.py
+++ b/backend/tests/unit/onyx/server/manage/llm/test_fetch_models_api.py
@@ -505,6 +505,7 @@ class TestGetLMStudioAvailableModels:

        mock_session = MagicMock()
        mock_provider = MagicMock()
+        mock_provider.api_base = "http://localhost:1234"
        mock_provider.custom_config = {"LM_STUDIO_API_KEY": "stored-secret"}

        response = {
--- a/deployment/helm/charts/onyx/templates/celery-worker-heavy.yaml
+++ b/deployment/helm/charts/onyx/templates/celery-worker-heavy.yaml
@@ -70,6 +70,10 @@ spec:
              "-Q",
              "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox",
            ]
+          ports:
+            - name: metrics
+              containerPort: 9094
+              protocol: TCP
          resources:
            {{- toYaml .Values.celery_worker_heavy.resources | nindent 12 }}
          envFrom:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,7 @@ dependencies = [
    "kubernetes>=31.0.0",
 ]

-[project.optional-dependencies]
+[dependency-groups]
 # Main backend application dependencies
 backend = [
    "aiohttp==3.13.4",
@@ -195,6 +195,9 @@ model_server = [
    "sentry-sdk[fastapi,celery,starlette]==2.14.0",
 ]

+[tool.uv]
+default-groups = ["backend", "dev", "ee", "model_server"]
+
 [tool.mypy]
 plugins = "sqlalchemy.ext.mypy.plugin"
 mypy_path = "backend"
@@ -230,7 +233,7 @@ follow_imports = "skip"
 ignore_errors = true

 [tool.uv.workspace]
-members = ["backend", "tools/ods"]
+members = ["tools/ods"]

 [tool.basedpyright]
 include = ["backend"]
--- a/uv.lock
+++ b/uv.lock
@@ -14,12 +14,6 @@ resolution-markers = [
    "python_full_version < '3.12' and sys_platform != 'win32'",
 ]

-[manifest]
-members = [
-    "onyx",
-    "onyx-backend",
-]
-
 [[package]]
 name = "accelerate"
 version = "1.6.0"
@@ -4234,7 +4228,7 @@ dependencies = [
    { name = "voyageai" },
 ]

-[package.optional-dependencies]
+[package.dev-dependencies]
 backend = [
    { name = "aiohttp" },
    { name = "alembic" },
@@ -4388,179 +4382,175 @@ model-server = [

 [package.metadata]
 requires-dist = [
-    { name = "accelerate", marker = "extra == 'model-server'", specifier = "==1.6.0" },
    { name = "agent-client-protocol", specifier = ">=0.7.1" },
    { name = "aioboto3", specifier = "==15.1.0" },
-    { name = "aiohttp", marker = "extra == 'backend'", specifier = "==3.13.4" },
-    { name = "alembic", marker = "extra == 'backend'", specifier = "==1.10.4" },
-    { name = "asana", marker = "extra == 'backend'", specifier = "==5.0.8" },
-    { name = "asyncpg", marker = "extra == 'backend'", specifier = "==0.30.0" },
-    { name = "atlassian-python-api", marker = "extra == 'backend'", specifier = "==3.41.16" },
-    { name = "azure-cognitiveservices-speech", marker = "extra == 'backend'", specifier = "==1.38.0" },
-    { name = "beautifulsoup4", marker = "extra == 'backend'", specifier = "==4.12.3" },
-    { name = "black", marker = "extra == 'dev'", specifier = "==25.1.0" },
-    { name = "boto3", marker = "extra == 'backend'", specifier = "==1.39.11" },
-    { name = "boto3-stubs", extras = ["s3"], marker = "extra == 'backend'", specifier = "==1.39.11" },
-    { name = "braintrust", marker = "extra == 'backend'", specifier = "==0.3.9" },
    { name = "brotli", specifier = ">=1.2.0" },
-    { name = "celery", marker = "extra == 'backend'", specifier = "==5.5.1" },
-    { name = "celery-types", marker = "extra == 'dev'", specifier = "==0.19.0" },
-    { name = "chardet", marker = "extra == 'backend'", specifier = "==5.2.0" },
-    { name = "chonkie", marker = "extra == 'backend'", specifier = "==1.0.10" },
    { name = "claude-agent-sdk", specifier = ">=0.1.19" },
    { name = "cohere", specifier = "==5.6.1" },
-    { name = "dask", marker = "extra == 'backend'", specifier = "==2026.1.1" },
-    { name = "ddtrace", marker = "extra == 'backend'", specifier = "==3.10.0" },
    { name = "discord-py", specifier = "==2.4.0" },
-    { name = "discord-py", marker = "extra == 'backend'", specifier = "==2.4.0" },
-    { name = "distributed", marker = "extra == 'backend'", specifier = "==2026.1.1" },
-    { name = "dropbox", marker = "extra == 'backend'", specifier = "==12.0.2" },
-    { name = "einops", marker = "extra == 'model-server'", specifier = "==0.8.1" },
-    { name = "exa-py", marker = "extra == 'backend'", specifier = "==1.15.4" },
-    { name = "faker", marker = "extra == 'dev'", specifier = "==40.1.2" },
    { name = "fastapi", specifier = "==0.133.1" },
-    { name = "fastapi-limiter", marker = "extra == 'backend'", specifier = "==0.1.6" },
-    { name = "fastapi-users", marker = "extra == 'backend'", specifier = "==15.0.4" },
-    { name = "fastapi-users-db-sqlalchemy", marker = "extra == 'backend'", specifier = "==7.0.0" },
-    { name = "fastmcp", marker = "extra == 'backend'", specifier = "==3.2.0" },
-    { name = "filelock", marker = "extra == 'backend'", specifier = "==3.20.3" },
-    { name = "google-api-python-client", marker = "extra == 'backend'", specifier = "==2.86.0" },
-    { name = "google-auth-httplib2", marker = "extra == 'backend'", specifier = "==0.1.0" },
-    { name = "google-auth-oauthlib", marker = "extra == 'backend'", specifier = "==1.0.0" },
    { name = "google-genai", specifier = "==1.52.0" },
-    { name = "hatchling", marker = "extra == 'dev'", specifier = "==1.28.0" },
-    { name = "httpcore", marker = "extra == 'backend'", specifier = "==1.0.9" },
-    { name = "httpx", extras = ["http2"], marker = "extra == 'backend'", specifier = "==0.28.1" },
-    { name = "httpx-oauth", marker = "extra == 'backend'", specifier = "==0.15.1" },
-    { name = "hubspot-api-client", marker = "extra == 'backend'", specifier = "==11.1.0" },
-    { name = "huggingface-hub", marker = "extra == 'backend'", specifier = "==0.35.3" },
-    { name = "inflection", marker = "extra == 'backend'", specifier = "==0.5.1" },
-    { name = "ipykernel", marker = "extra == 'dev'", specifier = "==6.29.5" },
-    { name = "jira", marker = "extra == 'backend'", specifier = "==3.10.5" },
-    { name = "jsonref", marker = "extra == 'backend'", specifier = "==1.1.0" },
    { name = "kubernetes", specifier = ">=31.0.0" },
-    { name = "kubernetes", marker = "extra == 'backend'", specifier = "==31.0.0" },
-    { name = "langchain-core", marker = "extra == 'backend'", specifier = "==1.2.22" },
-    { name = "langfuse", marker = "extra == 'backend'", specifier = "==3.10.0" },
-    { name = "lazy-imports", marker = "extra == 'backend'", specifier = "==1.0.1" },
    { name = "litellm", specifier = "==1.81.6" },
-    { name = "lxml", marker = "extra == 'backend'", specifier = "==5.3.0" },
-    { name = "mako", marker = "extra == 'backend'", specifier = "==1.2.4" },
-    { name = "manygo", marker = "extra == 'dev'", specifier = "==0.2.0" },
-    { name = "markitdown", extras = ["pdf", "docx", "pptx", "xlsx", "xls"], marker = "extra == 'backend'", specifier = "==0.1.2" },
-    { name = "matplotlib", marker = "extra == 'dev'", specifier = "==3.10.8" },
-    { name = "mcp", extras = ["cli"], marker = "extra == 'backend'", specifier = "==1.26.0" },
-    { name = "mistune", marker = "extra == 'backend'", specifier = "==3.2.0" },
-    { name = "msal", marker = "extra == 'backend'", specifier = "==1.34.0" },
-    { name = "msoffcrypto-tool", marker = "extra == 'backend'", specifier = "==5.4.2" },
-    { name = "mypy", marker = "extra == 'dev'", specifier = "==1.13.0" },
-    { name = "mypy-extensions", marker = "extra == 'dev'", specifier = "==1.0.0" },
-    { name = "nest-asyncio", marker = "extra == 'backend'", specifier = "==1.6.0" },
-    { name = "numpy", marker = "extra == 'model-server'", specifier = "==2.4.1" },
-    { name = "oauthlib", marker = "extra == 'backend'", specifier = "==3.2.2" },
-    { name = "office365-rest-python-client", marker = "extra == 'backend'", specifier = "==2.6.2" },
-    { name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.7.3" },
    { name = "openai", specifier = "==2.14.0" },
-    { name = "openapi-generator-cli", marker = "extra == 'dev'", specifier = "==7.17.0" },
-    { name = "openinference-instrumentation", marker = "extra == 'backend'", specifier = "==0.1.42" },
-    { name = "openpyxl", marker = "extra == 'backend'", specifier = "==3.0.10" },
-    { name = "opensearch-py", marker = "extra == 'backend'", specifier = "==3.0.0" },
-    { name = "opentelemetry-proto", marker = "extra == 'backend'", specifier = ">=1.39.0" },
-    { name = "pandas-stubs", marker = "extra == 'dev'", specifier = "~=2.3.3" },
-    { name = "passlib", marker = "extra == 'backend'", specifier = "==1.7.4" },
-    { name = "playwright", marker = "extra == 'backend'", specifier = "==1.55.0" },
-    { name = "posthog", marker = "extra == 'ee'", specifier = "==3.7.4" },
-    { name = "pre-commit", marker = "extra == 'dev'", specifier = "==3.2.2" },
    { name = "prometheus-client", specifier = ">=0.21.1" },
    { name = "prometheus-fastapi-instrumentator", specifier = "==7.1.0" },
-    { name = "psutil", marker = "extra == 'backend'", specifier = "==7.1.3" },
-    { name = "psycopg2-binary", marker = "extra == 'backend'", specifier = "==2.9.9" },
-    { name = "puremagic", marker = "extra == 'backend'", specifier = "==1.28" },
-    { name = "pyairtable", marker = "extra == 'backend'", specifier = "==3.0.1" },
-    { name = "pycryptodome", marker = "extra == 'backend'", specifier = "==3.19.1" },
    { name = "pydantic", specifier = "==2.11.7" },
-    { name = "pygithub", marker = "extra == 'backend'", specifier = "==2.5.0" },
-    { name = "pympler", marker = "extra == 'backend'", specifier = "==1.1" },
-    { name = "pypandoc-binary", marker = "extra == 'backend'", specifier = "==1.16.2" },
-    { name = "pypdf", marker = "extra == 'backend'", specifier = "==6.9.2" },
-    { name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.5" },
-    { name = "pytest-alembic", marker = "extra == 'dev'", specifier = "==0.12.1" },
-    { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
-    { name = "pytest-dotenv", marker = "extra == 'dev'", specifier = "==0.5.2" },
-    { name = "pytest-mock", marker = "extra == 'backend'", specifier = "==3.12.0" },
-    { name = "pytest-playwright", marker = "extra == 'backend'", specifier = "==0.7.0" },
-    { name = "pytest-repeat", marker = "extra == 'dev'", specifier = "==0.9.4" },
-    { name = "pytest-xdist", marker = "extra == 'dev'", specifier = "==3.8.0" },
-    { name = "python-dateutil", marker = "extra == 'backend'", specifier = "==2.8.2" },
-    { name = "python-docx", marker = "extra == 'backend'", specifier = "==1.1.2" },
-    { name = "python-dotenv", marker = "extra == 'backend'", specifier = "==1.1.1" },
-    { name = "python-gitlab", marker = "extra == 'backend'", specifier = "==5.6.0" },
-    { name = "python-multipart", marker = "extra == 'backend'", specifier = "==0.0.22" },
-    { name = "python-pptx", marker = "extra == 'backend'", specifier = "==0.6.23" },
-    { name = "python3-saml", marker = "extra == 'backend'", specifier = "==1.15.0" },
-    { name = "pywikibot", marker = "extra == 'backend'", specifier = "==9.0.0" },
-    { name = "rapidfuzz", marker = "extra == 'backend'", specifier = "==3.13.0" },
-    { name = "redis", marker = "extra == 'backend'", specifier = "==5.0.8" },
-    { name = "release-tag", marker = "extra == 'dev'", specifier = "==0.5.2" },
-    { name = "reorder-python-imports-black", marker = "extra == 'dev'", specifier = "==3.14.0" },
-    { name = "requests", marker = "extra == 'backend'", specifier = "==2.33.0" },
-    { name = "requests-oauthlib", marker = "extra == 'backend'", specifier = "==1.3.1" },
    { name = "retry", specifier = "==0.9.2" },
-    { name = "rfc3986", marker = "extra == 'backend'", specifier = "==1.5.0" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = "==0.12.0" },
-    { name = "safetensors", marker = "extra == 'model-server'", specifier = "==0.5.3" },
-    { name = "sendgrid", marker = "extra == 'backend'", specifier = "==6.12.5" },
-    { name = "sentence-transformers", marker = "extra == 'model-server'", specifier = "==4.0.2" },
    { name = "sentry-sdk", specifier = "==2.14.0" },
-    { name = "sentry-sdk", extras = ["fastapi", "celery", "starlette"], marker = "extra == 'model-server'", specifier = "==2.14.0" },
-    { name = "shapely", marker = "extra == 'backend'", specifier = "==2.0.6" },
-    { name = "simple-salesforce", marker = "extra == 'backend'", specifier = "==1.12.6" },
-    { name = "slack-sdk", marker = "extra == 'backend'", specifier = "==3.20.2" },
-    { name = "sqlalchemy", extras = ["mypy"], marker = "extra == 'backend'", specifier = "==2.0.15" },
-    { name = "starlette", marker = "extra == 'backend'", specifier = "==0.49.3" },
-    { name = "stripe", marker = "extra == 'backend'", specifier = "==10.12.0" },
-    { name = "supervisor", marker = "extra == 'backend'", specifier = "==4.3.0" },
-    { name = "tiktoken", marker = "extra == 'backend'", specifier = "==0.7.0" },
-    { name = "timeago", marker = "extra == 'backend'", specifier = "==1.0.16" },
-    { name = "torch", marker = "extra == 'model-server'", specifier = "==2.9.1" },
-    { name = "trafilatura", marker = "extra == 'backend'", specifier = "==1.12.2" },
-    { name = "transformers", marker = "extra == 'model-server'", specifier = "==4.53.0" },
-    { name = "types-beautifulsoup4", marker = "extra == 'dev'", specifier = "==4.12.0.3" },
-    { name = "types-html5lib", marker = "extra == 'dev'", specifier = "==1.1.11.13" },
-    { name = "types-oauthlib", marker = "extra == 'dev'", specifier = "==3.2.0.9" },
-    { name = "types-openpyxl", marker = "extra == 'backend'", specifier = "==3.0.4.7" },
-    { name = "types-passlib", marker = "extra == 'dev'", specifier = "==1.7.7.20240106" },
-    { name = "types-pillow", marker = "extra == 'dev'", specifier = "==10.2.0.20240822" },
-    { name = "types-psutil", marker = "extra == 'dev'", specifier = "==7.1.3.20251125" },
-    { name = "types-psycopg2", marker = "extra == 'dev'", specifier = "==2.9.21.10" },
-    { name = "types-python-dateutil", marker = "extra == 'dev'", specifier = "==2.8.19.13" },
-    { name = "types-pytz", marker = "extra == 'dev'", specifier = "==2023.3.1.1" },
-    { name = "types-pyyaml", marker = "extra == 'dev'", specifier = "==6.0.12.11" },
-    { name = "types-regex", marker = "extra == 'dev'", specifier = "==2023.3.23.1" },
-    { name = "types-requests", marker = "extra == 'dev'", specifier = "==2.32.0.20250328" },
-    { name = "types-retry", marker = "extra == 'dev'", specifier = "==0.9.9.3" },
-    { name = "types-setuptools", marker = "extra == 'dev'", specifier = "==68.0.0.3" },
-    { name = "unstructured", marker = "extra == 'backend'", specifier = "==0.18.27" },
-    { name = "unstructured-client", marker = "extra == 'backend'", specifier = "==0.42.6" },
-    { name = "urllib3", marker = "extra == 'backend'", specifier = "==2.6.3" },
    { name = "uvicorn", specifier = "==0.35.0" },
    { name = "voyageai", specifier = "==0.2.3" },
-    { name = "xmlsec", marker = "extra == 'backend'", specifier = "==1.3.14" },
-    { name = "zizmor", marker = "extra == 'dev'", specifier = "==1.18.0" },
-    { name = "zulip", marker = "extra == 'backend'", specifier = "==0.8.2" },
-]
-provides-extras = ["backend", "dev", "ee", "model-server"]
-
-[[package]]
-name = "onyx-backend"
-version = "0.0.0"
-source = { virtual = "backend" }
-dependencies = [
-    { name = "onyx", extra = ["backend", "dev", "ee"] },
 ]

-[package.metadata]
-requires-dist = [{ name = "onyx", extras = ["backend", "dev", "ee"], editable = "." }]
+[package.metadata.requires-dev]
+backend = [
+    { name = "aiohttp", specifier = "==3.13.4" },
+    { name = "alembic", specifier = "==1.10.4" },
+    { name = "asana", specifier = "==5.0.8" },
+    { name = "asyncpg", specifier = "==0.30.0" },
+    { name = "atlassian-python-api", specifier = "==3.41.16" },
+    { name = "azure-cognitiveservices-speech", specifier = "==1.38.0" },
+    { name = "beautifulsoup4", specifier = "==4.12.3" },
+    { name = "boto3", specifier = "==1.39.11" },
+    { name = "boto3-stubs", extras = ["s3"], specifier = "==1.39.11" },
+    { name = "braintrust", specifier = "==0.3.9" },
+    { name = "celery", specifier = "==5.5.1" },
+    { name = "chardet", specifier = "==5.2.0" },
+    { name = "chonkie", specifier = "==1.0.10" },
+    { name = "dask", specifier = "==2026.1.1" },
+    { name = "ddtrace", specifier = "==3.10.0" },
+    { name = "discord-py", specifier = "==2.4.0" },
+    { name = "distributed", specifier = "==2026.1.1" },
+    { name = "dropbox", specifier = "==12.0.2" },
+    { name = "exa-py", specifier = "==1.15.4" },
+    { name = "fastapi-limiter", specifier = "==0.1.6" },
+    { name = "fastapi-users", specifier = "==15.0.4" },
+    { name = "fastapi-users-db-sqlalchemy", specifier = "==7.0.0" },
+    { name = "fastmcp", specifier = "==3.2.0" },
+    { name = "filelock", specifier = "==3.20.3" },
+    { name = "google-api-python-client", specifier = "==2.86.0" },
+    { name = "google-auth-httplib2", specifier = "==0.1.0" },
+    { name = "google-auth-oauthlib", specifier = "==1.0.0" },
+    { name = "httpcore", specifier = "==1.0.9" },
+    { name = "httpx", extras = ["http2"], specifier = "==0.28.1" },
+    { name = "httpx-oauth", specifier = "==0.15.1" },
+    { name = "hubspot-api-client", specifier = "==11.1.0" },
+    { name = "huggingface-hub", specifier = "==0.35.3" },
+    { name = "inflection", specifier = "==0.5.1" },
+    { name = "jira", specifier = "==3.10.5" },
+    { name = "jsonref", specifier = "==1.1.0" },
+    { name = "kubernetes", specifier = "==31.0.0" },
+    { name = "langchain-core", specifier = "==1.2.22" },
+    { name = "langfuse", specifier = "==3.10.0" },
+    { name = "lazy-imports", specifier = "==1.0.1" },
+    { name = "lxml", specifier = "==5.3.0" },
+    { name = "mako", specifier = "==1.2.4" },
+    { name = "markitdown", extras = ["pdf", "docx", "pptx", "xlsx", "xls"], specifier = "==0.1.2" },
+    { name = "mcp", extras = ["cli"], specifier = "==1.26.0" },
+    { name = "mistune", specifier = "==3.2.0" },
+    { name = "msal", specifier = "==1.34.0" },
+    { name = "msoffcrypto-tool", specifier = "==5.4.2" },
+    { name = "nest-asyncio", specifier = "==1.6.0" },
+    { name = "oauthlib", specifier = "==3.2.2" },
+    { name = "office365-rest-python-client", specifier = "==2.6.2" },
+    { name = "openinference-instrumentation", specifier = "==0.1.42" },
+    { name = "openpyxl", specifier = "==3.0.10" },
+    { name = "opensearch-py", specifier = "==3.0.0" },
+    { name = "opentelemetry-proto", specifier = ">=1.39.0" },
+    { name = "passlib", specifier = "==1.7.4" },
+    { name = "playwright", specifier = "==1.55.0" },
+    { name = "psutil", specifier = "==7.1.3" },
+    { name = "psycopg2-binary", specifier = "==2.9.9" },
+    { name = "puremagic", specifier = "==1.28" },
+    { name = "pyairtable", specifier = "==3.0.1" },
+    { name = "pycryptodome", specifier = "==3.19.1" },
+    { name = "pygithub", specifier = "==2.5.0" },
+    { name = "pympler", specifier = "==1.1" },
+    { name = "pypandoc-binary", specifier = "==1.16.2" },
+    { name = "pypdf", specifier = "==6.9.2" },
+    { name = "pytest-mock", specifier = "==3.12.0" },
+    { name = "pytest-playwright", specifier = "==0.7.0" },
+    { name = "python-dateutil", specifier = "==2.8.2" },
+    { name = "python-docx", specifier = "==1.1.2" },
+    { name = "python-dotenv", specifier = "==1.1.1" },
+    { name = "python-gitlab", specifier = "==5.6.0" },
+    { name = "python-multipart", specifier = "==0.0.22" },
+    { name = "python-pptx", specifier = "==0.6.23" },
+    { name = "python3-saml", specifier = "==1.15.0" },
+    { name = "pywikibot", specifier = "==9.0.0" },
+    { name = "rapidfuzz", specifier = "==3.13.0" },
+    { name = "redis", specifier = "==5.0.8" },
+    { name = "requests", specifier = "==2.33.0" },
+    { name = "requests-oauthlib", specifier = "==1.3.1" },
+    { name = "rfc3986", specifier = "==1.5.0" },
+    { name = "sendgrid", specifier = "==6.12.5" },
+    { name = "shapely", specifier = "==2.0.6" },
+    { name = "simple-salesforce", specifier = "==1.12.6" },
+    { name = "slack-sdk", specifier = "==3.20.2" },
+    { name = "sqlalchemy", extras = ["mypy"], specifier = "==2.0.15" },
+    { name = "starlette", specifier = "==0.49.3" },
+    { name = "stripe", specifier = "==10.12.0" },
+    { name = "supervisor", specifier = "==4.3.0" },
+    { name = "tiktoken", specifier = "==0.7.0" },
+    { name = "timeago", specifier = "==1.0.16" },
+    { name = "trafilatura", specifier = "==1.12.2" },
+    { name = "types-openpyxl", specifier = "==3.0.4.7" },
+    { name = "unstructured", specifier = "==0.18.27" },
+    { name = "unstructured-client", specifier = "==0.42.6" },
+    { name = "urllib3", specifier = "==2.6.3" },
+    { name = "xmlsec", specifier = "==1.3.14" },
+    { name = "zulip", specifier = "==0.8.2" },
+]
+dev = [
+    { name = "black", specifier = "==25.1.0" },
+    { name = "celery-types", specifier = "==0.19.0" },
+    { name = "faker", specifier = "==40.1.2" },
+    { name = "hatchling", specifier = "==1.28.0" },
+    { name = "ipykernel", specifier = "==6.29.5" },
+    { name = "manygo", specifier = "==0.2.0" },
+    { name = "matplotlib", specifier = "==3.10.8" },
+    { name = "mypy", specifier = "==1.13.0" },
+    { name = "mypy-extensions", specifier = "==1.0.0" },
+    { name = "onyx-devtools", specifier = "==0.7.3" },
+    { name = "openapi-generator-cli", specifier = "==7.17.0" },
+    { name = "pandas-stubs", specifier = "~=2.3.3" },
+    { name = "pre-commit", specifier = "==3.2.2" },
+    { name = "pytest", specifier = "==8.3.5" },
+    { name = "pytest-alembic", specifier = "==0.12.1" },
+    { name = "pytest-asyncio", specifier = "==1.3.0" },
+    { name = "pytest-dotenv", specifier = "==0.5.2" },
+    { name = "pytest-repeat", specifier = "==0.9.4" },
+    { name = "pytest-xdist", specifier = "==3.8.0" },
+    { name = "release-tag", specifier = "==0.5.2" },
+    { name = "reorder-python-imports-black", specifier = "==3.14.0" },
+    { name = "ruff", specifier = "==0.12.0" },
+    { name = "types-beautifulsoup4", specifier = "==4.12.0.3" },
+    { name = "types-html5lib", specifier = "==1.1.11.13" },
+    { name = "types-oauthlib", specifier = "==3.2.0.9" },
+    { name = "types-passlib", specifier = "==1.7.7.20240106" },
+    { name = "types-pillow", specifier = "==10.2.0.20240822" },
+    { name = "types-psutil", specifier = "==7.1.3.20251125" },
+    { name = "types-psycopg2", specifier = "==2.9.21.10" },
+    { name = "types-python-dateutil", specifier = "==2.8.19.13" },
+    { name = "types-pytz", specifier = "==2023.3.1.1" },
+    { name = "types-pyyaml", specifier = "==6.0.12.11" },
+    { name = "types-regex", specifier = "==2023.3.23.1" },
+    { name = "types-requests", specifier = "==2.32.0.20250328" },
+    { name = "types-retry", specifier = "==0.9.9.3" },
+    { name = "types-setuptools", specifier = "==68.0.0.3" },
+    { name = "zizmor", specifier = "==1.18.0" },
+]
+ee = [{ name = "posthog", specifier = "==3.7.4" }]
+model-server = [
+    { name = "accelerate", specifier = "==1.6.0" },
+    { name = "einops", specifier = "==0.8.1" },
+    { name = "numpy", specifier = "==2.4.1" },
+    { name = "safetensors", specifier = "==0.5.3" },
+    { name = "sentence-transformers", specifier = "==4.0.2" },
+    { name = "sentry-sdk", extras = ["fastapi", "celery", "starlette"], specifier = "==2.14.0" },
+    { name = "torch", specifier = "==2.9.1" },
+    { name = "transformers", specifier = "==4.53.0" },
+]

 [[package]]
 name = "onyx-devtools"
--- a/web/src/app/admin/configuration/llm/ModelIcon.tsx
+++ b/web/src/app/admin/configuration/llm/ModelIcon.tsx
@@ -1,5 +1,5 @@
 import { defaultTailwindCSS } from "@/components/icons/icons";
-import { getModelIcon } from "@/lib/llmConfig/providers";
+import { getModelIcon } from "@/lib/llmConfig";
 import { IconProps } from "@opal/types";

 export interface ModelIconProps extends IconProps {
--- a/web/src/app/admin/configuration/llm/page.tsx
+++ b/web/src/app/admin/configuration/llm/page.tsx
@@ -1 +1 @@
-export { default } from "@/refresh-pages/admin/LLMProviderConfigurationPage";
+export { default } from "@/refresh-pages/admin/LLMConfigurationPage";
--- a/web/src/app/app/message/MultiModelPanel.tsx
+++ b/web/src/app/app/message/MultiModelPanel.tsx
@@ -5,7 +5,7 @@ import { Button } from "@opal/components";
 import { Text } from "@opal/components";
 import { ContentAction } from "@opal/layouts";
 import { SvgEyeOff, SvgX } from "@opal/icons";
-import { getModelIcon } from "@/lib/llmConfig/providers";
+import { getModelIcon } from "@/lib/llmConfig";
 import AgentMessage, {
  AgentMessageProps,
 } from "@/app/app/message/messageComponents/AgentMessage";
@@ -28,6 +28,8 @@ export interface MultiModelPanelProps {
  isNonPreferredInSelection: boolean;
  /** Callback when user clicks this panel to select as preferred */
  onSelect: () => void;
+  /** Callback to deselect this panel as preferred */
+  onDeselect?: () => void;
  /** Callback to hide/show this panel */
  onToggleVisibility: () => void;
  /** Props to pass through to AgentMessage */
@@ -63,6 +65,7 @@ export default function MultiModelPanel({
  isHidden,
  isNonPreferredInSelection,
  onSelect,
+  onDeselect,
  onToggleVisibility,
  agentMessageProps,
  errorMessage,
@@ -93,11 +96,25 @@ export default function MultiModelPanel({
        rightChildren={
          <div className="flex items-center gap-1 px-2">
            {isPreferred && (
-              <span className="text-action-link-05 shrink-0">
-                <Text font="secondary-body" color="inherit" nowrap>
-                  Preferred Response
-                </Text>
-              </span>
+              <>
+                <span className="text-action-link-05 shrink-0">
+                  <Text font="secondary-body" color="inherit" nowrap>
+                    Preferred Response
+                  </Text>
+                </span>
+                {onDeselect && (
+                  <Button
+                    prominence="tertiary"
+                    icon={SvgX}
+                    size="sm"
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      onDeselect();
+                    }}
+                    tooltip="Deselect preferred response"
+                  />
+                )}
+              </>
            )}
            {!isPreferred && (
              <Button
--- a/web/src/app/app/message/MultiModelResponseView.tsx
+++ b/web/src/app/app/message/MultiModelResponseView.tsx
@@ -30,7 +30,7 @@ const SELECTION_PANEL_W = 400;
 // Compact width for hidden panels in the carousel track
 const HIDDEN_PANEL_W = 220;
 // Generation-mode panel widths (from Figma)
-const GEN_PANEL_W_2 = 640; // 2 panels side-by-side
+const GEN_PANEL_W_2 = 720; // 2 panels side-by-side
 const GEN_PANEL_W_3 = 436; // 3 panels side-by-side
 // Gap between panels — matches CSS gap-6 (24px)
 const PANEL_GAP = 24;
@@ -64,14 +64,31 @@ export default function MultiModelResponseView({
  onMessageSelection,
  onHiddenPanelsChange,
 }: MultiModelResponseViewProps) {
-  const [preferredIndex, setPreferredIndex] = useState<number | null>(null);
+  // Initialize preferredIndex from the backend's preferred_response_id when
+  // loading an existing conversation.
+  const [preferredIndex, setPreferredIndex] = useState<number | null>(() => {
+    if (!parentMessage?.preferredResponseId) return null;
+    const match = responses.find(
+      (r) => r.messageId === parentMessage.preferredResponseId
+    );
+    return match?.modelIndex ?? null;
+  });
  const [hiddenPanels, setHiddenPanels] = useState<Set<number>>(new Set());
  // Controls animation: false = panels at start position, true = panels at peek position
-  const [selectionEntered, setSelectionEntered] = useState(false);
+  const [selectionEntered, setSelectionEntered] = useState(
+    () => preferredIndex !== null
+  );
+  // Tracks the deselect animation timeout so it can be cancelled if the user
+  // re-selects a panel during the 450ms animation window.
+  const deselectTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  // True while the reverse animation is playing (deselect → back to equal panels)
+  const [selectionExiting, setSelectionExiting] = useState(false);
  // Measures the overflow-hidden carousel container for responsive preferred-panel sizing.
  const [trackContainerW, setTrackContainerW] = useState(0);
  const roRef = useRef<ResizeObserver | null>(null);
+  const trackContainerElRef = useRef<HTMLDivElement | null>(null);
  const trackContainerRef = useCallback((el: HTMLDivElement | null) => {
+    trackContainerElRef.current = el;
    if (roRef.current) {
      roRef.current.disconnect();
      roRef.current = null;
@@ -90,6 +107,9 @@ export default function MultiModelResponseView({
    number | null
  >(null);
  const preferredRoRef = useRef<ResizeObserver | null>(null);
+  // Refs to each panel wrapper for height animation on deselect
+  const panelElsRef = useRef<Map<number, HTMLDivElement>>(new Map());
+
  // Tracks which non-preferred panels overflow the preferred height cap
  const [overflowingPanels, setOverflowingPanels] = useState<Set<number>>(
    new Set()
@@ -152,12 +172,43 @@ export default function MultiModelResponseView({
  const handleSelectPreferred = useCallback(
    (modelIndex: number) => {
      if (isGenerating) return;
+
+      // Cancel any pending deselect animation so it doesn't overwrite this selection
+      if (deselectTimeoutRef.current !== null) {
+        clearTimeout(deselectTimeoutRef.current);
+        deselectTimeoutRef.current = null;
+        setSelectionExiting(false);
+      }
+
+      // Only freeze scroll when entering selection mode for the first time.
+      // When switching preferred within selection mode, panels are already
+      // capped and the track just slides — no height changes to worry about.
+      const alreadyInSelection = preferredIndex !== null;
+      if (!alreadyInSelection) {
+        const scrollContainer = trackContainerElRef.current?.closest(
+          "[data-chat-scroll]"
+        ) as HTMLElement | null;
+        const scrollTop = scrollContainer?.scrollTop ?? 0;
+        if (scrollContainer) scrollContainer.style.overflow = "hidden";
+
+        setTimeout(() => {
+          if (scrollContainer) {
+            scrollContainer.scrollTop = scrollTop;
+            requestAnimationFrame(() => {
+              requestAnimationFrame(() => {
+                if (scrollContainer) {
+                  scrollContainer.scrollTop = scrollTop;
+                  scrollContainer.style.overflow = "";
+                }
+              });
+            });
+          }
+        }, 450);
+      }
+
      setPreferredIndex(modelIndex);
      const response = responses.find((r) => r.modelIndex === modelIndex);
      if (!response) return;
-      if (onMessageSelection) {
-        onMessageSelection(response.nodeId);
-      }

      // Persist preferred response to backend + update local tree so the
      // input bar unblocks (awaitingPreferredSelection clears).
@@ -185,17 +236,111 @@ export default function MultiModelResponseView({
    [
      isGenerating,
      responses,
-      onMessageSelection,
+      preferredIndex,
      parentMessage,
      currentSessionId,
      updateSessionMessageTree,
    ]
  );

+  // Deselect the preferred panel: slide panels back, then uncap and clear it.
+  // NOTE: only the local tree is cleared — SetPreferredResponseRequest doesn't
+  // accept null, so persisting a deselect across reloads would need a backend
+  // endpoint that clears preferred_response_id.
+  const handleDeselectPreferred = useCallback(() => {
+    // Ignore repeat clicks: a stacked timer could clear a later selection.
+    if (deselectTimeoutRef.current !== null) return;
+    const scrollContainer = trackContainerElRef.current?.closest(
+      "[data-chat-scroll]"
+    ) as HTMLElement | null;
+
+    setSelectionExiting(true);
+    setSelectionEntered(false);
+    deselectTimeoutRef.current = setTimeout(() => {
+      deselectTimeoutRef.current = null;
+      const scrollTop = scrollContainer?.scrollTop ?? 0;
+      if (scrollContainer) scrollContainer.style.overflow = "hidden";
+
+      // Animate each capped panel from its clientHeight to its scrollHeight.
+      const animations: Animation[] = [];
+      panelElsRef.current.forEach((el, modelIndex) => {
+        if (modelIndex === preferredIndex) return;
+        if (hiddenPanels.has(modelIndex)) return;
+        const from = el.clientHeight;
+        const to = el.scrollHeight;
+        if (to <= from) return;
+        // Lock current height, remove maxHeight cap, then animate
+        el.style.maxHeight = `${from}px`;
+        el.style.overflow = "hidden";
+        const anim = el.animate(
+          [{ maxHeight: `${from}px` }, { maxHeight: `${to}px` }],
+          {
+            duration: 350,
+            easing: "cubic-bezier(0.2, 0, 0, 1)",
+            fill: "forwards",
+          }
+        );
+        animations.push(anim);
+        anim.onfinish = () => {
+          el.style.maxHeight = "";
+          el.style.overflow = "";
+        };
+      });
+
+      setSelectionExiting(false);
+      setPreferredIndex(null);
+
+      // Restore scroll after animations + React settle
+      const restoreScroll = () => {
+        requestAnimationFrame(() => {
+          if (scrollContainer) {
+            scrollContainer.scrollTop = scrollTop;
+            scrollContainer.style.overflow = "";
+          }
+        });
+      };
+
+      if (animations.length > 0) {
+        Promise.all(animations.map((a) => a.finished))
+          .then(restoreScroll)
+          .catch(restoreScroll);
+      } else {
+        restoreScroll();
+      }
+
+      // Clear preferredResponseId in the local tree so input bar re-gates
+      if (parentMessage && currentSessionId) {
+        const tree = useChatSessionStore
+          .getState()
+          .sessions.get(currentSessionId)?.messageTree;
+        if (tree) {
+          const userMsg = tree.get(parentMessage.nodeId);
+          if (userMsg) {
+            const updated = new Map(tree);
+            updated.set(parentMessage.nodeId, {
+              ...userMsg,
+              preferredResponseId: undefined,
+            });
+            updateSessionMessageTree(currentSessionId, updated);
+          }
+        }
+      }
+    }, 450);
+  }, [
+    parentMessage,
+    currentSessionId,
+    updateSessionMessageTree,
+    preferredIndex,
+    hiddenPanels,
+  ]);
+
  // Clear preferred selection when generation starts
+  // Also reset the selection-layout and exit-animation flags at that point
  useEffect(() => {
    if (isGenerating) {
      setPreferredIndex(null);
+      setHasEnteredSelection(false);
+      setSelectionExiting(false);
    }
  }, [isGenerating]);

@@ -204,22 +349,39 @@ export default function MultiModelResponseView({
    (r) => r.modelIndex === preferredIndex
  );

-  // Selection mode when preferred is set, found in responses, not generating, and at least 2 visible panels
-  const showSelectionMode =
+  // Track whether selection mode was ever entered — once it has been,
+  // we stay in the selection layout (even after deselect) to avoid a
+  // jarring DOM swap between the two layout strategies.
+  const [hasEnteredSelection, setHasEnteredSelection] = useState(
+    () => preferredIndex !== null
+  );
+
+  const isActivelySelected =
    preferredIndex !== null &&
    preferredIdx !== -1 &&
    !isGenerating &&
    visibleResponses.length > 1;

-  // Trigger the slide-out animation one frame after entering selection mode
  useEffect(() => {
-    if (!showSelectionMode) {
-      setSelectionEntered(false);
+    if (isActivelySelected) setHasEnteredSelection(true);
+  }, [isActivelySelected]);
+
+  // Use the selection layout once a preferred response has been chosen,
+  // even after deselect. Only fall through to generation layout before
+  // the first selection or during active streaming.
+  const showSelectionMode = isActivelySelected || hasEnteredSelection;
+
+  // Trigger the slide-out animation one frame after a preferred panel is selected.
+  // Uses isActivelySelected (not showSelectionMode) so re-selecting after a
+  // deselect still triggers the animation.
+  useEffect(() => {
+    if (!isActivelySelected) {
+      // Don't reset selectionEntered here — handleDeselectPreferred manages it
      return;
    }
    const raf = requestAnimationFrame(() => setSelectionEntered(true));
    return () => cancelAnimationFrame(raf);
-  }, [showSelectionMode]);
+  }, [isActivelySelected]);

  // Build panel props — isHidden reflects actual hidden state
  const buildPanelProps = useCallback(
@@ -231,6 +393,7 @@ export default function MultiModelResponseView({
      isHidden: hiddenPanels.has(response.modelIndex),
      isNonPreferredInSelection: isNonPreferred,
      onSelect: () => handleSelectPreferred(response.modelIndex),
+      onDeselect: handleDeselectPreferred,
      onToggleVisibility: () => toggleVisibility(response.modelIndex),
      agentMessageProps: {
        rawPackets: response.packets,
@@ -255,6 +418,7 @@ export default function MultiModelResponseView({
      preferredIndex,
      hiddenPanels,
      handleSelectPreferred,
+      handleDeselectPreferred,
      toggleVisibility,
      chatState,
      llmManager,
@@ -310,25 +474,30 @@ export default function MultiModelResponseView({
      <div
        ref={trackContainerRef}
        className="w-full overflow-hidden"
-        style={{
-          maskImage: `linear-gradient(to right, transparent 0px, black ${PEEK_W}px, black calc(100% - ${PEEK_W}px), transparent 100%)`,
-          WebkitMaskImage: `linear-gradient(to right, transparent 0px, black ${PEEK_W}px, black calc(100% - ${PEEK_W}px), transparent 100%)`,
-        }}
+        style={
+          isActivelySelected
+            ? {
+                maskImage: `linear-gradient(to right, transparent 0px, black ${PEEK_W}px, black calc(100% - ${PEEK_W}px), transparent 100%)`,
+                WebkitMaskImage: `linear-gradient(to right, transparent 0px, black ${PEEK_W}px, black calc(100% - ${PEEK_W}px), transparent 100%)`,
+              }
+            : undefined
+        }
      >
        <div
          className="flex items-start"
          style={{
            gap: `${PANEL_GAP}px`,
-            transition: selectionEntered
-              ? "transform 0.45s cubic-bezier(0.2, 0, 0, 1)"
-              : "none",
+            transition:
+              selectionEntered || selectionExiting
+                ? "transform 0.45s cubic-bezier(0.2, 0, 0, 1)"
+                : "none",
            transform: trackTransform,
          }}
        >
          {responses.map((r, i) => {
            const isHidden = hiddenPanels.has(r.modelIndex);
            const isPref = r.modelIndex === preferredIndex;
-            const isNonPref = !isHidden && !isPref;
+            const isNonPref = !isHidden && !isPref && preferredIndex !== null;
            const finalW = selectionWidths[i]!;
            const startW = isHidden ? HIDDEN_PANEL_W : SELECTION_PANEL_W;
            const capped = isNonPref && preferredPanelHeight != null;
@@ -337,6 +506,11 @@ export default function MultiModelResponseView({
              <div
                key={r.modelIndex}
                ref={(el) => {
+                  if (el) {
+                    panelElsRef.current.set(r.modelIndex, el);
+                  } else {
+                    panelElsRef.current.delete(r.modelIndex);
+                  }
                  if (isPref) preferredPanelRef(el);
                  if (capped && el) {
                    const doesOverflow = el.scrollHeight > el.clientHeight;
@@ -353,9 +527,10 @@ export default function MultiModelResponseView({
                style={{
                  width: `${selectionEntered ? finalW : startW}px`,
                  flexShrink: 0,
-                  transition: selectionEntered
-                    ? "width 0.45s cubic-bezier(0.2, 0, 0, 1)"
-                    : "none",
+                  transition:
+                    selectionEntered || selectionExiting
+                      ? "width 0.45s cubic-bezier(0.2, 0, 0, 1)"
+                      : "none",
                  maxHeight: capped ? preferredPanelHeight : undefined,
                  overflow: capped ? "hidden" : undefined,
                  position: capped ? "relative" : undefined,
@@ -388,7 +563,7 @@ export default function MultiModelResponseView({

  return (
    <div className="overflow-x-auto">
-      <div className="flex gap-6 items-start w-full">
+      <div className="flex gap-6 items-start justify-center w-full">
        {responses.map((r) => {
          const isHidden = hiddenPanels.has(r.modelIndex);
          return (
--- a/web/src/app/craft/components/BuildLLMPopover.tsx
+++ b/web/src/app/craft/components/BuildLLMPopover.tsx
@@ -18,7 +18,7 @@ import {
  isRecommendedModel,
 } from "@/app/craft/onboarding/constants";
 import { ToggleWarningModal } from "./ToggleWarningModal";
-import { getModelIcon } from "@/lib/llmConfig/providers";
+import { getModelIcon } from "@/lib/llmConfig";
 import { Section } from "@/layouts/general-layouts";
 import {
  Accordion,
--- a/web/src/app/craft/v1/configure/page.tsx
+++ b/web/src/app/craft/v1/configure/page.tsx
@@ -48,7 +48,7 @@ import NotAllowedModal from "@/app/craft/onboarding/components/NotAllowedModal";
 import { useOnboarding } from "@/app/craft/onboarding/BuildOnboardingProvider";
 import { useLLMProviders } from "@/hooks/useLLMProviders";
 import { useUser } from "@/providers/UserProvider";
-import { getModelIcon } from "@/lib/llmConfig/providers";
+import { getModelIcon } from "@/lib/llmConfig";
 import {
  getBuildUserPersona,
  getPersonaInfo,
--- a/web/src/app/css/sizes.css
+++ b/web/src/app/css/sizes.css
@@ -1,5 +1,5 @@
 :root {
-  --app-page-main-content-width: 52.5rem;
+  --app-page-main-content-width: 45rem;
  --block-width-form-input-min: 10rem;

  --container-sm: 42rem;
--- a/web/src/app/nrf/NRFPage.tsx
+++ b/web/src/app/nrf/NRFPage.tsx
@@ -45,6 +45,9 @@ import { personaIncludesRetrieval } from "@/app/app/services/lib";
 import { useQueryController } from "@/providers/QueryControllerProvider";
 import { eeGated } from "@/ce";
 import EESearchUI from "@/ee/sections/SearchUI";
+import useMultiModelChat from "@/hooks/useMultiModelChat";
+import ModelSelector from "@/refresh-components/popovers/ModelSelector";
+import { Section } from "@/layouts/general-layouts";

 const SearchUI = eeGated(EESearchUI);

@@ -105,6 +108,20 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
  // If no LLM provider is configured (e.g., fresh signup), the input bar is
  // disabled and a "Set up an LLM" button is shown (see bottom of component).
  const llmManager = useLlmManager(undefined, liveAgent ?? undefined);
+  const multiModel = useMultiModelChat(llmManager);
+
+  // Sync a single-model selection into llmManager so the submit path uses that
+  // model (as in AppPage). NOTE(review): llmManager is not in deps — confirm.
+  useEffect(() => {
+    if (multiModel.selectedModels.length === 1) {
+      const model = multiModel.selectedModels[0]!;
+      llmManager.updateCurrentLlm({
+        name: model.name,
+        provider: model.provider,
+        modelName: model.modelName,
+      });
+    }
+  }, [multiModel.selectedModels]);

  // Deep research toggle
  const { deepResearchEnabled, toggleDeepResearch } = useDeepResearchToggle({
@@ -295,12 +312,17 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {

      // If we already have messages (chat session started), always use chat mode
      // (matches AppPage behavior where existing sessions bypass classification)
+      const selectedModels = multiModel.isMultiModelActive
+        ? multiModel.selectedModels
+        : undefined;
+
      if (hasMessages) {
        onSubmit({
          message: submittedMessage,
          currentMessageFiles: currentMessageFiles,
          deepResearch: deepResearchEnabled,
          additionalContext,
+          selectedModels,
        });
        return;
      }
@@ -312,6 +334,7 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
          currentMessageFiles: currentMessageFiles,
          deepResearch: deepResearchEnabled,
          additionalContext,
+          selectedModels,
        });
      };

@@ -328,6 +351,8 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
      submitQuery,
      tabReadingEnabled,
      currentTabUrl,
+      multiModel.isMultiModelActive,
+      multiModel.selectedModels,
    ]
  );

@@ -456,6 +481,7 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
                    onResubmit={handleResubmitLastMessage}
                    deepResearchEnabled={deepResearchEnabled}
                    anchorNodeId={anchorNodeId}
+                    selectedModels={multiModel.selectedModels}
                  />
                </ChatScrollContainer>
              </>
@@ -464,7 +490,23 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
            {/* Welcome message - centered when no messages and not in search mode */}
            {!hasMessages && !isSearch && (
              <div className="relative w-full flex-1 flex flex-col items-center justify-end">
-                <WelcomeMessage isDefaultAgent />
+                <Section
+                  flexDirection="row"
+                  justifyContent="between"
+                  alignItems="end"
+                  className="max-w-[var(--app-page-main-content-width)]"
+                >
+                  <WelcomeMessage isDefaultAgent />
+                  {liveAgent && !llmManager.isLoadingProviders && (
+                    <ModelSelector
+                      llmManager={llmManager}
+                      selectedModels={multiModel.selectedModels}
+                      onAdd={multiModel.addModel}
+                      onRemove={multiModel.removeModel}
+                      onReplace={multiModel.replaceModel}
+                    />
+                  )}
+                </Section>
                <Spacer rem={1.5} />
              </div>
            )}
@@ -478,6 +520,17 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
                  "max-w-[var(--app-page-main-content-width)] px-4"
              )}
            >
+              {hasMessages && liveAgent && !llmManager.isLoadingProviders && (
+                <div className="pb-1">
+                  <ModelSelector
+                    llmManager={llmManager}
+                    selectedModels={multiModel.selectedModels}
+                    onAdd={multiModel.addModel}
+                    onRemove={multiModel.removeModel}
+                    onReplace={multiModel.replaceModel}
+                  />
+                </div>
+              )}
              <AppInputBar
                ref={chatInputBarRef}
                deepResearchEnabled={deepResearchEnabled}
--- a/web/src/components/llm/LLMSelector.tsx
+++ b/web/src/components/llm/LLMSelector.tsx
@@ -3,7 +3,7 @@
 import { useMemo } from "react";
 import { parseLlmDescriptor, structureValue } from "@/lib/llmConfig/utils";
 import { DefaultModel, LLMProviderDescriptor } from "@/interfaces/llm";
-import { getModelIcon } from "@/lib/llmConfig/providers";
+import { getModelIcon } from "@/lib/llmConfig";
 import InputSelect from "@/refresh-components/inputs/InputSelect";
 import { createIcon } from "@/components/icons/icons";

--- a/web/src/lib/llmConfig/index.ts
+++ b/web/src/lib/llmConfig/index.ts
@@ -0,0 +1,251 @@
+import type { IconFunctionComponent } from "@opal/types";
+import { SvgCpu, SvgPlug, SvgServer } from "@opal/icons";
+import {
+  SvgBifrost,
+  SvgOpenai,
+  SvgClaude,
+  SvgOllama,
+  SvgAws,
+  SvgOpenrouter,
+  SvgAzure,
+  SvgGemini,
+  SvgLitellm,
+  SvgLmStudio,
+  SvgMicrosoft,
+  SvgMistral,
+  SvgDeepseek,
+  SvgQwen,
+  SvgGoogle,
+} from "@opal/logos";
+import { ZAIIcon } from "@/components/icons/icons";
+import { LLMProviderFormProps, LLMProviderName } from "@/interfaces/llm";
+import type { LLMProviderView } from "@/interfaces/llm";
+import OpenAIModal from "@/sections/modals/llmConfig/OpenAIModal";
+import AnthropicModal from "@/sections/modals/llmConfig/AnthropicModal";
+import OllamaModal from "@/sections/modals/llmConfig/OllamaModal";
+import AzureModal from "@/sections/modals/llmConfig/AzureModal";
+import BedrockModal from "@/sections/modals/llmConfig/BedrockModal";
+import VertexAIModal from "@/sections/modals/llmConfig/VertexAIModal";
+import OpenRouterModal from "@/sections/modals/llmConfig/OpenRouterModal";
+import CustomModal from "@/sections/modals/llmConfig/CustomModal";
+import LMStudioModal from "@/sections/modals/llmConfig/LMStudioModal";
+import LiteLLMProxyModal from "@/sections/modals/llmConfig/LiteLLMProxyModal";
+import BifrostModal from "@/sections/modals/llmConfig/BifrostModal";
+import OpenAICompatibleModal from "@/sections/modals/llmConfig/OpenAICompatibleModal";
+
+// ─── Text (LLM) providers ────────────────────────────────────────────────────
+
+export interface ProviderEntry {
+  icon: IconFunctionComponent;
+  productName: string;
+  companyName: string;
+  Modal: React.ComponentType<LLMProviderFormProps>;
+}
+
+const PROVIDERS: Record<string, ProviderEntry> = {
+  [LLMProviderName.OPENAI]: {
+    icon: SvgOpenai,
+    productName: "GPT",
+    companyName: "OpenAI",
+    Modal: OpenAIModal,
+  },
+  [LLMProviderName.ANTHROPIC]: {
+    icon: SvgClaude,
+    productName: "Claude",
+    companyName: "Anthropic",
+    Modal: AnthropicModal,
+  },
+  [LLMProviderName.VERTEX_AI]: {
+    icon: SvgGemini,
+    productName: "Gemini",
+    companyName: "Google Cloud Vertex AI",
+    Modal: VertexAIModal,
+  },
+  [LLMProviderName.BEDROCK]: {
+    icon: SvgAws,
+    productName: "Amazon Bedrock",
+    companyName: "AWS",
+    Modal: BedrockModal,
+  },
+  [LLMProviderName.AZURE]: {
+    icon: SvgAzure,
+    productName: "Azure OpenAI",
+    companyName: "Microsoft Azure",
+    Modal: AzureModal,
+  },
+  [LLMProviderName.LITELLM]: {
+    icon: SvgLitellm,
+    productName: "LiteLLM",
+    companyName: "LiteLLM",
+    Modal: CustomModal,
+  },
+  [LLMProviderName.LITELLM_PROXY]: {
+    icon: SvgLitellm,
+    productName: "LiteLLM Proxy",
+    companyName: "LiteLLM Proxy",
+    Modal: LiteLLMProxyModal,
+  },
+  [LLMProviderName.OLLAMA_CHAT]: {
+    icon: SvgOllama,
+    productName: "Ollama",
+    companyName: "Ollama",
+    Modal: OllamaModal,
+  },
+  [LLMProviderName.OPENROUTER]: {
+    icon: SvgOpenrouter,
+    productName: "OpenRouter",
+    companyName: "OpenRouter",
+    Modal: OpenRouterModal,
+  },
+  [LLMProviderName.LM_STUDIO]: {
+    icon: SvgLmStudio,
+    productName: "LM Studio",
+    companyName: "LM Studio",
+    Modal: LMStudioModal,
+  },
+  [LLMProviderName.BIFROST]: {
+    icon: SvgBifrost,
+    productName: "Bifrost",
+    companyName: "Bifrost",
+    Modal: BifrostModal,
+  },
+  [LLMProviderName.OPENAI_COMPATIBLE]: {
+    icon: SvgPlug,
+    productName: "OpenAI-Compatible",
+    companyName: "OpenAI-Compatible",
+    Modal: OpenAICompatibleModal,
+  },
+  [LLMProviderName.CUSTOM]: {
+    icon: SvgServer,
+    productName: "Custom Models",
+    companyName: "models from other LiteLLM-compatible providers",
+    Modal: CustomModal,
+  },
+};
+
+const DEFAULT_ENTRY: ProviderEntry = {
+  icon: SvgCpu,
+  productName: "",
+  companyName: "",
+  Modal: CustomModal,
+};
+
+// Providers that don't use custom_config themselves — if custom_config is
+// present it means the provider was originally created via CustomModal.
+const CUSTOM_CONFIG_OVERRIDES = new Set<string>([
+  LLMProviderName.OPENAI,
+  LLMProviderName.ANTHROPIC,
+  LLMProviderName.AZURE,
+  LLMProviderName.OPENROUTER,
+]);
+
+export function getProvider(
+  providerName: string,
+  existingProvider?: LLMProviderView
+): ProviderEntry {
+  // Unregistered providers get the default entry, labelled with the raw name.
+  const fallback: ProviderEntry = {
+    ...DEFAULT_ENTRY,
+    productName: providerName,
+    companyName: providerName,
+  };
+  const resolved = PROVIDERS[providerName] ?? fallback;
+
+  // custom_config on a provider listed in CUSTOM_CONFIG_OVERRIDES means it was
+  // created via CustomModal, so return the entry with that modal swapped in.
+  const viaCustomModal =
+    existingProvider?.custom_config != null &&
+    CUSTOM_CONFIG_OVERRIDES.has(providerName);
+  return viaCustomModal ? { ...resolved, Modal: CustomModal } : resolved;
+}
+
+// ─── Aggregator providers ────────────────────────────────────────────────────
+// Providers that host models from multiple vendors (e.g. Bedrock hosts Claude,
+// Llama, etc.). Used by the model-icon resolver to prioritise vendor icons.
+
+export const AGGREGATOR_PROVIDERS = new Set([
+  LLMProviderName.BEDROCK,
+  "bedrock_converse",
+  LLMProviderName.OPENROUTER,
+  LLMProviderName.OLLAMA_CHAT,
+  LLMProviderName.LM_STUDIO,
+  LLMProviderName.LITELLM_PROXY,
+  LLMProviderName.BIFROST,
+  LLMProviderName.OPENAI_COMPATIBLE,
+  LLMProviderName.VERTEX_AI,
+]);
+
+// ─── Model-aware icon resolver ───────────────────────────────────────────────
+
+const MODEL_ICON_MAP: Record<string, IconFunctionComponent> = {
+  [LLMProviderName.OPENAI]: SvgOpenai,
+  [LLMProviderName.ANTHROPIC]: SvgClaude,
+  [LLMProviderName.OLLAMA_CHAT]: SvgOllama,
+  [LLMProviderName.LM_STUDIO]: SvgLmStudio,
+  [LLMProviderName.OPENROUTER]: SvgOpenrouter,
+  [LLMProviderName.VERTEX_AI]: SvgGemini,
+  [LLMProviderName.BEDROCK]: SvgAws,
+  [LLMProviderName.LITELLM_PROXY]: SvgLitellm,
+  [LLMProviderName.BIFROST]: SvgBifrost,
+  [LLMProviderName.OPENAI_COMPATIBLE]: SvgPlug,
+
+  amazon: SvgAws,
+  phi: SvgMicrosoft,
+  mistral: SvgMistral,
+  ministral: SvgMistral,
+  llama: SvgCpu,
+  ollama: SvgOllama,
+  gemini: SvgGemini,
+  deepseek: SvgDeepseek,
+  claude: SvgClaude,
+  azure: SvgAzure,
+  microsoft: SvgMicrosoft,
+  meta: SvgCpu,
+  google: SvgGoogle,
+  qwen: SvgQwen,
+  qwq: SvgQwen,
+  zai: ZAIIcon,
+  bedrock_converse: SvgAws,
+};
+
+/**
+ * Resolve the icon for a provider/model pair, preferring the most specific
+ * match (e.g. the Claude icon for a Claude model served through Bedrock).
+ *
+ * Resolution order:
+ *   1. Aggregator provider: first MODEL_ICON_MAP key found in the model name.
+ *   2. Exact (lower-cased) provider-name key in MODEL_ICON_MAP.
+ *   3. Any provider: first MODEL_ICON_MAP key found in the model name.
+ *   4. SvgCpu.
+ *
+ * @param providerName Provider identifier; lower-cased before lookup.
+ * @param modelName Optional model name; lower-cased, then scanned for map keys.
+ * @returns The icon component chosen by the rules above.
+ */
+export function getModelIcon(
+  providerName: string,
+  modelName?: string
+): IconFunctionComponent {
+  const providerKey = providerName.toLowerCase();
+  const modelKey = modelName?.toLowerCase();
+
+  // Substring scan in MODEL_ICON_MAP insertion order; the first hit wins.
+  const modelMatch = modelKey
+    ? Object.entries(MODEL_ICON_MAP).find(([key]) => modelKey.includes(key))
+    : undefined;
+  const modelIcon = modelMatch?.[1];
+
+  if (modelIcon && AGGREGATOR_PROVIDERS.has(providerKey)) {
+    return modelIcon;
+  }
+
+  // Own-property check: `in` also matches inherited keys such as
+  // "constructor", which would return a non-icon value from Object.prototype.
+  if (Object.prototype.hasOwnProperty.call(MODEL_ICON_MAP, providerKey)) {
+    const icon = MODEL_ICON_MAP[providerKey];
+    if (icon) return icon;
+  }
+
+  // Model-name match for any provider, else the generic CPU icon.
+  return modelIcon ?? SvgCpu;
+}
--- a/web/src/lib/llmConfig/providers.ts
+++ b/web/src/lib/llmConfig/providers.ts
@@ -1,176 +0,0 @@
-import type { IconFunctionComponent } from "@opal/types";
-import { SvgCpu, SvgPlug, SvgServer } from "@opal/icons";
-import {
-  SvgBifrost,
-  SvgOpenai,
-  SvgClaude,
-  SvgOllama,
-  SvgAws,
-  SvgOpenrouter,
-  SvgAzure,
-  SvgGemini,
-  SvgLitellm,
-  SvgLmStudio,
-  SvgMicrosoft,
-  SvgMistral,
-  SvgDeepseek,
-  SvgQwen,
-  SvgGoogle,
-} from "@opal/logos";
-import { ZAIIcon } from "@/components/icons/icons";
-import { LLMProviderName } from "@/interfaces/llm";
-
-export const AGGREGATOR_PROVIDERS = new Set([
-  LLMProviderName.BEDROCK,
-  "bedrock_converse",
-  LLMProviderName.OPENROUTER,
-  LLMProviderName.OLLAMA_CHAT,
-  LLMProviderName.LM_STUDIO,
-  LLMProviderName.LITELLM_PROXY,
-  LLMProviderName.BIFROST,
-  LLMProviderName.OPENAI_COMPATIBLE,
-  LLMProviderName.VERTEX_AI,
-]);
-
-const PROVIDER_ICONS: Record<string, IconFunctionComponent> = {
-  [LLMProviderName.OPENAI]: SvgOpenai,
-  [LLMProviderName.ANTHROPIC]: SvgClaude,
-  [LLMProviderName.VERTEX_AI]: SvgGemini,
-  [LLMProviderName.BEDROCK]: SvgAws,
-  [LLMProviderName.AZURE]: SvgAzure,
-  [LLMProviderName.LITELLM]: SvgLitellm,
-  [LLMProviderName.LITELLM_PROXY]: SvgLitellm,
-  [LLMProviderName.OLLAMA_CHAT]: SvgOllama,
-  [LLMProviderName.OPENROUTER]: SvgOpenrouter,
-  [LLMProviderName.LM_STUDIO]: SvgLmStudio,
-  [LLMProviderName.BIFROST]: SvgBifrost,
-  [LLMProviderName.OPENAI_COMPATIBLE]: SvgPlug,
-
-  // fallback
-  [LLMProviderName.CUSTOM]: SvgServer,
-};
-
-const PROVIDER_PRODUCT_NAMES: Record<string, string> = {
-  [LLMProviderName.OPENAI]: "GPT",
-  [LLMProviderName.ANTHROPIC]: "Claude",
-  [LLMProviderName.VERTEX_AI]: "Gemini",
-  [LLMProviderName.BEDROCK]: "Amazon Bedrock",
-  [LLMProviderName.AZURE]: "Azure OpenAI",
-  [LLMProviderName.LITELLM]: "LiteLLM",
-  [LLMProviderName.LITELLM_PROXY]: "LiteLLM Proxy",
-  [LLMProviderName.OLLAMA_CHAT]: "Ollama",
-  [LLMProviderName.OPENROUTER]: "OpenRouter",
-  [LLMProviderName.LM_STUDIO]: "LM Studio",
-  [LLMProviderName.BIFROST]: "Bifrost",
-  [LLMProviderName.OPENAI_COMPATIBLE]: "OpenAI-Compatible",
-
-  // fallback
-  [LLMProviderName.CUSTOM]: "Custom Models",
-};
-
-const PROVIDER_DISPLAY_NAMES: Record<string, string> = {
-  [LLMProviderName.OPENAI]: "OpenAI",
-  [LLMProviderName.ANTHROPIC]: "Anthropic",
-  [LLMProviderName.VERTEX_AI]: "Google Cloud Vertex AI",
-  [LLMProviderName.BEDROCK]: "AWS",
-  [LLMProviderName.AZURE]: "Microsoft Azure",
-  [LLMProviderName.LITELLM]: "LiteLLM",
-  [LLMProviderName.LITELLM_PROXY]: "LiteLLM Proxy",
-  [LLMProviderName.OLLAMA_CHAT]: "Ollama",
-  [LLMProviderName.OPENROUTER]: "OpenRouter",
-  [LLMProviderName.LM_STUDIO]: "LM Studio",
-  [LLMProviderName.BIFROST]: "Bifrost",
-  [LLMProviderName.OPENAI_COMPATIBLE]: "OpenAI-Compatible",
-
-  // fallback
-  [LLMProviderName.CUSTOM]: "models from other LiteLLM-compatible providers",
-};
-
-export function getProviderProductName(providerName: string): string {
-  return PROVIDER_PRODUCT_NAMES[providerName] ?? providerName;
-}
-
-export function getProviderDisplayName(providerName: string): string {
-  return PROVIDER_DISPLAY_NAMES[providerName] ?? providerName;
-}
-
-export function getProviderIcon(providerName: string): IconFunctionComponent {
-  return PROVIDER_ICONS[providerName] ?? SvgCpu;
-}
-
-// ---------------------------------------------------------------------------
-// Model-aware icon resolver (legacy icon set)
-// ---------------------------------------------------------------------------
-
-const MODEL_ICON_MAP: Record<string, IconFunctionComponent> = {
-  [LLMProviderName.OPENAI]: SvgOpenai,
-  [LLMProviderName.ANTHROPIC]: SvgClaude,
-  [LLMProviderName.OLLAMA_CHAT]: SvgOllama,
-  [LLMProviderName.LM_STUDIO]: SvgLmStudio,
-  [LLMProviderName.OPENROUTER]: SvgOpenrouter,
-  [LLMProviderName.VERTEX_AI]: SvgGemini,
-  [LLMProviderName.BEDROCK]: SvgAws,
-  [LLMProviderName.LITELLM_PROXY]: SvgLitellm,
-  [LLMProviderName.BIFROST]: SvgBifrost,
-  [LLMProviderName.OPENAI_COMPATIBLE]: SvgPlug,
-
-  amazon: SvgAws,
-  phi: SvgMicrosoft,
-  mistral: SvgMistral,
-  ministral: SvgMistral,
-  llama: SvgCpu,
-  ollama: SvgOllama,
-  gemini: SvgGemini,
-  deepseek: SvgDeepseek,
-  claude: SvgClaude,
-  azure: SvgAzure,
-  microsoft: SvgMicrosoft,
-  meta: SvgCpu,
-  google: SvgGoogle,
-  qwen: SvgQwen,
-  qwq: SvgQwen,
-  zai: ZAIIcon,
-  bedrock_converse: SvgAws,
-};
-
-/**
- * Model-aware icon resolver that checks both provider name and model name
- * to pick the most specific icon (e.g. Claude icon for a Bedrock Claude model).
- */
-export const getModelIcon = (
-  providerName: string,
-  modelName?: string
-): IconFunctionComponent => {
-  const lowerProviderName = providerName.toLowerCase();
-
-  // For aggregator providers, prioritise showing the vendor icon based on model name
-  if (AGGREGATOR_PROVIDERS.has(lowerProviderName) && modelName) {
-    const lowerModelName = modelName.toLowerCase();
-    for (const [key, icon] of Object.entries(MODEL_ICON_MAP)) {
-      if (lowerModelName.includes(key)) {
-        return icon;
-      }
-    }
-  }
-
-  // Check if provider name directly matches an icon
-  if (lowerProviderName in MODEL_ICON_MAP) {
-    const icon = MODEL_ICON_MAP[lowerProviderName];
-    if (icon) {
-      return icon;
-    }
-  }
-
-  // For non-aggregator providers, check if model name contains any of the keys
-  if (modelName) {
-    const lowerModelName = modelName.toLowerCase();
-    for (const [key, icon] of Object.entries(MODEL_ICON_MAP)) {
-      if (lowerModelName.includes(key)) {
-        return icon;
-      }
-    }
-  }
-
-  // Fallback to CPU icon if no matches
-  return SvgCpu;
-};
--- a/web/src/lib/llmConfig/utils.ts
+++ b/web/src/lib/llmConfig/utils.ts
@@ -44,7 +44,7 @@ export function getFinalLLM(
  return [provider, model];
 }

-export function getLLMProviderOverrideForPersona(
+export function getProviderOverrideForPersona(
  liveAgent: MinimalPersonaSnapshot,
  llmProviders: LLMProviderDescriptor[]
 ): LlmDescriptor | null {
@@ -144,7 +144,7 @@ export function getDisplayName(
  agent: MinimalPersonaSnapshot,
  llmProviders: LLMProviderDescriptor[]
 ): string | undefined {
-  const llmDescriptor = getLLMProviderOverrideForPersona(
+  const llmDescriptor = getProviderOverrideForPersona(
    agent,
    llmProviders ?? []
  );
--- a/web/src/refresh-components/popovers/LLMPopover.tsx
+++ b/web/src/refresh-components/popovers/LLMPopover.tsx
@@ -4,7 +4,7 @@ import { useState, useEffect, useCallback, useMemo, useRef } from "react";
 import Popover from "@/refresh-components/Popover";
 import { LlmDescriptor, LlmManager } from "@/lib/hooks";
 import { structureValue } from "@/lib/llmConfig/utils";
-import { getModelIcon } from "@/lib/llmConfig/providers";
+import { getModelIcon } from "@/lib/llmConfig";
 import { AGGREGATOR_PROVIDERS } from "@/lib/llmConfig/svc";

 import { Slider } from "@/components/ui/slider";
--- a/web/src/refresh-components/popovers/ModelSelector.tsx
+++ b/web/src/refresh-components/popovers/ModelSelector.tsx
@@ -3,7 +3,7 @@
 import { useState, useMemo, useRef } from "react";
 import Popover from "@/refresh-components/Popover";
 import { LlmManager } from "@/lib/hooks";
-import { getModelIcon } from "@/lib/llmConfig/providers";
+import { getModelIcon } from "@/lib/llmConfig";
 import { Button, SelectButton, OpenButton } from "@opal/components";
 import { SvgPlusCircle, SvgX } from "@opal/icons";
 import { LLMOption } from "@/refresh-components/popovers/interfaces";
@@ -104,6 +104,7 @@ export default function ModelSelector({
      onRemove(existingIndex);
    } else if (!atMax) {
      onAdd(model);
+      setOpen(false);
    }
  };

@@ -214,15 +215,17 @@ export default function ModelSelector({
        )}
      </div>

-      <Popover.Content side="top" align="end" width="lg">
-        <ModelListContent
-          llmProviders={llmManager.llmProviders}
-          isLoading={llmManager.isLoadingProviders}
-          onSelect={handleSelect}
-          isSelected={isSelected}
-          isDisabled={isDisabled}
-        />
-      </Popover.Content>
+      {!(atMax && replacingIndex === null) && (
+        <Popover.Content side="top" align="end" width="lg">
+          <ModelListContent
+            llmProviders={llmManager.llmProviders}
+            isLoading={llmManager.isLoadingProviders}
+            onSelect={handleSelect}
+            isSelected={isSelected}
+            isDisabled={isDisabled}
+          />
+        </Popover.Content>
+      )}
    </Popover>
  );
 }
--- a/web/src/refresh-pages/AppPage.tsx
+++ b/web/src/refresh-pages/AppPage.tsx
@@ -400,19 +400,22 @@ export default function AppPage({ firstMessage }: ChatPageProps) {

  const multiModel = useMultiModelChat(llmManager);

-  // Auto-fold sidebar when multi-model is active (panels need full width)
+  // Auto-fold sidebar when a multi-model message is submitted.
+  // Stays collapsed until the user exits multi-model mode (removes models).
  const { folded: sidebarFolded, setFolded: setSidebarFolded } =
    useSidebarState();
  const preMultiModelFoldedRef = useRef<boolean | null>(null);

-  useEffect(() => {
-    if (
-      multiModel.isMultiModelActive &&
-      preMultiModelFoldedRef.current === null
-    ) {
+  const foldSidebarForMultiModel = useCallback(() => {
+    if (preMultiModelFoldedRef.current === null) {
      preMultiModelFoldedRef.current = sidebarFolded;
      setSidebarFolded(true);
-    } else if (
+    }
+  }, [sidebarFolded, setSidebarFolded]);
+
+  // Restore sidebar when user exits multi-model mode
+  useEffect(() => {
+    if (
      !multiModel.isMultiModelActive &&
      preMultiModelFoldedRef.current !== null
    ) {
@@ -532,6 +535,9 @@ export default function AppPage({ firstMessage }: ChatPageProps) {

  const onChat = useCallback(
    (message: string) => {
+      if (multiModel.isMultiModelActive) {
+        foldSidebarForMultiModel();
+      }
      resetInputBar();
      onSubmit({
        message,
@@ -552,6 +558,7 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
      deepResearchEnabledForCurrentWorkflow,
      multiModel.isMultiModelActive,
      multiModel.selectedModels,
+      foldSidebarForMultiModel,
      showOnboarding,
      onboardingDismissed,
      finishOnboarding,
@@ -864,13 +871,15 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
                        agent={liveAgent}
                        isDefaultAgent={isDefaultAgent}
                      />
-                      <ModelSelector
-                        llmManager={llmManager}
-                        selectedModels={multiModel.selectedModels}
-                        onAdd={multiModel.addModel}
-                        onRemove={multiModel.removeModel}
-                        onReplace={multiModel.replaceModel}
-                      />
+                      {liveAgent && !llmManager.isLoadingProviders && (
+                        <ModelSelector
+                          llmManager={llmManager}
+                          selectedModels={multiModel.selectedModels}
+                          onAdd={multiModel.addModel}
+                          onRemove={multiModel.removeModel}
+                          onReplace={multiModel.replaceModel}
+                        />
+                      )}
                    </Section>
                    <Spacer rem={1.5} />
                  </Fade>
@@ -936,17 +945,19 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
                          isSearch ? "h-[14px]" : "h-0"
                        )}
                      />
-                      {appFocus.isChat() && (
-                        <div className="pb-1">
-                          <ModelSelector
-                            llmManager={llmManager}
-                            selectedModels={multiModel.selectedModels}
-                            onAdd={multiModel.addModel}
-                            onRemove={multiModel.removeModel}
-                            onReplace={multiModel.replaceModel}
-                          />
-                        </div>
-                      )}
+                      {appFocus.isChat() &&
+                        liveAgent &&
+                        !llmManager.isLoadingProviders && (
+                          <div className="pb-1">
+                            <ModelSelector
+                              llmManager={llmManager}
+                              selectedModels={multiModel.selectedModels}
+                              onAdd={multiModel.addModel}
+                              onRemove={multiModel.removeModel}
+                              onReplace={multiModel.replaceModel}
+                            />
+                          </div>
+                        )}
                      <AppInputBar
                        ref={chatInputBarRef}
                        deepResearchEnabled={
--- a/web/src/refresh-pages/admin/LLMProviderConfigurationPage.tsx
+++ b/web/src/refresh-pages/admin/LLMProviderConfigurationPage.tsx
@@ -15,11 +15,7 @@ import { SvgArrowExchange, SvgSettings, SvgTrash } from "@opal/icons";
 import * as SettingsLayouts from "@/layouts/settings-layouts";
 import { ADMIN_ROUTES } from "@/lib/admin-routes";
 import * as GeneralLayouts from "@/layouts/general-layouts";
-import {
-  getProviderDisplayName,
-  getProviderIcon,
-  getProviderProductName,
-} from "@/lib/llmConfig/providers";
+import { getProvider } from "@/lib/llmConfig";
 import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
 import { deleteLlmProvider, setDefaultLlmModel } from "@/lib/llmConfig/svc";
 import { Horizontal as HorizontalInput } from "@/layouts/input-layouts";
@@ -33,19 +29,6 @@ import {
  LLMProviderView,
  WellKnownLLMProviderDescriptor,
 } from "@/interfaces/llm";
-import { getModalForExistingProvider } from "@/sections/modals/llmConfig/getModal";
-import OpenAIModal from "@/sections/modals/llmConfig/OpenAIModal";
-import AnthropicModal from "@/sections/modals/llmConfig/AnthropicModal";
-import OllamaModal from "@/sections/modals/llmConfig/OllamaModal";
-import AzureModal from "@/sections/modals/llmConfig/AzureModal";
-import BedrockModal from "@/sections/modals/llmConfig/BedrockModal";
-import VertexAIModal from "@/sections/modals/llmConfig/VertexAIModal";
-import OpenRouterModal from "@/sections/modals/llmConfig/OpenRouterModal";
-import CustomModal from "@/sections/modals/llmConfig/CustomModal";
-import LMStudioModal from "@/sections/modals/llmConfig/LMStudioModal";
-import LiteLLMProxyModal from "@/sections/modals/llmConfig/LiteLLMProxyModal";
-import BifrostModal from "@/sections/modals/llmConfig/BifrostModal";
-import OpenAICompatibleModal from "@/sections/modals/llmConfig/OpenAICompatibleModal";
 import { Section } from "@/layouts/general-layouts";
 import { markdown } from "@opal/utils";

@@ -72,51 +55,6 @@ const PROVIDER_DISPLAY_ORDER: string[] = [
  LLMProviderName.OPENAI_COMPATIBLE,
 ];

-const PROVIDER_MODAL_MAP: Record<
-  string,
-  (
-    shouldMarkAsDefault: boolean,
-    onOpenChange: (open: boolean) => void
-  ) => React.ReactNode
-> = {
-  openai: (d, onOpenChange) => (
-    <OpenAIModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  anthropic: (d, onOpenChange) => (
-    <AnthropicModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  ollama_chat: (d, onOpenChange) => (
-    <OllamaModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  azure: (d, onOpenChange) => (
-    <AzureModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  bedrock: (d, onOpenChange) => (
-    <BedrockModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  vertex_ai: (d, onOpenChange) => (
-    <VertexAIModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  openrouter: (d, onOpenChange) => (
-    <OpenRouterModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  lm_studio: (d, onOpenChange) => (
-    <LMStudioModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  litellm_proxy: (d, onOpenChange) => (
-    <LiteLLMProxyModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  bifrost: (d, onOpenChange) => (
-    <BifrostModal shouldMarkAsDefault={d} onOpenChange={onOpenChange} />
-  ),
-  openai_compatible: (d, onOpenChange) => (
-    <OpenAICompatibleModal
-      shouldMarkAsDefault={d}
-      onOpenChange={onOpenChange}
-    />
-  ),
-};
-
 // ============================================================================
 // ExistingProviderCard — card for configured (existing) providers
 // ============================================================================
@@ -125,14 +63,12 @@ interface ExistingProviderCardProps {
  provider: LLMProviderView;
  isDefault: boolean;
  isLastProvider: boolean;
-  defaultModelName?: string;
 }

 function ExistingProviderCard({
  provider,
  isDefault,
  isLastProvider,
-  defaultModelName,
 }: ExistingProviderCardProps) {
  const { mutate } = useSWRConfig();
  const [isOpen, setIsOpen] = useState(false);
@@ -150,8 +86,14 @@ function ExistingProviderCard({
    }
  };

+  const { icon, companyName, Modal } = getProvider(provider.provider, provider);
+
  return (
    <>
+      {isOpen && (
+        <Modal existingLlmProvider={provider} onOpenChange={setIsOpen} />
+      )}
+
      {deleteModal.isOpen && (
        <ConfirmationModalLayout
          icon={SvgTrash}
@@ -202,9 +144,9 @@ function ExistingProviderCard({
          onClick={() => setIsOpen(true)}
        >
          <CardLayout.Header
-            icon={getProviderIcon(provider.provider)}
+            icon={icon}
            title={provider.name}
-            description={getProviderDisplayName(provider.provider)}
+            description={companyName}
            sizePreset="main-ui"
            variant="section"
            tag={isDefault ? { title: "Default", color: "blue" } : undefined}
@@ -236,8 +178,6 @@ function ExistingProviderCard({
              </div>
            }
          />
-          {isOpen &&
-            getModalForExistingProvider(provider, setIsOpen, defaultModelName)}
        </SelectCard>
      </Hoverable.Root>
    </>
@@ -251,18 +191,11 @@ function ExistingProviderCard({
 interface NewProviderCardProps {
  provider: WellKnownLLMProviderDescriptor;
  isFirstProvider: boolean;
-  formFn: (
-    shouldMarkAsDefault: boolean,
-    onOpenChange: (open: boolean) => void
-  ) => React.ReactNode;
 }

-function NewProviderCard({
-  provider,
-  isFirstProvider,
-  formFn,
-}: NewProviderCardProps) {
+function NewProviderCard({ provider, isFirstProvider }: NewProviderCardProps) {
  const [isOpen, setIsOpen] = useState(false);
+  const { icon, productName, companyName, Modal } = getProvider(provider.name);

  return (
    <SelectCard
@@ -272,9 +205,9 @@ function NewProviderCard({
      onClick={() => setIsOpen(true)}
    >
      <CardLayout.Header
-        icon={getProviderIcon(provider.name)}
-        title={getProviderProductName(provider.name)}
-        description={getProviderDisplayName(provider.name)}
+        icon={icon}
+        title={productName}
+        description={companyName}
        sizePreset="main-ui"
        variant="section"
        rightChildren={
@@ -290,7 +223,9 @@ function NewProviderCard({
          </Button>
        }
      />
-      {isOpen && formFn(isFirstProvider, setIsOpen)}
+      {isOpen && (
+        <Modal shouldMarkAsDefault={isFirstProvider} onOpenChange={setIsOpen} />
+      )}
    </SelectCard>
  );
 }
@@ -307,6 +242,7 @@ function NewCustomProviderCard({
  isFirstProvider,
 }: NewCustomProviderCardProps) {
  const [isOpen, setIsOpen] = useState(false);
+  const { icon, productName, companyName, Modal } = getProvider("custom");

  return (
    <SelectCard
@@ -316,9 +252,9 @@ function NewCustomProviderCard({
      onClick={() => setIsOpen(true)}
    >
      <CardLayout.Header
-        icon={getProviderIcon("custom")}
-        title={getProviderProductName("custom")}
-        description={getProviderDisplayName("custom")}
+        icon={icon}
+        title={productName}
+        description={companyName}
        sizePreset="main-ui"
        variant="section"
        rightChildren={
@@ -335,10 +271,7 @@ function NewCustomProviderCard({
        }
      />
      {isOpen && (
-        <CustomModal
-          shouldMarkAsDefault={isFirstProvider}
-          onOpenChange={setIsOpen}
-        />
+        <Modal shouldMarkAsDefault={isFirstProvider} onOpenChange={setIsOpen} />
      )}
    </SelectCard>
  );
@@ -348,7 +281,7 @@ function NewCustomProviderCard({
 // LLMConfigurationPage — main page component
 // ============================================================================

-export default function LLMProviderConfigurationPage() {
+export default function LLMConfigurationPage() {
  const { mutate } = useSWRConfig();
  const { llmProviders: existingLlmProviders, defaultText } =
    useAdminLLMProviders();
@@ -469,11 +402,6 @@ export default function LLMProviderConfigurationPage() {
                    provider={provider}
                    isDefault={defaultText?.provider_id === provider.id}
                    isLastProvider={sortedProviders.length === 1}
-                    defaultModelName={
-                      defaultText?.provider_id === provider.id
-                        ? defaultText.model_name
-                        : undefined
-                    }
                  />
                ))}
              </div>
@@ -507,23 +435,13 @@ export default function LLMProviderConfigurationPage() {
                  (bIndex === -1 ? Infinity : bIndex)
                );
              })
-              .map((provider) => {
-                const formFn = PROVIDER_MODAL_MAP[provider.name];
-                if (!formFn) {
-                  toast.error(
-                    `No modal mapping for provider "${provider.name}".`
-                  );
-                  return null;
-                }
-                return (
-                  <NewProviderCard
-                    key={provider.name}
-                    provider={provider}
-                    isFirstProvider={isFirstProvider}
-                    formFn={formFn}
-                  />
-                );
-              })}
+              .map((provider) => (
+                <NewProviderCard
+                  key={provider.name}
+                  provider={provider}
+                  isFirstProvider={isFirstProvider}
+                />
+              ))}
            <NewCustomProviderCard isFirstProvider={isFirstProvider} />
          </div>
        </GeneralLayouts.Section>
--- a/web/src/sections/chat/ChatScrollContainer.tsx
+++ b/web/src/sections/chat/ChatScrollContainer.tsx
@@ -352,6 +352,7 @@ const ChatScrollContainer = React.memo(
            key={sessionId}
            ref={scrollContainerRef}
            data-testid="chat-scroll-container"
+            data-chat-scroll
            className={cn(
              "flex flex-col flex-1 min-h-0 overflow-y-auto overflow-x-hidden",
              hideScrollbar ? "no-scrollbar" : "default-scrollbar"
--- a/web/src/sections/modals/llmConfig/BifrostModal.tsx
+++ b/web/src/sections/modals/llmConfig/BifrostModal.tsx
@@ -50,7 +50,7 @@ function BifrostModalInternals({
    const { models, error } = await fetchBifrostModels({
      api_base: formikProps.values.api_base,
      api_key: formikProps.values.api_key || undefined,
-      provider_name: LLMProviderName.BIFROST,
+      provider_name: existingLlmProvider?.name,
    });
    if (error) {
      throw new Error(error);
--- a/web/src/sections/modals/llmConfig/CustomModal.tsx
+++ b/web/src/sections/modals/llmConfig/CustomModal.tsx
@@ -1,6 +1,6 @@
 "use client";

-import { useEffect, useMemo, useRef, useState } from "react";
+import { useMemo } from "react";
 import { useSWRConfig } from "swr";
 import { useFormikContext } from "formik";
 import {
@@ -29,9 +29,8 @@ import InputComboBox from "@/refresh-components/inputs/InputComboBox";
 import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
 import InputSelect from "@/refresh-components/inputs/InputSelect";
 import Text from "@/refresh-components/texts/Text";
-import SimpleLoader from "@/refresh-components/loaders/SimpleLoader";
 import { Button, Card, EmptyMessageCard } from "@opal/components";
-import { SvgMinusCircle, SvgPlusCircle, SvgRefreshCw } from "@opal/icons";
+import { SvgMinusCircle, SvgPlusCircle } from "@opal/icons";
 import { markdown } from "@opal/utils";
 import { toast } from "@/hooks/useToast";
 import { refreshLlmProviderCaches } from "@/lib/llmConfig/cache";
@@ -111,95 +110,6 @@ function ModelConfigurationItem({
  );
 }

-interface FetchedModel {
-  name: string;
-  display_name: string;
-  max_input_tokens: number | null;
-  supports_image_input: boolean;
-}
-
-function FetchModelsButton({ provider }: { provider: string }) {
-  const abortRef = useRef<AbortController | null>(null);
-  const [isFetching, setIsFetching] = useState(false);
-  const formikProps = useFormikContext<{
-    api_base?: string;
-    api_key?: string;
-    api_version?: string;
-    model_configurations: CustomModelConfiguration[];
-  }>();
-
-  useEffect(() => {
-    return () => abortRef.current?.abort();
-  }, []);
-
-  async function handleFetch() {
-    abortRef.current?.abort();
-    const controller = new AbortController();
-    abortRef.current = controller;
-    setIsFetching(true);
-    try {
-      const response = await fetch("/api/admin/llm/custom/available-models", {
-        method: "POST",
-        headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({
-          provider,
-          api_base: formikProps.values.api_base || undefined,
-          api_key: formikProps.values.api_key || undefined,
-          api_version: formikProps.values.api_version || undefined,
-        }),
-        signal: controller.signal,
-      });
-      if (!response.ok) {
-        let errorMessage = "Failed to fetch models";
-        try {
-          const errorData = await response.json();
-          errorMessage = errorData.detail || errorMessage;
-        } catch {
-          // ignore JSON parsing errors
-        }
-        throw new Error(errorMessage);
-      }
-      const fetched: FetchedModel[] = await response.json();
-      const existing = formikProps.values.model_configurations;
-      const existingNames = new Set(existing.map((m) => m.name));
-      const newModels: CustomModelConfiguration[] = fetched
-        .filter((m) => !existingNames.has(m.name))
-        .map((m) => ({
-          name: m.name,
-          display_name: m.display_name !== m.name ? m.display_name : "",
-          max_input_tokens: m.max_input_tokens,
-          supports_image_input: m.supports_image_input,
-        }));
-      // Replace empty placeholder rows, then merge
-      const nonEmpty = existing.filter((m) => m.name.trim() !== "");
-      formikProps.setFieldValue("model_configurations", [
-        ...nonEmpty,
-        ...newModels,
-      ]);
-      toast.success(`Fetched ${fetched.length} models`);
-    } catch (err) {
-      if (err instanceof DOMException && err.name === "AbortError") return;
-      toast.error(
-        err instanceof Error ? err.message : "Failed to fetch models"
-      );
-    } finally {
-      if (!controller.signal.aborted) {
-        setIsFetching(false);
-      }
-    }
-  }
-
-  return (
-    <Button
-      prominence="tertiary"
-      icon={isFetching ? SimpleLoader : SvgRefreshCw}
-      onClick={handleFetch}
-      disabled={isFetching || !provider}
-      type="button"
-    />
-  );
-}
-
 function ModelConfigurationList() {
  const formikProps = useFormikContext<{
    model_configurations: CustomModelConfiguration[];
@@ -312,24 +222,6 @@ function ProviderNameSelect({ disabled }: { disabled?: boolean }) {
  );
 }

-function ModelsHeader() {
-  const { values } = useFormikContext<{ provider: string }>();
-  return (
-    <InputLayouts.Horizontal
-      title="Models"
-      description="List LLM models you wish to use and their configurations for this provider. See full list of models at LiteLLM."
-      nonInteractive
-      center
-    >
-      {values.provider ? (
-        <FetchModelsButton provider={values.provider} />
-      ) : (
-        <div />
-      )}
-    </InputLayouts.Horizontal>
-  );
-}
-
 // ─── Custom Config Processing ─────────────────────────────────────────────────

 function keyValueListToDict(items: KeyValue[]): Record<string, string> {
@@ -532,7 +424,13 @@ export default function CustomModal({
      <InputLayouts.FieldSeparator />
      <Section gap={0.5}>
        <InputLayouts.FieldPadder>
-          <ModelsHeader />
+          <Content
+            title="Models"
+            description="List LLM models you wish to use and their configurations for this provider. See full list of models at LiteLLM."
+            variant="section"
+            sizePreset="main-content"
+            widthVariant="full"
+          />
        </InputLayouts.FieldPadder>

        <Card padding="sm">
--- a/web/src/sections/modals/llmConfig/LiteLLMProxyModal.tsx
+++ b/web/src/sections/modals/llmConfig/LiteLLMProxyModal.tsx
@@ -52,7 +52,7 @@ function LiteLLMProxyModalInternals({
    const { models, error } = await fetchLiteLLMProxyModels({
      api_base: formikProps.values.api_base,
      api_key: formikProps.values.api_key,
-      provider_name: LLMProviderName.LITELLM_PROXY,
+      provider_name: existingLlmProvider?.name,
    });
    if (error) {
      throw new Error(error);
--- a/web/src/sections/modals/llmConfig/OpenRouterModal.tsx
+++ b/web/src/sections/modals/llmConfig/OpenRouterModal.tsx
@@ -52,7 +52,7 @@ function OpenRouterModalInternals({
    const { models, error } = await fetchOpenRouterModels({
      api_base: formikProps.values.api_base,
      api_key: formikProps.values.api_key,
-      provider_name: LLMProviderName.OPENROUTER,
+      provider_name: existingLlmProvider?.name,
    });
    if (error) {
      throw new Error(error);
--- a/web/src/sections/modals/llmConfig/getModal.tsx
+++ b/web/src/sections/modals/llmConfig/getModal.tsx
@@ -1,75 +0,0 @@
-import { LLMProviderName, LLMProviderView } from "@/interfaces/llm";
-import AnthropicModal from "@/sections/modals/llmConfig/AnthropicModal";
-import OpenAIModal from "@/sections/modals/llmConfig/OpenAIModal";
-import OllamaModal from "@/sections/modals/llmConfig/OllamaModal";
-import AzureModal from "@/sections/modals/llmConfig/AzureModal";
-import VertexAIModal from "@/sections/modals/llmConfig/VertexAIModal";
-import OpenRouterModal from "@/sections/modals/llmConfig/OpenRouterModal";
-import CustomModal from "@/sections/modals/llmConfig/CustomModal";
-import BedrockModal from "@/sections/modals/llmConfig/BedrockModal";
-import LMStudioModal from "@/sections/modals/llmConfig/LMStudioModal";
-import LiteLLMProxyModal from "@/sections/modals/llmConfig/LiteLLMProxyModal";
-import BifrostModal from "@/sections/modals/llmConfig/BifrostModal";
-import OpenAICompatibleModal from "@/sections/modals/llmConfig/OpenAICompatibleModal";
-
-export function getModalForExistingProvider(
-  provider: LLMProviderView,
-  onOpenChange?: (open: boolean) => void,
-  defaultModelName?: string
-) {
-  const props = {
-    existingLlmProvider: provider,
-    onOpenChange,
-    defaultModelName,
-  };
-
-  const hasCustomConfig = provider.custom_config != null;
-
-  switch (provider.provider) {
-    // These providers don't use custom_config themselves, so a non-null
-    // custom_config means the provider was created via CustomModal.
-    case LLMProviderName.OPENAI:
-      return hasCustomConfig ? (
-        <CustomModal {...props} />
-      ) : (
-        <OpenAIModal {...props} />
-      );
-    case LLMProviderName.ANTHROPIC:
-      return hasCustomConfig ? (
-        <CustomModal {...props} />
-      ) : (
-        <AnthropicModal {...props} />
-      );
-    case LLMProviderName.AZURE:
-      return hasCustomConfig ? (
-        <CustomModal {...props} />
-      ) : (
-        <AzureModal {...props} />
-      );
-    case LLMProviderName.OPENROUTER:
-      return hasCustomConfig ? (
-        <CustomModal {...props} />
-      ) : (
-        <OpenRouterModal {...props} />
-      );
-
-    // These providers legitimately store settings in custom_config,
-    // so always use their dedicated modals.
-    case LLMProviderName.OLLAMA_CHAT:
-      return <OllamaModal {...props} />;
-    case LLMProviderName.VERTEX_AI:
-      return <VertexAIModal {...props} />;
-    case LLMProviderName.BEDROCK:
-      return <BedrockModal {...props} />;
-    case LLMProviderName.LM_STUDIO:
-      return <LMStudioModal {...props} />;
-    case LLMProviderName.LITELLM_PROXY:
-      return <LiteLLMProxyModal {...props} />;
-    case LLMProviderName.BIFROST:
-      return <BifrostModal {...props} />;
-    case LLMProviderName.OPENAI_COMPATIBLE:
-      return <OpenAICompatibleModal {...props} />;
-    default:
-      return <CustomModal {...props} />;
-  }
-}
--- a/web/src/sections/modals/llmConfig/shared.tsx
+++ b/web/src/sections/modals/llmConfig/shared.tsx
@@ -44,11 +44,7 @@ import useUsers from "@/hooks/useUsers";
 import { toast } from "@/hooks/useToast";
 import { UserRole } from "@/lib/types";
 import Modal from "@/refresh-components/Modal";
-import {
-  getProviderIcon,
-  getProviderDisplayName,
-  getProviderProductName,
-} from "@/lib/llmConfig/providers";
+import { getProvider } from "@/lib/llmConfig";

 // ─── DisplayNameField ────────────────────────────────────────────────────────

@@ -717,9 +713,11 @@ function ModalWrapperInner({
        ? "No changes to save."
        : undefined;

-  const providerIcon = getProviderIcon(providerName);
-  const providerDisplayName = getProviderDisplayName(providerName);
-  const providerProductName = getProviderProductName(providerName);
+  const {
+    icon: providerIcon,
+    companyName: providerDisplayName,
+    productName: providerProductName,
+  } = getProvider(providerName);

  const title = llmProvider
    ? `Configure "${llmProvider.name}"`
--- a/web/src/sections/onboarding/forms/getOnboardingForm.tsx
+++ b/web/src/sections/onboarding/forms/getOnboardingForm.tsx
@@ -1,145 +0,0 @@
-import React from "react";
-import {
-  WellKnownLLMProviderDescriptor,
-  LLMProviderName,
-  LLMProviderFormProps,
-} from "@/interfaces/llm";
-import { OnboardingActions, OnboardingState } from "@/interfaces/onboarding";
-import OpenAIModal from "@/sections/modals/llmConfig/OpenAIModal";
-import AnthropicModal from "@/sections/modals/llmConfig/AnthropicModal";
-import OllamaModal from "@/sections/modals/llmConfig/OllamaModal";
-import AzureModal from "@/sections/modals/llmConfig/AzureModal";
-import BedrockModal from "@/sections/modals/llmConfig/BedrockModal";
-import VertexAIModal from "@/sections/modals/llmConfig/VertexAIModal";
-import OpenRouterModal from "@/sections/modals/llmConfig/OpenRouterModal";
-import CustomModal from "@/sections/modals/llmConfig/CustomModal";
-import LMStudioModal from "@/sections/modals/llmConfig/LMStudioModal";
-import LiteLLMProxyModal from "@/sections/modals/llmConfig/LiteLLMProxyModal";
-import OpenAICompatibleModal from "@/sections/modals/llmConfig/OpenAICompatibleModal";
-
-// Display info for LLM provider cards - title is the product name, displayName is the company/platform
-const PROVIDER_DISPLAY_INFO: Record<
-  string,
-  { title: string; displayName: string }
-> = {
-  [LLMProviderName.OPENAI]: { title: "GPT", displayName: "OpenAI" },
-  [LLMProviderName.ANTHROPIC]: { title: "Claude", displayName: "Anthropic" },
-  [LLMProviderName.OLLAMA_CHAT]: { title: "Ollama", displayName: "Ollama" },
-  [LLMProviderName.AZURE]: {
-    title: "Azure OpenAI",
-    displayName: "Microsoft Azure Cloud",
-  },
-  [LLMProviderName.BEDROCK]: {
-    title: "Amazon Bedrock",
-    displayName: "AWS",
-  },
-  [LLMProviderName.VERTEX_AI]: {
-    title: "Gemini",
-    displayName: "Google Cloud Vertex AI",
-  },
-  [LLMProviderName.OPENROUTER]: {
-    title: "OpenRouter",
-    displayName: "OpenRouter",
-  },
-  [LLMProviderName.LM_STUDIO]: {
-    title: "LM Studio",
-    displayName: "LM Studio",
-  },
-  [LLMProviderName.LITELLM_PROXY]: {
-    title: "LiteLLM Proxy",
-    displayName: "LiteLLM Proxy",
-  },
-  [LLMProviderName.OPENAI_COMPATIBLE]: {
-    title: "OpenAI-Compatible",
-    displayName: "OpenAI-Compatible",
-  },
-};
-
-export function getProviderDisplayInfo(providerName: string): {
-  title: string;
-  displayName: string;
-} {
-  return (
-    PROVIDER_DISPLAY_INFO[providerName] ?? {
-      title: providerName,
-      displayName: providerName,
-    }
-  );
-}
-
-export interface OnboardingFormProps {
-  llmDescriptor?: WellKnownLLMProviderDescriptor;
-  isCustomProvider?: boolean;
-  onboardingState: OnboardingState;
-  onboardingActions: OnboardingActions;
-  onOpenChange: (open: boolean) => void;
-}
-
-export function getOnboardingForm({
-  llmDescriptor,
-  isCustomProvider,
-  onboardingState,
-  onboardingActions,
-  onOpenChange,
-}: OnboardingFormProps): React.ReactNode {
-  const providerName = isCustomProvider
-    ? "custom"
-    : llmDescriptor?.name ?? "custom";
-
-  const sharedProps: LLMProviderFormProps = {
-    variant: "onboarding" as const,
-    shouldMarkAsDefault:
-      (onboardingState?.data.llmProviders ?? []).length === 0,
-    onboardingActions,
-    onOpenChange,
-    onSuccess: () => {
-      onboardingActions.updateData({
-        llmProviders: [
-          ...(onboardingState?.data.llmProviders ?? []),
-          providerName,
-        ],
-      });
-      onboardingActions.setButtonActive(true);
-    },
-  };
-
-  // Handle custom provider
-  if (isCustomProvider || !llmDescriptor) {
-    return <CustomModal {...sharedProps} />;
-  }
-
-  switch (llmDescriptor.name) {
-    case LLMProviderName.OPENAI:
-      return <OpenAIModal {...sharedProps} />;
-
-    case LLMProviderName.ANTHROPIC:
-      return <AnthropicModal {...sharedProps} />;
-
-    case LLMProviderName.OLLAMA_CHAT:
-      return <OllamaModal {...sharedProps} />;
-
-    case LLMProviderName.AZURE:
-      return <AzureModal {...sharedProps} />;
-
-    case LLMProviderName.BEDROCK:
-      return <BedrockModal {...sharedProps} />;
-
-    case LLMProviderName.VERTEX_AI:
-      return <VertexAIModal {...sharedProps} />;
-
-    case LLMProviderName.OPENROUTER:
-      return <OpenRouterModal {...sharedProps} />;
-
-    case LLMProviderName.LM_STUDIO:
-      return <LMStudioModal {...sharedProps} />;
-
-    case LLMProviderName.LITELLM_PROXY:
-      return <LiteLLMProxyModal {...sharedProps} />;
-
-    case LLMProviderName.OPENAI_COMPATIBLE:
-      return <OpenAICompatibleModal {...sharedProps} />;
-
-    default:
-      return <CustomModal {...sharedProps} />;
-  }
-}
--- a/web/src/sections/onboarding/steps/LLMStep.tsx
+++ b/web/src/sections/onboarding/steps/LLMStep.tsx
@@ -4,35 +4,29 @@ import { memo, useState, useCallback } from "react";
 import Text from "@/refresh-components/texts/Text";
 import { Button } from "@opal/components";
 import Separator from "@/refresh-components/Separator";
-import LLMProviderCard from "../components/LLMProviderCard";
+import LLMProviderCard from "@/sections/onboarding/components/LLMProviderCard";
 import {
  OnboardingActions,
  OnboardingState,
  OnboardingStep,
 } from "@/interfaces/onboarding";
-import { WellKnownLLMProviderDescriptor } from "@/interfaces/llm";
 import {
-  getOnboardingForm,
-  getProviderDisplayInfo,
-} from "../forms/getOnboardingForm";
+  LLMProviderFormProps,
+  WellKnownLLMProviderDescriptor,
+} from "@/interfaces/llm";
+import { getProvider } from "@/lib/llmConfig";
 import { Disabled } from "@opal/core";
 import ModelIcon from "@/app/admin/configuration/llm/ModelIcon";
 import { SvgCheckCircle, SvgCpu, SvgExternalLink } from "@opal/icons";
 import { ContentAction } from "@opal/layouts";
 import { useLLMProviderOptions } from "@/lib/hooks/useLLMProviderOptions";

-type LLMStepProps = {
-  state: OnboardingState;
-  actions: OnboardingActions;
-  disabled?: boolean;
-};
-
 interface SelectedProvider {
  llmDescriptor?: WellKnownLLMProviderDescriptor;
  isCustomProvider: boolean;
 }

-const LLMProviderSkeleton = () => {
+function LLMProviderSkeleton() {
  return (
    <div className="flex justify-between h-full w-full p-1 rounded-12 border border-border-01 bg-background-neutral-01 animate-pulse">
      <div className="flex gap-1 p-1 flex-1 min-w-0">
@@ -47,12 +41,11 @@ const LLMProviderSkeleton = () => {
      <div className="h-6 w-16 bg-neutral-200 rounded" />
    </div>
  );
-};
+}

-type StackedProviderIconsProps = {
+interface StackedProviderIconsProps {
  providers: string[];
-};
-
+}
 const StackedProviderIcons = ({ providers }: StackedProviderIconsProps) => {
  if (!providers || providers.length === 0) {
    return null;
@@ -89,133 +82,157 @@ const StackedProviderIcons = ({ providers }: StackedProviderIconsProps) => {
  );
 };

-const LLMStepInner = ({
-  state: onboardingState,
-  actions: onboardingActions,
-  disabled,
-}: LLMStepProps) => {
-  const { llmProviderOptions, isLoading } = useLLMProviderOptions();
-  const llmDescriptors = llmProviderOptions ?? [];
+interface LLMStepProps {
+  state: OnboardingState;
+  actions: OnboardingActions;
+  disabled?: boolean;
+}
+const LLMStep = memo(
+  ({
+    state: onboardingState,
+    actions: onboardingActions,
+    disabled,
+  }: LLMStepProps) => {
+    const { llmProviderOptions, isLoading } = useLLMProviderOptions();
+    const llmDescriptors = llmProviderOptions ?? [];

-  const [selectedProvider, setSelectedProvider] =
-    useState<SelectedProvider | null>(null);
-  const [isModalOpen, setIsModalOpen] = useState(false);
+    const [selectedProvider, setSelectedProvider] =
+      useState<SelectedProvider | null>(null);
+    const [isModalOpen, setIsModalOpen] = useState(false);

-  const handleProviderClick = useCallback(
-    (
-      llmDescriptor?: WellKnownLLMProviderDescriptor,
-      isCustomProvider: boolean = false
-    ) => {
-      setSelectedProvider({ llmDescriptor, isCustomProvider });
-      setIsModalOpen(true);
-    },
-    []
-  );
+    const handleProviderClick = useCallback(
+      (
+        llmDescriptor?: WellKnownLLMProviderDescriptor,
+        isCustomProvider: boolean = false
+      ) => {
+        setSelectedProvider({ llmDescriptor, isCustomProvider });
+        setIsModalOpen(true);
+      },
+      []
+    );

-  const handleModalClose = useCallback((open: boolean) => {
-    setIsModalOpen(open);
-    if (!open) {
-      setSelectedProvider(null);
-    }
-  }, []);
+    const handleModalClose = useCallback((open: boolean) => {
+      setIsModalOpen(open);
+      if (!open) {
+        setSelectedProvider(null);
+      }
+    }, []);

-  if (
-    onboardingState.currentStep === OnboardingStep.LlmSetup ||
-    onboardingState.currentStep === OnboardingStep.Name
-  ) {
-    return (
-      <Disabled disabled={disabled} allowClick>
-        <div
-          className="flex flex-col items-center justify-between w-full p-1 rounded-16 border border-border-01 bg-background-tint-00"
-          aria-label="onboarding-llm-step"
-        >
-          <ContentAction
-            icon={SvgCpu}
-            title="Connect your LLM models"
-            description="Onyx supports both self-hosted models and popular providers."
-            sizePreset="main-ui"
-            variant="section"
-            paddingVariant="lg"
-            rightChildren={
-              <Button
-                disabled={disabled}
-                prominence="tertiary"
-                rightIcon={SvgExternalLink}
-                href="/admin/configuration/llm"
-              >
-                View in Admin Panel
-              </Button>
-            }
-          />
-          <Separator />
-          <div className="flex flex-wrap gap-1 [&>*:last-child:nth-child(odd)]:basis-full">
-            {isLoading ? (
-              Array.from({ length: 8 }).map((_, idx) => (
-                <div
-                  key={idx}
-                  className="basis-[calc(50%-theme(spacing.1)/2)] grow"
+    if (
+      onboardingState.currentStep === OnboardingStep.LlmSetup ||
+      onboardingState.currentStep === OnboardingStep.Name
+    ) {
+      const providerName = selectedProvider?.isCustomProvider
+        ? "custom"
+        : selectedProvider?.llmDescriptor?.name ?? "custom";
+
+      const { Modal: ModalComponent } = getProvider(providerName);
+
+      const modalProps: LLMProviderFormProps = {
+        variant: "onboarding" as const,
+        shouldMarkAsDefault:
+          (onboardingState?.data.llmProviders ?? []).length === 0,
+        onboardingActions,
+        onOpenChange: handleModalClose,
+        onSuccess: () => {
+          onboardingActions.updateData({
+            llmProviders: [
+              ...(onboardingState?.data.llmProviders ?? []),
+              providerName,
+            ],
+          });
+          onboardingActions.setButtonActive(true);
+        },
+      };
+
+      return (
+        <Disabled disabled={disabled} allowClick>
+          <div
+            className="flex flex-col items-center justify-between w-full p-1 rounded-16 border border-border-01 bg-background-tint-00"
+            aria-label="onboarding-llm-step"
+          >
+            <ContentAction
+              icon={SvgCpu}
+              title="Connect your LLM models"
+              description="Onyx supports both self-hosted models and popular providers."
+              sizePreset="main-ui"
+              variant="section"
+              paddingVariant="lg"
+              rightChildren={
+                <Button
+                  disabled={disabled}
+                  prominence="tertiary"
+                  rightIcon={SvgExternalLink}
+                  href="/admin/configuration/llm"
                >
-                  <LLMProviderSkeleton />
-                </div>
-              ))
-            ) : (
-              <>
-                {/* Render the selected provider form */}
-                {selectedProvider &&
-                  isModalOpen &&
-                  getOnboardingForm({
-                    llmDescriptor: selectedProvider.llmDescriptor,
-                    isCustomProvider: selectedProvider.isCustomProvider,
-                    onboardingState,
-                    onboardingActions,
-                    onOpenChange: handleModalClose,
+                  View in Admin Panel
+                </Button>
+              }
+            />
+            <Separator />
+            <div className="flex flex-wrap gap-1 [&>*:last-child:nth-child(odd)]:basis-full">
+              {isLoading ? (
+                Array.from({ length: 8 }).map((_, idx) => (
+                  <div
+                    key={idx}
+                    className="basis-[calc(50%-theme(spacing.1)/2)] grow"
+                  >
+                    <LLMProviderSkeleton />
+                  </div>
+                ))
+              ) : (
+                <>
+                  {/* Render the selected provider form */}
+                  {selectedProvider && isModalOpen && (
+                    <ModalComponent {...modalProps} />
+                  )}
+
+                  {/* Render provider cards */}
+                  {llmDescriptors.map((llmDescriptor) => {
+                    const { productName, companyName } = getProvider(
+                      llmDescriptor.name
+                    );
+                    return (
+                      <div
+                        key={llmDescriptor.name}
+                        className="basis-[calc(50%-theme(spacing.1)/2)] grow"
+                      >
+                        <LLMProviderCard
+                          title={productName}
+                          subtitle={companyName}
+                          providerName={llmDescriptor.name}
+                          disabled={disabled}
+                          isConnected={onboardingState.data.llmProviders?.some(
+                            (provider) => provider === llmDescriptor.name
+                          )}
+                          onClick={() =>
+                            handleProviderClick(llmDescriptor, false)
+                          }
+                        />
+                      </div>
+                    );
                  })}

-                {/* Render provider cards */}
-                {llmDescriptors.map((llmDescriptor) => {
-                  const displayInfo = getProviderDisplayInfo(
-                    llmDescriptor.name
-                  );
-                  return (
-                    <div
-                      key={llmDescriptor.name}
-                      className="basis-[calc(50%-theme(spacing.1)/2)] grow"
-                    >
-                      <LLMProviderCard
-                        title={displayInfo.title}
-                        subtitle={displayInfo.displayName}
-                        providerName={llmDescriptor.name}
-                        disabled={disabled}
-                        isConnected={onboardingState.data.llmProviders?.some(
-                          (provider) => provider === llmDescriptor.name
-                        )}
-                        onClick={() =>
-                          handleProviderClick(llmDescriptor, false)
-                        }
-                      />
-                    </div>
-                  );
-                })}
-
-                {/* Custom provider card */}
-                <div className="basis-[calc(50%-theme(spacing.1)/2)] grow">
-                  <LLMProviderCard
-                    title="Custom LLM Provider"
-                    subtitle="LiteLLM Compatible APIs"
-                    disabled={disabled}
-                    isConnected={onboardingState.data.llmProviders?.some(
-                      (provider) => provider === "custom"
-                    )}
-                    onClick={() => handleProviderClick(undefined, true)}
-                  />
-                </div>
-              </>
-            )}
+                  {/* Custom provider card */}
+                  <div className="basis-[calc(50%-theme(spacing.1)/2)] grow">
+                    <LLMProviderCard
+                      title="Custom LLM Provider"
+                      subtitle="LiteLLM Compatible APIs"
+                      disabled={disabled}
+                      isConnected={onboardingState.data.llmProviders?.some(
+                        (provider) => provider === "custom"
+                      )}
+                      onClick={() => handleProviderClick(undefined, true)}
+                    />
+                  </div>
+                </>
+              )}
+            </div>
          </div>
-        </div>
-      </Disabled>
-    );
-  } else {
+        </Disabled>
+      );
+    }
+
    return (
      <button
        type="button"
@@ -244,7 +261,7 @@ const LLMStepInner = ({
      </button>
    );
  }
-};
+);
+LLMStep.displayName = "LLMStep";

-const LLMStep = memo(LLMStepInner);
 export default LLMStep;
Author	SHA1	Message	Date
github-actions[bot]	711651276c	fix(LLM config): resolve API Key before fetching models (#10056) to release v3.2 (#10057) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-04-10 00:02:33 -07:00
github-actions[bot]	3731110cf9	feat(federated): full thread replies + direct URL fetch in Slack search (#9940) to release v3.2 (#10050) Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>	2026-04-09 18:24:02 -07:00
Evan Lohn	8fb7a8718e	fix: jira bulk issue fetch batching (#10044)	2026-04-09 20:50:41 -04:00
Bo-Onyx	c4f8d5370b	fix(helm): declare metrics port on celery-worker-heavy (#10033)	2026-04-09 18:29:31 +00:00
Nikolas Garza	9e434f6a5a	fix(chat): set consistent 720px content width for chat and input bar (#10032)	2026-04-09 18:06:35 +00:00
Raunak Bhagat	67dc819319	refactor: consolidate LLM provider modal routing (#10030)	2026-04-09 18:02:43 +00:00
Nikolas Garza	2d12274050	feat(chat): add deselect preferred response with smooth transitions and scroll preservation (#10026)	2026-04-09 18:00:54 +00:00
Nikolas Garza	c727ba13ee	feat(nrf): add ModelSelector and multi-model support to Chrome extension (#10023)	2026-04-09 16:43:40 +00:00
Jamison Lahman	6193dd5326	chore(python): simplify internal packages/workspace (#10029)	2026-04-09 09:32:19 -07:00
Nikolas Garza	387a7d1cea	fix(chat): prevent popover flash when selecting 3rd model (#10021)	2026-04-09 15:52:12 +00:00
Nikolas Garza	869578eeed	fix(chat): only collapse sidebar on multi-model submit (#10020)	2026-04-09 15:41:32 +00:00
Nikolas Garza	e68648ab74	fix(chat): gate ModelSelector render on agent and provider readiness (#10017)	2026-04-09 15:41:01 +00:00
Nikolas Garza	da01002099	fix(chat): center multi-model response panels in chat view (#10006)	2026-04-09 15:40:22 +00:00