chore(llm): Add OpenAI Integration Tests

Addressing gs
chore(llm): Scaffolding for Nightly LLM Tests
2026-02-26 20:25:46 +00:00 · 2026-02-23 17:47:47 -08:00 · 2026-02-23 17:04:24 -08:00 · 2026-02-23 16:48:09 -08:00 · 2026-02-24 00:04:21 +00:00 · 2026-02-24 00:02:56 +00:00
6 changed files with 687 additions and 8 deletions
--- a/.github/workflows/nightly-llm-provider-chat-openai.yml
+++ b/.github/workflows/nightly-llm-provider-chat-openai.yml
@@ -0,0 +1,342 @@
+# Nightly regression workflow: builds the backend, model-server and integration
+# test images, starts the stack with Docker Compose, and runs the OpenAI
+# provider chat integration test against it.
+name: Nightly LLM Provider Chat Tests (OpenAI)
+concurrency:
+  # NOTE(review): the group contains github.run_id, so every run gets its own
+  # group and cancel-in-progress presumably never cancels anything - confirm
+  # whether that is intended.
+  group: Nightly-LLM-Provider-Chat-OpenAI-${{ github.workflow }}-${{ github.run_id }}
+  cancel-in-progress: true
+
+on:
+  schedule:
+    # Runs daily at 10:30 UTC (2:30 AM PST / 3:30 AM PDT)
+    - cron: "30 10 * * *"
+  # Allows the workflow to be started manually as well.
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+# Workflow-level environment, visible to every job below.
+env:
+  # API key handed to the test container as NIGHTLY_LLM_API_KEY.
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  # Model list for the test; the test step falls back to "gpt-4o-mini" when
+  # this is empty.
+  NIGHTLY_LLM_OPENAI_MODELS: ${{ vars.NIGHTLY_LLM_OPENAI_MODELS }}
+
+jobs:
+  # Builds the backend image and pushes it to the ECR cache repository under a
+  # run-specific tag that the test job later pulls.
+  build-backend-image:
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-backend-image",
+        "extras=ecr-cache",
+      ]
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      # Derives a registry-tag-safe cache suffix: the PR number when present,
+      # otherwise the ref name with disallowed characters replaced by "-".
+      - name: Format branch name for cache
+        id: format-branch
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          REF_NAME: ${{ github.ref_name }}
+        run: |
+          if [ -n "${PR_NUMBER}" ]; then
+            CACHE_SUFFIX="${PR_NUMBER}"
+          else
+            # shellcheck disable=SC2001
+            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
+          fi
+          echo "cache-suffix=${CACHE_SUFFIX}" >> "$GITHUB_OUTPUT"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # Cache sources are listed from most specific (commit) to least specific
+      # (published image); the three ECR cache refs are also written back.
+      - name: Build and push Backend Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile
+          push: true
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:nightly-llm-it-backend-${{ github.run_id }}
+          cache-from: |
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.sha }}
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
+            type=registry,ref=onyxdotapp/onyx-backend:latest
+          cache-to: |
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.sha }},mode=max
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
+          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
+
+  # Builds the model-server image and pushes it to the ECR cache repository
+  # under a run-specific tag that the test job later pulls.
+  build-model-server-image:
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-model-server-image",
+        "extras=ecr-cache",
+      ]
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      # Derives a registry-tag-safe cache suffix: the PR number when present,
+      # otherwise the ref name with disallowed characters replaced by "-".
+      - name: Format branch name for cache
+        id: format-branch
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          REF_NAME: ${{ github.ref_name }}
+        run: |
+          if [ -n "${PR_NUMBER}" ]; then
+            CACHE_SUFFIX="${PR_NUMBER}"
+          else
+            # shellcheck disable=SC2001
+            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
+          fi
+          echo "cache-suffix=${CACHE_SUFFIX}" >> "$GITHUB_OUTPUT"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # Cache sources are listed from most specific (commit) to least specific
+      # (published image); the three ECR cache refs are also written back.
+      - name: Build and push Model Server Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          push: true
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:nightly-llm-it-model-server-${{ github.run_id }}
+          cache-from: |
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.sha }}
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
+            type=registry,ref=onyxdotapp/onyx-model-server:latest
+          cache-to: |
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.sha }},mode=max
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
+
+  # Builds the integration test runner image with Docker Bake (target
+  # "integration") and pushes it; the test job runs the pytest file from it.
+  build-integration-image:
+    runs-on:
+      [
+        runs-on,
+        runner=2cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-integration-image",
+        "extras=ecr-cache",
+      ]
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # Derives a registry-tag-safe cache suffix: the PR number when present,
+      # otherwise the ref name with disallowed characters replaced by "-".
+      - name: Format branch name for cache
+        id: format-branch
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          REF_NAME: ${{ github.ref_name }}
+        run: |
+          if [ -n "${PR_NUMBER}" ]; then
+            CACHE_SUFFIX="${PR_NUMBER}"
+          else
+            # shellcheck disable=SC2001
+            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
+          fi
+          echo "cache-suffix=${CACHE_SUFFIX}" >> "$GITHUB_OUTPUT"
+
+      # INTEGRATION_REPOSITORY and TAG are exported for the bake invocation;
+      # presumably the bake file reads them to name the pushed image - confirm
+      # against the bake definition.
+      - name: Build and push integration test image with Docker Bake
+        env:
+          INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
+          TAG: nightly-llm-it-${{ github.run_id }}
+          CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
+          HEAD_SHA: ${{ github.sha }}
+        run: |
+          docker buildx bake --push \
+            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
+            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
+            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
+            --set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
+            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
+            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
+            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
+            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
+            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
+            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
+            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
+            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
+            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
+            integration
+
+  # Starts the stack from the images built above, waits for the API server to
+  # become healthy, runs the nightly provider test inside the integration
+  # image, then collects logs and tears the stack down.
+  openai-provider-chat-test:
+    needs:
+      [build-backend-image, build-model-server-image, build-integration-image]
+    runs-on:
+      - runs-on
+      - runner=4cpu-linux-arm64
+      - "run-id=${{ github.run_id }}-nightly-openai-provider-chat-test"
+      - extras=ecr-cache
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # Points Docker Compose at the images tagged with this run's id.
+      - name: Create .env file for Docker Compose
+        env:
+          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
+          RUN_ID: ${{ github.run_id }}
+        run: |
+          cat <<EOF > deployment/docker_compose/.env
+          COMPOSE_PROFILES=s3-filestore
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
+          LICENSE_ENFORCEMENT_ENABLED=false
+          AUTH_TYPE=basic
+          POSTGRES_POOL_PRE_PING=true
+          POSTGRES_USE_NULL_POOL=true
+          REQUIRE_EMAIL_VERIFICATION=false
+          DISABLE_TELEMETRY=true
+          INTEGRATION_TESTS_MODE=true
+          AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
+          ONYX_BACKEND_IMAGE=${ECR_CACHE}:nightly-llm-it-backend-${RUN_ID}
+          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:nightly-llm-it-model-server-${RUN_ID}
+          EOF
+
+      - name: Start Docker containers
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
+            relational_db \
+            index \
+            cache \
+            minio \
+            api_server \
+            inference_model_server \
+            indexing_model_server \
+            background \
+            -d
+
+      # Polls /health every 5 seconds and gives up after 300 seconds.
+      - name: Wait for API server health endpoint
+        run: |
+          start_time=$(date +%s)
+          timeout=300
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+            if [ $elapsed_time -ge $timeout ]; then
+              echo "Timeout reached waiting for API server health endpoint."
+              exit 1
+            fi
+
+            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
+            if [ "$response" = "200" ]; then
+              echo "API server is ready."
+              break
+            fi
+
+            sleep 5
+          done
+
+      - name: Run OpenAI nightly provider integration test
+        env:
+          MODELS: ${{ env.NIGHTLY_LLM_OPENAI_MODELS }}
+          # Passed through env (like the .env step above) instead of being
+          # expanded inline in the script.
+          INTEGRATION_IMAGE: ${{ env.RUNS_ON_ECR_CACHE }}:nightly-llm-it-${{ github.run_id }}
+        run: |
+          if [ -z "${MODELS}" ]; then
+            MODELS="gpt-4o-mini"
+          fi
+
+          docker run --rm --network onyx_default \
+            --name test-runner \
+            -e POSTGRES_HOST=relational_db \
+            -e POSTGRES_USER=postgres \
+            -e POSTGRES_PASSWORD=password \
+            -e POSTGRES_DB=postgres \
+            -e DB_READONLY_USER=db_readonly_user \
+            -e DB_READONLY_PASSWORD=password \
+            -e POSTGRES_POOL_PRE_PING=true \
+            -e POSTGRES_USE_NULL_POOL=true \
+            -e VESPA_HOST=index \
+            -e REDIS_HOST=cache \
+            -e API_SERVER_HOST=api_server \
+            -e TEST_WEB_HOSTNAME=test-runner \
+            -e NIGHTLY_LLM_PROVIDER="openai" \
+            -e NIGHTLY_LLM_MODELS="${MODELS}" \
+            -e NIGHTLY_LLM_API_KEY="${OPENAI_API_KEY}" \
+            -e NIGHTLY_LLM_STRICT="true" \
+            "${INTEGRATION_IMAGE}" \
+            /app/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py
+
+      - name: Dump API server logs
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose logs --no-color api_server > "$GITHUB_WORKSPACE/api_server.log" || true
+
+      - name: Dump all-container logs
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose logs --no-color > "$GITHUB_WORKSPACE/docker-compose.log" || true
+
+      # Uploads both dumped files; api_server.log was previously written but
+      # never uploaded.
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        with:
+          name: docker-all-logs-nightly-openai-llm-provider
+          path: |
+            ${{ github.workspace }}/docker-compose.log
+            ${{ github.workspace }}/api_server.log
+
+      - name: Stop Docker containers
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose down -v
+
+  # Posts to Slack when the scheduled (not manually dispatched) run fails.
+  notify-slack-on-failure:
+    needs: [openai-provider-chat-test]
+    if: failure() && github.event_name == 'schedule'
+    runs-on: ubuntu-slim
+    timeout-minutes: 5
+    steps:
+      # A local action (uses: ./...) is resolved from the workspace, so the
+      # repository has to be checked out first. Only the action's directory
+      # is fetched to keep this job light.
+      - name: Checkout Slack notify action
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+          sparse-checkout: .github/actions/slack-notify
+
+      - name: Send Slack notification
+        uses: ./.github/actions/slack-notify
+        with:
+          webhook-url: ${{ secrets.SLACK_WEBHOOK }}
+          failed-jobs: openai-provider-chat-test
+          title: "🚨 Scheduled OpenAI Provider Chat Tests failed!"
+          ref-name: ${{ github.ref_name }}
--- a/backend/onyx/chat/llm_loop.py
+++ b/backend/onyx/chat/llm_loop.py
@@ -30,6 +30,7 @@ from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import MessageType
 from onyx.context.search.models import SearchDoc
 from onyx.context.search.models import SearchDocsResponse
+from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.db.memory import add_memory
 from onyx.db.memory import update_memory_at_index
 from onyx.db.memory import UserMemoryContext
@@ -656,7 +657,12 @@ def run_llm_loop(
        fallback_extraction_attempted: bool = False
        citation_mapping: dict[int, str] = {}  # Maps citation_num -> document_id/URL

-        default_base_system_prompt: str = get_default_base_system_prompt(db_session)
+        # Fetch this in a short-lived session so the long-running stream loop does
+        # not pin a connection just to keep read state alive.
+        with get_session_with_current_tenant() as prompt_db_session:
+            default_base_system_prompt: str = get_default_base_system_prompt(
+                prompt_db_session
+            )
        system_prompt = None
        custom_agent_prompt_msg = None

--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -856,6 +856,11 @@ def handle_stream_message_objects(
                reserved_tokens=reserved_token_count,
            )

+        # Release any read transaction before entering the long-running LLM stream.
+        # Without this, the request-scoped session can keep a connection checked out
+        # for the full stream duration.
+        db_session.commit()
+
        # The stream generator can resume on a different worker thread after early yields.
        # Set this right before launching the LLM loop so run_in_background copies the right context.
        if new_msg_req.mock_llm_response is not None:
--- a/backend/tests/daily/conftest.py
+++ b/backend/tests/daily/conftest.py
@@ -9,6 +9,7 @@ from collections.abc import AsyncGenerator
 from collections.abc import Generator
 from contextlib import asynccontextmanager
 from unittest.mock import MagicMock
+from unittest.mock import patch

 import pytest
 from dotenv import load_dotenv
@@ -46,11 +47,15 @@ def mock_current_admin_user() -> MagicMock:

@pytest.fixture(scope="function")
 def client() -> Generator[TestClient, None, None]:
-    # Initialize TestClient with the FastAPI app using a no-op test lifespan
+    # Initialize TestClient with the FastAPI app using a no-op test lifespan.
+    # Patch out prometheus metrics setup to avoid "Duplicated timeseries in
+    # CollectorRegistry" errors when multiple tests each create a new app
+    # (prometheus registers metrics globally and rejects duplicate names).
    get_app = fetch_versioned_implementation(
        module="onyx.main", attribute="get_application"
    )
-    app: FastAPI = get_app(lifespan_override=test_lifespan)
+    with patch("onyx.main.setup_prometheus_metrics"):
+        app: FastAPI = get_app(lifespan_override=test_lifespan)

    # Override the database session dependency with a mock
    # (these tests don't actually need DB access)
--- a/backend/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py
+++ b/backend/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py
@@ -0,0 +1,322 @@
+import json
+import os
+import time
+from uuid import uuid4
+
+import pytest
+import requests
+from pydantic import BaseModel
+from pydantic import ConfigDict
+
+from onyx.configs import app_configs
+from onyx.configs.constants import DocumentSource
+from onyx.tools.constants import SEARCH_TOOL_ID
+from tests.integration.common_utils.constants import API_SERVER_URL
+from tests.integration.common_utils.managers.cc_pair import CCPairManager
+from tests.integration.common_utils.managers.chat import ChatSessionManager
+from tests.integration.common_utils.managers.tool import ToolManager
+from tests.integration.common_utils.test_models import DATestUser
+from tests.integration.common_utils.test_models import ToolName
+
+
+_ENV_PROVIDER = "NIGHTLY_LLM_PROVIDER"
+_ENV_MODELS = "NIGHTLY_LLM_MODELS"
+_ENV_API_KEY = "NIGHTLY_LLM_API_KEY"
+_ENV_API_BASE = "NIGHTLY_LLM_API_BASE"
+_ENV_CUSTOM_CONFIG_JSON = "NIGHTLY_LLM_CUSTOM_CONFIG_JSON"
+_ENV_STRICT = "NIGHTLY_LLM_STRICT"
+
+
+class NightlyProviderConfig(BaseModel):
+    """Provider settings for one nightly run, as read from the NIGHTLY_LLM_* env vars."""
+
+    # Declared frozen so the loaded configuration is not reassigned mid-test.
+    model_config = ConfigDict(frozen=True)
+
+    # Provider identifier from NIGHTLY_LLM_PROVIDER (stripped and lower-cased
+    # by the loader).
+    provider: str
+    # Model names parsed from the comma-separated NIGHTLY_LLM_MODELS value.
+    model_names: list[str]
+    # NIGHTLY_LLM_API_KEY, or None when unset/empty.
+    api_key: str | None
+    # NIGHTLY_LLM_API_BASE, or None when unset/empty.
+    api_base: str | None
+    # Parsed NIGHTLY_LLM_CUSTOM_CONFIG_JSON object with keys/values as strings.
+    custom_config: dict[str, str] | None
+    # When True, configuration problems fail the test instead of skipping it.
+    strict: bool
+
+
+def _env_true(env_var: str, default: bool = False) -> bool:
+    value = os.environ.get(env_var)
+    if value is None:
+        return default
+    return value.strip().lower() in {"1", "true", "yes", "on"}
+
+
+def _split_csv_env(env_var: str) -> list[str]:
+    return [
+        part.strip() for part in os.environ.get(env_var, "").split(",") if part.strip()
+    ]
+
+
+def _load_provider_config() -> NightlyProviderConfig:
+    provider = os.environ.get(_ENV_PROVIDER, "").strip().lower()
+    model_names = _split_csv_env(_ENV_MODELS)
+    api_key = os.environ.get(_ENV_API_KEY) or None
+    api_base = os.environ.get(_ENV_API_BASE) or None
+    strict = _env_true(_ENV_STRICT, default=False)
+
+    custom_config: dict[str, str] | None = None
+    custom_config_json = os.environ.get(_ENV_CUSTOM_CONFIG_JSON, "").strip()
+    if custom_config_json:
+        parsed = json.loads(custom_config_json)
+        if not isinstance(parsed, dict):
+            raise ValueError(f"{_ENV_CUSTOM_CONFIG_JSON} must be a JSON object")
+        custom_config = {str(key): str(value) for key, value in parsed.items()}
+
+    if provider == "ollama_chat" and api_key and not custom_config:
+        custom_config = {"OLLAMA_API_KEY": api_key}
+
+    return NightlyProviderConfig(
+        provider=provider,
+        model_names=model_names,
+        api_key=api_key,
+        api_base=api_base,
+        custom_config=custom_config,
+        strict=strict,
+    )
+
+
+def _skip_or_fail(strict: bool, message: str) -> None:
+    if strict:
+        pytest.fail(message)
+    pytest.skip(message)
+
+
+def _validate_provider_config(config: NightlyProviderConfig) -> None:
+    if not config.provider:
+        _skip_or_fail(strict=config.strict, message=f"{_ENV_PROVIDER} must be set")
+
+    if not config.model_names:
+        _skip_or_fail(
+            strict=config.strict,
+            message=f"{_ENV_MODELS} must include at least one model",
+        )
+
+    if config.provider != "ollama_chat" and not config.api_key:
+        _skip_or_fail(
+            strict=config.strict,
+            message=(f"{_ENV_API_KEY} is required for provider '{config.provider}'"),
+        )
+
+    if config.provider == "ollama_chat" and not (
+        config.api_base or _default_api_base_for_provider(config.provider)
+    ):
+        _skip_or_fail(
+            strict=config.strict,
+            message=(f"{_ENV_API_BASE} is required for provider '{config.provider}'"),
+        )
+
+
+def _assert_integration_mode_enabled() -> None:
+    assert (
+        app_configs.INTEGRATION_TESTS_MODE is True
+    ), "Integration tests require INTEGRATION_TESTS_MODE=true."
+
+
+def _seed_connector_for_search_tool(admin_user: DATestUser) -> None:
+    # SearchTool is only exposed when at least one non-default connector exists.
+    CCPairManager.create_from_scratch(
+        source=DocumentSource.INGESTION_API,
+        user_performing_action=admin_user,
+    )
+
+
+def _get_internal_search_tool_id(admin_user: DATestUser) -> int:
+    tools = ToolManager.list_tools(user_performing_action=admin_user)
+    for tool in tools:
+        if tool.in_code_tool_id == SEARCH_TOOL_ID:
+            return tool.id
+    raise AssertionError("SearchTool must exist for this test")
+
+
+def _default_api_base_for_provider(provider: str) -> str | None:
+    if provider == "openrouter":
+        return "https://openrouter.ai/api/v1"
+    if provider == "ollama_chat":
+        # host.docker.internal works when tests are running inside the integration test container.
+        return "http://host.docker.internal:11434"
+    return None
+
+
+def _create_provider_payload(
+    provider: str,
+    provider_name: str,
+    model_name: str,
+    api_key: str | None,
+    api_base: str | None,
+    custom_config: dict[str, str] | None,
+) -> dict:
+    return {
+        "name": provider_name,
+        "provider": provider,
+        "api_key": api_key,
+        "api_base": api_base,
+        "custom_config": custom_config,
+        "default_model_name": model_name,
+        "is_public": True,
+        "groups": [],
+        "personas": [],
+        "model_configurations": [{"name": model_name, "is_visible": True}],
+        "api_key_changed": bool(api_key),
+        "custom_config_changed": bool(custom_config),
+    }
+
+
+def _ensure_provider_is_default(provider_id: int, admin_user: DATestUser) -> None:
+    list_response = requests.get(
+        f"{API_SERVER_URL}/admin/llm/provider",
+        headers=admin_user.headers,
+    )
+    list_response.raise_for_status()
+    providers = list_response.json()
+
+    current_default = next(
+        (provider for provider in providers if provider.get("is_default_provider")),
+        None,
+    )
+    assert (
+        current_default is not None
+    ), "Expected a default provider after setting provider as default"
+    assert (
+        current_default["id"] == provider_id
+    ), f"Expected provider {provider_id} to be default, found {current_default['id']}"
+
+
+def _run_chat_assertions(
+    admin_user: DATestUser,
+    search_tool_id: int,
+    provider: str,
+    model_name: str,
+) -> None:
+    last_error: str | None = None
+    # Retry once to reduce transient nightly flakes due provider-side blips.
+    for attempt in range(1, 3):
+        chat_session = ChatSessionManager.create(user_performing_action=admin_user)
+
+        response = ChatSessionManager.send_message(
+            chat_session_id=chat_session.id,
+            message=(
+                "Use internal_search to search for 'nightly-provider-regression-sentinel', "
+                "then summarize the result in one short sentence."
+            ),
+            user_performing_action=admin_user,
+            forced_tool_ids=[search_tool_id],
+        )
+
+        if response.error is None:
+            used_internal_search = any(
+                used_tool.tool_name == ToolName.INTERNAL_SEARCH
+                for used_tool in response.used_tools
+            )
+            debug_has_internal_search = any(
+                debug_tool_call.tool_name == "internal_search"
+                for debug_tool_call in response.tool_call_debug
+            )
+            has_answer = bool(response.full_message.strip())
+
+            if used_internal_search and debug_has_internal_search and has_answer:
+                return
+
+            last_error = (
+                f"attempt={attempt} provider={provider} model={model_name} "
+                f"used_internal_search={used_internal_search} "
+                f"debug_internal_search={debug_has_internal_search} "
+                f"has_answer={has_answer} "
+                f"tool_call_debug={response.tool_call_debug}"
+            )
+        else:
+            last_error = (
+                f"attempt={attempt} provider={provider} model={model_name} "
+                f"stream_error={response.error.error}"
+            )
+
+        time.sleep(attempt)
+
+    pytest.fail(f"Chat/tool-call assertions failed: {last_error}")
+
+
+def _create_and_test_provider_for_model(
+    admin_user: DATestUser,
+    config: NightlyProviderConfig,
+    model_name: str,
+    search_tool_id: int,
+) -> None:
+    """Exercise the full provider lifecycle for a single model.
+
+    Steps: call the provider test endpoint, create the provider, make it the
+    default, confirm it is the default, run the chat assertions, and finally
+    delete the provider again.
+
+    Args:
+        admin_user: user whose headers authorize the admin API calls.
+        config: provider settings shared by all models in this run.
+        model_name: the model registered as the provider's only/default model.
+        search_tool_id: id of the search tool forced during the chat check.
+    """
+    # A random suffix is added to the provider name on every call.
+    provider_name = f"nightly-{config.provider}-{uuid4().hex[:12]}"
+    # An explicitly configured API base wins over the per-provider default.
+    resolved_api_base = config.api_base or _default_api_base_for_provider(
+        config.provider
+    )
+
+    provider_payload = _create_provider_payload(
+        provider=config.provider,
+        provider_name=provider_name,
+        model_name=model_name,
+        api_key=config.api_key,
+        api_base=resolved_api_base,
+        custom_config=config.custom_config,
+    )
+
+    # Check the provider settings via the test endpoint before creating anything.
+    test_response = requests.post(
+        f"{API_SERVER_URL}/admin/llm/test",
+        headers=admin_user.headers,
+        json=provider_payload,
+    )
+    assert test_response.status_code == 200, (
+        f"Provider test endpoint failed for provider={config.provider} "
+        f"model={model_name}: {test_response.status_code} {test_response.text}"
+    )
+
+    create_response = requests.put(
+        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
+        headers=admin_user.headers,
+        json=provider_payload,
+    )
+    assert create_response.status_code == 200, (
+        f"Provider creation failed for provider={config.provider} "
+        f"model={model_name}: {create_response.status_code} {create_response.text}"
+    )
+    provider_id = create_response.json()["id"]
+
+    # From here on the provider exists, so everything runs under try/finally to
+    # guarantee the delete request below is attempted.
+    try:
+        set_default_response = requests.post(
+            f"{API_SERVER_URL}/admin/llm/provider/{provider_id}/default",
+            headers=admin_user.headers,
+        )
+        assert set_default_response.status_code == 200, (
+            f"Setting default provider failed for provider={config.provider} "
+            f"model={model_name}: {set_default_response.status_code} "
+            f"{set_default_response.text}"
+        )
+
+        _ensure_provider_is_default(provider_id=provider_id, admin_user=admin_user)
+        _run_chat_assertions(
+            admin_user=admin_user,
+            search_tool_id=search_tool_id,
+            provider=config.provider,
+            model_name=model_name,
+        )
+    finally:
+        # Best-effort cleanup: the delete response is intentionally not checked.
+        requests.delete(
+            f"{API_SERVER_URL}/admin/llm/provider/{provider_id}",
+            headers=admin_user.headers,
+        )
+
+
+def test_nightly_provider_chat_workflow(admin_user: DATestUser) -> None:
+    """Nightly regression test for provider setup + default selection + chat tool calls."""
+    _assert_integration_mode_enabled()
+    config = _load_provider_config()
+    _validate_provider_config(config)
+
+    _seed_connector_for_search_tool(admin_user)
+    search_tool_id = _get_internal_search_tool_id(admin_user)
+
+    for model_name in config.model_names:
+        _create_and_test_provider_for_model(
+            admin_user=admin_user,
+            config=config,
+            model_name=model_name,
+            search_tool_id=search_tool_id,
+        )
--- a/web/src/app/craft/onboarding/hooks/useOnboardingModal.ts
+++ b/web/src/app/craft/onboarding/hooks/useOnboardingModal.ts
@@ -75,11 +75,10 @@ export function useOnboardingModal(): OnboardingModalController {
    level: existingPersona?.level,
  };

-  // Check if user has completed initial onboarding
+  // Check if user has completed initial onboarding (only role required, not name)
  const hasUserInfo = useMemo(() => {
-    const existingPersona = getBuildUserPersona();
-    return !!(user?.personalization?.name && existingPersona?.workArea);
-  }, [user?.personalization?.name]);
+    return !!getBuildUserPersona()?.workArea;
+  }, [user]);

  // Check if all providers are configured (skip LLM step entirely if so)
  const allProvidersConfigured = useMemo(
@@ -94,7 +93,7 @@ export function useOnboardingModal(): OnboardingModalController {
  );

  // Auto-open initial onboarding modal on first load
-  // Shows if: user info is missing OR (admin AND no providers configured)
+  // Shows if: user info (role) missing OR (admin AND no providers configured)
  useEffect(() => {
    if (hasInitialized || isLoadingLlm || !user) return;
Author	SHA1	Message	Date
justin-tahara	31217a744d	chore(llm): Add OpenAI Integration Tests	2026-02-23 17:47:47 -08:00
justin-tahara	5bf2ef4062	Addressing gs	2026-02-23 17:04:24 -08:00
justin-tahara	660a68e78b	chore(llm): Scaffolding for Nightly LLM Tests	2026-02-23 16:48:09 -08:00
Justin Tahara	adade353c5	fix(api): Improving the API handling of threads (#8573 )	2026-02-24 00:04:21 +00:00
Nikolas Garza	3cb6ec2f85	fix: patch prometheus metrics in daily test fixture (#8699 )	2026-02-24 00:02:56 +00:00
Wenxi	691eebf00a	fix: remove user info requirement for craft onboarding modal (#8697 )	2026-02-23 23:52:17 +00:00
Danelegend	905b6633e6	chore: preview modal (#8665 )	2026-02-23 23:40:55 +00:00
Justin Tahara	fd088196ff	fix(search): Improve Speed (#8430 )	2026-02-23 22:45:18 +00:00
Jamison Lahman	cafbf5b8be	chore(playwright): warn user if setup takes longer than usual (#8690 )	2026-02-23 22:23:58 +00:00
roshan	1235181559	fix(ui): Clean up NRF settings button styling (#8678 ) Co-authored-by: Claude <noreply@anthropic.com>	2026-02-23 21:25:43 +00:00
Justin Tahara	caa2e45632	fix(db): Multitenant Schema migration update (#8679 )	2026-02-23 21:25:26 +00:00
Justin Tahara	9c62e03120	chore(ods): Automated Cherry-pick backport (#8642 )	2026-02-23 21:15:09 +00:00
Nikolas Garza	0937305064	feat(scim): Okta compatibility + provider abstraction (#8568 )	2026-02-23 21:09:18 +00:00
Wenxi	e4c06570e3	fix: domain rules for signup on cloud (#8671 )	2026-02-23 20:27:37 +00:00
roshan	78fc7c86d7	fix: Handle unauthenticated state gracefully on NRF page (#8491 ) Co-authored-by: Claude <noreply@anthropic.com>	2026-02-23 19:26:38 +00:00
Raunak Bhagat	84d3aea847	refactor: migrate Web Search page to SettingsLayouts + Content (#8662 )	2026-02-23 13:38:37 +00:00
Danelegend	00a404d3cd	feat: Add code interpreter server db model (#8669 )	2026-02-23 05:09:59 +00:00
Wenxi	787cf90d96	chore: set trial api usage to 0 and show ui (#8664 )	2026-02-23 01:41:23 +00:00