Remove unused imports

Make share button instantaneous
Implement AppPage wrapper for all other pages inside of /chat
2026-02-27 04:35:50 +00:00 · 2025-11-18 13:51:10 -08:00 · 2025-11-18 13:50:37 -08:00 · 2025-11-18 13:34:38 -08:00 · 2025-11-18 13:20:09 -08:00 · 2025-11-18 13:07:52 -08:00
1619 changed files with 72194 additions and 123492 deletions
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -17,7 +17,6 @@ self-hosted-runner:
    - runner=16cpu-linux-x64
    - ubuntu-slim # Currently in public preview
    - volume=40gb
-    - volume=50gb

 # Configuration variables in array of strings defined in your repository or
 # organization. `null` means disabling configuration variables check.
--- a/.github/actions/custom-build-and-push/action.yml
+++ b/.github/actions/custom-build-and-push/action.yml
@@ -0,0 +1,154 @@
+# Composite action: builds (and by default pushes) a Docker image with
+# docker/build-push-action, making up to three attempts.
+# Every attempt sets continue-on-error so that a failed attempt does not stop
+# the steps after it; the final "Report failure" step fails the action when no
+# attempt succeeded.
+name: 'Build and Push Docker Image with Retry'
+description: 'Attempts to build and push a Docker image, with a retry on failure'
+inputs:
+  context:
+    description: 'Build context'
+    required: true
+  file:
+    description: 'Dockerfile location'
+    required: true
+  platforms:
+    description: 'Target platforms'
+    required: true
+  pull:
+    description: 'Always attempt to pull a newer version of the image'
+    required: false
+    default: 'true'
+  push:
+    description: 'Push the image to registry'
+    required: false
+    default: 'true'
+  load:
+    description: 'Load the image into Docker daemon'
+    required: false
+    default: 'true'
+  tags:
+    description: 'Image tags'
+    required: true
+  no-cache:
+    description: 'Do not use cache when building the image'
+    required: false
+    default: 'false'
+  cache-from:
+    description: 'Cache sources'
+    required: false
+  cache-to:
+    description: 'Cache destinations'
+    required: false
+  outputs:
+    description: 'Output destinations'
+    required: false
+  provenance:
+    description: 'Generate provenance attestation'
+    required: false
+    default: 'false'
+  build-args:
+    description: 'Build arguments'
+    required: false
+  retry-wait-time:
+    description: 'Time to wait before attempt 2 in seconds'
+    required: false
+    default: '60'
+  retry-wait-time-2:
+    description: 'Time to wait before attempt 3 in seconds'
+    required: false
+    default: '120'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Build and push Docker image (Attempt 1 of 3)
+      id: buildx1
+      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+      # Do not fail the action here; later steps inspect `outcome` and retry.
+      continue-on-error: true
+      with:
+        context: ${{ inputs.context }}
+        file: ${{ inputs.file }}
+        platforms: ${{ inputs.platforms }}
+        pull: ${{ inputs.pull }}
+        push: ${{ inputs.push }}
+        load: ${{ inputs.load }}
+        tags: ${{ inputs.tags }}
+        no-cache: ${{ inputs.no-cache }}
+        cache-from: ${{ inputs.cache-from }}
+        cache-to: ${{ inputs.cache-to }}
+        outputs: ${{ inputs.outputs }}
+        provenance: ${{ inputs.provenance }}
+        build-args: ${{ inputs.build-args }}
+
+    - name: Wait before attempt 2
+      if: steps.buildx1.outcome != 'success'
+      # Pass the input through the environment rather than expanding it inside
+      # the script text, so its value is never parsed as shell code.
+      env:
+        RETRY_WAIT_TIME: ${{ inputs.retry-wait-time }}
+      run: |
+        echo "First attempt failed. Waiting ${RETRY_WAIT_TIME} seconds before retry..."
+        sleep "${RETRY_WAIT_TIME}"
+      shell: bash
+
+    - name: Build and push Docker image (Attempt 2 of 3)
+      id: buildx2
+      if: steps.buildx1.outcome != 'success'
+      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+      # Without continue-on-error a failure here would end the action before
+      # attempt 3 could run.
+      continue-on-error: true
+      with:
+        context: ${{ inputs.context }}
+        file: ${{ inputs.file }}
+        platforms: ${{ inputs.platforms }}
+        pull: ${{ inputs.pull }}
+        push: ${{ inputs.push }}
+        load: ${{ inputs.load }}
+        tags: ${{ inputs.tags }}
+        no-cache: ${{ inputs.no-cache }}
+        cache-from: ${{ inputs.cache-from }}
+        cache-to: ${{ inputs.cache-to }}
+        outputs: ${{ inputs.outputs }}
+        provenance: ${{ inputs.provenance }}
+        build-args: ${{ inputs.build-args }}
+
+    - name: Wait before attempt 3
+      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
+      env:
+        RETRY_WAIT_TIME: ${{ inputs.retry-wait-time-2 }}
+      run: |
+        echo "Second attempt failed. Waiting ${RETRY_WAIT_TIME} seconds before retry..."
+        sleep "${RETRY_WAIT_TIME}"
+      shell: bash
+
+    - name: Build and push Docker image (Attempt 3 of 3)
+      id: buildx3
+      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
+      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+      # Let the "Report failure" step below decide the final result.
+      continue-on-error: true
+      with:
+        context: ${{ inputs.context }}
+        file: ${{ inputs.file }}
+        platforms: ${{ inputs.platforms }}
+        pull: ${{ inputs.pull }}
+        push: ${{ inputs.push }}
+        load: ${{ inputs.load }}
+        tags: ${{ inputs.tags }}
+        no-cache: ${{ inputs.no-cache }}
+        cache-from: ${{ inputs.cache-from }}
+        cache-to: ${{ inputs.cache-to }}
+        outputs: ${{ inputs.outputs }}
+        provenance: ${{ inputs.provenance }}
+        build-args: ${{ inputs.build-args }}
+
+    - name: Report failure
+      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
+      run: |
+        echo "All attempts failed. Possible transient infrastructure issues? Try again later or inspect logs for details."
+        # All attempts used continue-on-error, so fail the action explicitly.
+        exit 1
+      shell: bash
--- a/.github/actions/prepare-build/action.yml
+++ b/.github/actions/prepare-build/action.yml
@@ -0,0 +1,53 @@
+# Composite action that produces the backend OpenAPI schema and a Python client
+# generated from it under backend/generated/.
+name: "Prepare Build (OpenAPI generation)"
+description: "Sets up Python with uv, installs deps, generates OpenAPI schema and Python client"
+inputs:
+  docker-username:
+    description: "Docker Hub username used to authenticate image pulls"
+    required: true
+  docker-password:
+    description: "Docker Hub password or access token used to authenticate image pulls"
+    required: true
+runs:
+  using: "composite"
+  steps:
+    - name: Setup Python and Install Dependencies
+      uses: ./.github/actions/setup-python-and-install-dependencies
+
+    # The --filename path is relative to working-directory, so the schema is
+    # expected at backend/generated/openapi.json (the path mounted below).
+    - name: Generate OpenAPI schema
+      shell: bash
+      working-directory: backend
+      env:
+        PYTHONPATH: "."
+      run: |
+        python scripts/onyx_openapi_schema.py --filename generated/openapi.json
+
+    # needed for pulling openapitools/openapi-generator-cli
+    # otherwise, we hit the "Unauthenticated users" limit
+    # https://docs.docker.com/docker-hub/usage/
+    - name: Login to Docker Hub
+      uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+      with:
+        username: ${{ inputs['docker-username'] }}
+        password: ${{ inputs['docker-password'] }}
+
+    # The generator runs in a container with backend/generated mounted at
+    # /local: it reads /local/openapi.json and writes the client package to
+    # /local/onyx_openapi_client.
+    # NOTE(review): the image has no tag or digest, so the generator version
+    # can change between runs; consider pinning it.
+    - name: Generate OpenAPI Python client
+      shell: bash
+      run: |
+        docker run --rm \
+          -v "${GITHUB_WORKSPACE}/backend/generated:/local" \
+          openapitools/openapi-generator-cli generate \
+          -i /local/openapi.json \
+          -g python \
+          -o /local/onyx_openapi_client \
+          --package-name onyx_openapi_client \
+          --skip-validate-spec \
+          --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
--- a/.github/actions/setup-playwright/action.yml
+++ b/.github/actions/setup-playwright/action.yml
@@ -7,9 +7,9 @@ runs:
      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
      with:
        path: ~/.cache/ms-playwright
-        key: ${{ runner.os }}-${{ runner.arch }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
+        key: ${{ runner.os }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
        restore-keys: |
-          ${{ runner.os }}-${{ runner.arch }}-playwright-
+          ${{ runner.os }}-playwright-

    - name: Install playwright
      shell: bash
--- a/.github/actions/setup-python-and-install-dependencies/action.yml
+++ b/.github/actions/setup-python-and-install-dependencies/action.yml
@@ -1,40 +1,19 @@
 name: "Setup Python and Install Dependencies"
 description: "Sets up Python with uv and installs deps"
-inputs:
-  requirements:
-    description: "Newline-separated list of requirement files to install (relative to repo root)"
-    required: true
 runs:
  using: "composite"
  steps:
    - name: Setup uv
-      uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
-      with:
-        version: "0.9.9"
+      uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
      # TODO: Enable caching once there is a uv.lock file checked in.
      # with:
      #   enable-cache: true

-    - name: Compute requirements hash
-      id: req-hash
-      shell: bash
-      env:
-        REQUIREMENTS: ${{ inputs.requirements }}
-      run: |
-        # Hash the contents of the specified requirement files
-        hash=""
-        while IFS= read -r req; do
-          if [ -n "$req" ] && [ -f "$req" ]; then
-            hash="$hash$(sha256sum "$req")"
-          fi
-        done <<< "$REQUIREMENTS"
-        echo "hash=$(echo "$hash" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
-
    - name: Cache uv cache directory
      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
      with:
        path: ~/.cache/uv
-        key: ${{ runner.os }}-uv-${{ steps.req-hash.outputs.hash }}
+        key: ${{ runner.os }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
        restore-keys: |
          ${{ runner.os }}-uv-

@@ -45,30 +24,15 @@ runs:

    - name: Create virtual environment
      shell: bash
-      env:
-        VENV_DIR: ${{ runner.temp }}/venv
-      run: | # zizmor: ignore[github-env]
-        uv venv "$VENV_DIR"
-        # Validate path before adding to GITHUB_PATH to prevent code injection
-        if [ -d "$VENV_DIR/bin" ]; then
-          realpath "$VENV_DIR/bin" >> "$GITHUB_PATH"
-        else
-          echo "Error: $VENV_DIR/bin does not exist"
-          exit 1
-        fi
+      run: |
+        uv venv ${{ runner.temp }}/venv
+        echo "VENV_PATH=${{ runner.temp }}/venv" >> $GITHUB_ENV
+        echo "${{ runner.temp }}/venv/bin" >> $GITHUB_PATH

    - name: Install Python dependencies with uv
      shell: bash
-      env:
-        REQUIREMENTS: ${{ inputs.requirements }}
      run: |
-        # Build the uv pip install command with each requirement file as array elements
-        cmd=("uv" "pip" "install")
-        while IFS= read -r req; do
-          # Skip empty lines
-          if [ -n "$req" ]; then
-            cmd+=("-r" "$req")
-          fi
-        done <<< "$REQUIREMENTS"
-        echo "Running: ${cmd[*]}"
-        "${cmd[@]}"
+        uv pip install \
+          -r backend/requirements/default.txt \
+          -r backend/requirements/dev.txt \
+          -r backend/requirements/model_server.txt
--- a/.github/actions/slack-notify/action.yml
+++ b/.github/actions/slack-notify/action.yml
@@ -21,27 +21,26 @@ runs:
      shell: bash
      env:
        SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
-        FAILED_JOBS: ${{ inputs.failed-jobs }}
-        TITLE: ${{ inputs.title }}
-        REF_NAME: ${{ inputs.ref-name }}
-        REPO: ${{ github.repository }}
-        WORKFLOW: ${{ github.workflow }}
-        RUN_NUMBER: ${{ github.run_number }}
-        RUN_ID: ${{ github.run_id }}
-        SERVER_URL: ${{ github.server_url }}
-        GITHUB_REF_NAME: ${{ github.ref_name }}
      run: |
        if [ -z "$SLACK_WEBHOOK_URL" ]; then
          echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
          exit 0
        fi

-        # Build workflow URL
+        # Get inputs with defaults
+        FAILED_JOBS="${{ inputs.failed-jobs }}"
+        TITLE="${{ inputs.title }}"
+        REF_NAME="${{ inputs.ref-name }}"
+        REPO="${{ github.repository }}"
+        WORKFLOW="${{ github.workflow }}"
+        RUN_NUMBER="${{ github.run_number }}"
+        RUN_ID="${{ github.run_id }}"
+        SERVER_URL="${{ github.server_url }}"
        WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"

        # Use ref_name from input or fall back to github.ref_name
        if [ -z "$REF_NAME" ]; then
-          REF_NAME="$GITHUB_REF_NAME"
+          REF_NAME="${{ github.ref_name }}"
        fi

        # Escape JSON special characters
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -4,8 +4,6 @@ updates:
    directory: "/"
    schedule:
      interval: "weekly"
-    cooldown:
-      default-days: 7
    open-pull-requests-limit: 3
    assignees:
      - "jmelahman"
@@ -15,8 +13,6 @@ updates:
    directory: "/backend"
    schedule:
      interval: "weekly"
-    cooldown:
-      default-days: 7
    open-pull-requests-limit: 3
    assignees:
      - "jmelahman"
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,10 +1,10 @@
 ## Description

-<!--- Provide a brief description of the changes in this PR --->
+[Provide a brief description of the changes in this PR]

 ## How Has This Been Tested?

-<!--- Describe the tests you ran to verify your changes --->
+[Describe the tests you ran to verify your changes]

 ## Additional Options

--- a/.github/workflows/check-lazy-imports.yml
+++ b/.github/workflows/check-lazy-imports.yml
@@ -0,0 +1,36 @@
+# Runs backend/scripts/check_lazy_imports.py on pull requests targeting main or
+# release branches, and on merge-queue (merge_group) events.
+name: Check Lazy Imports
+concurrency:
+  group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true
+
+on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - 'release/**'
+
+# Least privilege: checkout only needs read access to repository contents.
+permissions:
+  contents: read
+
+jobs:
+  check-lazy-imports:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+      with:
+        # Keep the token out of the local git config (NOTE(review): confirm the script below runs no authenticated git commands).
+        persist-credentials: false
+
+    - name: Set up Python
+      uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
+      with:
+        python-version: '3.11'
+
+    - name: Check lazy imports
+      run: python3 backend/scripts/check_lazy_imports.py
--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
--- a/.github/workflows/docker-tag-beta.yml
+++ b/.github/workflows/docker-tag-beta.yml
@@ -10,15 +10,11 @@ on:
        description: "The version (ie v1.0.0-beta.0) to tag as beta"
        required: true

-permissions:
-  contents: read
-
 jobs:
  tag:
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
-    timeout-minutes: 45
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -33,19 +29,13 @@ jobs:
        run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV

      - name: Pull, Tag and Push Web Server Image
-        env:
-          VERSION: ${{ github.event.inputs.version }}
        run: |
-          docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${VERSION}
+          docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}

      - name: Pull, Tag and Push API Server Image
-        env:
-          VERSION: ${{ github.event.inputs.version }}
        run: |
-          docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${VERSION}
+          docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${{ github.event.inputs.version }}

      - name: Pull, Tag and Push Model Server Image
-        env:
-          VERSION: ${{ github.event.inputs.version }}
        run: |
-          docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${VERSION}
+          docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}
--- a/.github/workflows/docker-tag-latest.yml
+++ b/.github/workflows/docker-tag-latest.yml
@@ -10,15 +10,11 @@ on:
        description: "The version (ie v0.0.1) to tag as latest"
        required: true

-permissions:
-  contents: read
-
 jobs:
  tag:
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
-    timeout-minutes: 45
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -33,19 +29,13 @@ jobs:
        run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV

      - name: Pull, Tag and Push Web Server Image
-        env:
-          VERSION: ${{ github.event.inputs.version }}
        run: |
-          docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${VERSION}
+          docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}

      - name: Pull, Tag and Push API Server Image
-        env:
-          VERSION: ${{ github.event.inputs.version }}
        run: |
-          docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${VERSION}
+          docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${{ github.event.inputs.version }}

      - name: Pull, Tag and Push Model Server Image
-        env:
-          VERSION: ${{ github.event.inputs.version }}
        run: |
-          docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${VERSION}
+          docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}
--- a/.github/workflows/helm-chart-releases.yml
+++ b/.github/workflows/helm-chart-releases.yml
@@ -12,13 +12,11 @@ jobs:
    permissions:
      contents: write
    runs-on: ubuntu-latest
-    timeout-minutes: 45
    steps:
      - name: Checkout
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
        with:
          fetch-depth: 0
-          persist-credentials: false

      - name: Install Helm CLI
        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4
@@ -32,7 +30,6 @@ jobs:
          helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
          helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
          helm repo add minio https://charts.min.io/
-          helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
          helm repo update

      - name: Build chart dependencies
--- a/.github/workflows/merge-group.yml
+++ b/.github/workflows/merge-group.yml
@@ -1,31 +0,0 @@
-name: Merge Group-Specific
-
-on:
-  merge_group:
-
-permissions:
-  contents: read
-
-jobs:
-  # This job immediately succeeds to satisfy branch protection rules on merge_group events.
-  # There is a similarly named "required" job in pr-integration-tests.yml which runs the actual
-  # integration tests. That job runs on both pull_request and merge_group events, and this job
-  # exists solely to provide a fast-passing check with the same name for branch protection.
-  # The actual tests remain enforced on presubmit (pull_request events).
-  required:
-    runs-on: ubuntu-latest
-    timeout-minutes: 45
-    steps:
-      - name: Success
-        run: echo "Success"
-  # This job immediately succeeds to satisfy branch protection rules on merge_group events.
-  # There is a similarly named "playwright-required" job in pr-playwright-tests.yml which runs
-  # the actual playwright tests. That job runs on both pull_request and merge_group events, and
-  # this job exists solely to provide a fast-passing check with the same name for branch protection.
-  # The actual tests remain enforced on presubmit (pull_request events).
-  playwright-required:
-    runs-on: ubuntu-latest
-    timeout-minutes: 45
-    steps:
-      - name: Success
-        run: echo "Success"
--- a/.github/workflows/nightly-close-stale-issues.yml
+++ b/.github/workflows/nightly-close-stale-issues.yml
@@ -11,9 +11,8 @@ permissions:
 jobs:
  stale:
    runs-on: ubuntu-latest
-    timeout-minutes: 45
    steps:
-      - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
+      - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # ratchet:actions/stale@v9
        with:
          stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
          stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
--- a/.github/workflows/nightly-scan-licenses.yml
+++ b/.github/workflows/nightly-scan-licenses.yml
@@ -15,25 +15,19 @@ on:
 permissions:
  actions: read
  contents: read
+  security-events: write

 jobs:
  scan-licenses:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]
-    timeout-minutes: 45
-    permissions:
-      actions: read
-      contents: read
-      security-events: write

    steps:
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Python
-        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
        with:
          python-version: '3.11'
          cache: 'pip'
@@ -60,9 +54,7 @@ jobs:

      - name: Print report
        if: always()
-        env:
-          REPORT: ${{ steps.license_check_report.outputs.report }}
-        run: echo "$REPORT"
+        run: echo "${{ steps.license_check_report.outputs.report }}"

      - name: Install npm dependencies
        working-directory: ./web
@@ -90,7 +82,6 @@ jobs:
  scan-trivy:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
-    timeout-minutes: 45

    steps:
    - name: Set up Docker Buildx
--- a/.github/workflows/pr-database-tests.yml
+++ b/.github/workflows/pr-database-tests.yml
@@ -1,62 +0,0 @@
-name: Database Tests
-concurrency:
-  group: Database-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
-  cancel-in-progress: true
-
-on:
-  merge_group:
-  pull_request:
-    branches:
-      - main
-      - "release/**"
-  push:
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read
-
-jobs:
-  database-tests:
-    runs-on:
-      - runs-on
-      - runner=2cpu-linux-arm64
-      - "run-id=${{ github.run_id }}-database-tests"
-    timeout-minutes: 45
-    steps:
-      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
-
-      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Setup Python and Install Dependencies
-        uses: ./.github/actions/setup-python-and-install-dependencies
-        with:
-          requirements: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt
-
-      - name: Generate OpenAPI schema and Python client
-        shell: bash
-        run: |
-          ods openapi all
-
-      # needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
-      # https://docs.docker.com/docker-hub/usage/
-      - name: Login to Docker Hub
-        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Start Docker containers
-        working-directory: ./deployment/docker_compose
-        run: |
-          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d \
-            relational_db
-
-      - name: Run Database Tests
-        working-directory: ./backend
-        run: pytest -m alembic tests/integration/tests/migrations/
--- a/.github/workflows/pr-external-dependency-unit-tests.yml
+++ b/.github/workflows/pr-external-dependency-unit-tests.yml
@@ -7,22 +7,14 @@ on:
  merge_group:
  pull_request:
    branches: [main]
-  push:
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read

 env:
-  # AWS credentials for S3-specific test
-  S3_AWS_ACCESS_KEY_ID_FOR_TEST: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
-  S3_AWS_SECRET_ACCESS_KEY_FOR_TEST: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
+  # AWS
+  S3_AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
+  S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}

  # MinIO
  S3_ENDPOINT_URL: "http://localhost:9004"
-  S3_AWS_ACCESS_KEY_ID: "minioadmin"
-  S3_AWS_SECRET_ACCESS_KEY: "minioadmin"

  # Confluence
  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
@@ -32,29 +24,19 @@ env:
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
  CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}

-  # Jira
-  JIRA_ADMIN_API_TOKEN: ${{ secrets.JIRA_ADMIN_API_TOKEN }}
-
  # LLMs
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

-  # Code Interpreter
-  # TODO: debug why this is failing and enable
-  CODE_INTERPRETER_BASE_URL: http://localhost:8000
-
 jobs:
  discover-test-dirs:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
-    timeout-minutes: 45
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Discover test directories
        id: set-matrix
@@ -71,7 +53,6 @@ jobs:
      - runner=2cpu-linux-arm64
      - ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
      - extras=s3-cache
-    timeout-minutes: 45
    strategy:
      fail-fast: false
      matrix:
@@ -80,23 +61,15 @@ jobs:
    env:
      PYTHONPATH: ./backend
      MODEL_SERVER_HOST: "disabled"
-      DISABLE_TELEMETRY: "true"

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Setup Python and Install Dependencies
        uses: ./.github/actions/setup-python-and-install-dependencies
-        with:
-          requirements: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt
-            backend/requirements/ee.txt

      - name: Setup Playwright
        uses: ./.github/actions/setup-playwright
@@ -110,25 +83,10 @@ jobs:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Create .env file for Docker Compose
-        run: |
-          cat <<EOF > deployment/docker_compose/.env
-          CODE_INTERPRETER_BETA_ENABLED=true
-          DISABLE_TELEMETRY=true
-          EOF
-
      - name: Set up Standard Dependencies
        run: |
          cd deployment/docker_compose
-          docker compose \
-            -f docker-compose.yml \
-            -f docker-compose.dev.yml \
-            up -d \
-            minio \
-            relational_db \
-            cache \
-            index \
-            code-interpreter
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index

      - name: Run migrations
        run: |
@@ -139,39 +97,10 @@ jobs:

      - name: Run Tests for ${{ matrix.test-dir }}
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
-        env:
-          TEST_DIR: ${{ matrix.test-dir }}
        run: |
          py.test \
            --durations=8 \
            -o junit_family=xunit2 \
            -xv \
            --ff \
-            backend/tests/external_dependency_unit/${TEST_DIR}
-
-      - name: Collect Docker logs on failure
-        if: failure()
-        run: |
-          mkdir -p docker-logs
-          cd deployment/docker_compose
-
-          # Get list of running containers
-          containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
-
-          # Collect logs from each container
-          for container in $containers; do
-            container_name=$(docker inspect --format='{{.Name}}' $container | sed 's/^\///')
-            echo "Collecting logs from $container_name..."
-            docker logs $container > ../../docker-logs/${container_name}.log 2>&1
-          done
-
-          cd ../..
-          echo "Docker logs collected in docker-logs directory"
-
-      - name: Upload Docker logs
-        if: failure()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
-        with:
-          name: docker-logs-${{ matrix.test-dir }}
-          path: docker-logs/
-          retention-days: 7
+            backend/tests/external_dependency_unit/${{ matrix.test-dir }}
--- a/.github/workflows/pr-helm-chart-testing.yml
+++ b/.github/workflows/pr-helm-chart-testing.yml
@@ -7,27 +7,19 @@ on:
  merge_group:
  pull_request:
    branches: [ main ]
-  push:
-    tags:
-      - "v*.*.*"
  workflow_dispatch:  # Allows manual triggering

-permissions:
-  contents: read
-
 jobs:
  helm-chart-check:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
-    timeout-minutes: 45

    # fetch-depth 0 is required for helm/chart-testing-action
    steps:
    - name: Checkout code
-      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
      with:
        fetch-depth: 0
-        persist-credentials: false

    - name: Set up Helm
      uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
@@ -40,11 +32,9 @@ jobs:
    # even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
    - name: Run chart-testing (list-changed)
      id: list-changed
-      env:
-        DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
      run: |
-        echo "default_branch: ${DEFAULT_BRANCH}"
-        changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
+        echo "default_branch: ${{ github.event.repository.default_branch }}"
+        changed=$(ct list-changed --remote origin --target-branch ${{ github.event.repository.default_branch }} --chart-dirs deployment/helm/charts)
        echo "list-changed output: $changed"
        if [[ -n "$changed" ]]; then
          echo "changed=true" >> "$GITHUB_OUTPUT"
@@ -83,7 +73,6 @@ jobs:
        helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
        helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
        helm repo add minio https://charts.min.io/
-        helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
        helm repo update

    - name: Install Redis operator
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -9,12 +9,6 @@ on:
    branches:
      - main
      - "release/**"
-  push:
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read

 env:
  # Test Environment Variables
@@ -33,24 +27,16 @@ env:
  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
-  GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN }}
-  GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC }}
-  GITHUB_ADMIN_EMAIL: ${{ secrets.ONYX_GITHUB_ADMIN_EMAIL }}
-  GITHUB_TEST_USER_1_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_1_EMAIL }}
-  GITHUB_TEST_USER_2_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_2_EMAIL }}

 jobs:
  discover-test-dirs:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
-    timeout-minutes: 45
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Discover test directories
        id: set-matrix
@@ -72,35 +58,13 @@ jobs:
          all_dirs="[${all_dirs%,}]"
          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT

+
  build-backend-image:
-    runs-on:
-      [
-        runs-on,
-        runner=1cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-backend-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -121,46 +85,17 @@ jobs:
          file: ./backend/Dockerfile
          push: true
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
-          cache-from: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
-            type=registry,ref=onyxdotapp/onyx-backend:latest
-          cache-to: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

+
  build-model-server-image:
-    runs-on:
-      [
-        runs-on,
-        runner=1cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-model-server-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -181,31 +116,16 @@ jobs:
          file: ./backend/Dockerfile.model_server
          push: true
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
-          cache-from: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
-            type=registry,ref=onyxdotapp/onyx-model-server:latest
-          cache-to: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
+

  build-integration-image:
-    runs-on:
-      [
-        runs-on,
-        runner=2cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-integration-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -219,42 +139,11 @@ jobs:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
-
      - name: Build and push integration test image with Docker Bake
        env:
-          INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
+          REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
          TAG: integration-test-${{ github.run_id }}
-          CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
-          HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-        run: |
-          docker buildx bake --push \
-            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
-            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
-            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
-            --set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
-            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
-            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
-            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
-            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
-            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
-            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
-            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
-            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
-            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
-            integration
+        run: cd backend && docker buildx bake --push integration

  integration-tests:
    needs:
@@ -269,7 +158,6 @@ jobs:
      - runner=4cpu-linux-arm64
      - ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
      - extras=ecr-cache
-    timeout-minutes: 45

    strategy:
      fail-fast: false
@@ -279,9 +167,7 @@ jobs:
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
@@ -294,28 +180,19 @@ jobs:

      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
      # NOTE: don't need web server for integration tests
-      - name: Create .env file for Docker Compose
-        env:
-          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
-          RUN_ID: ${{ github.run_id }}
-        run: |
-          cat <<EOF > deployment/docker_compose/.env
-          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
-          AUTH_TYPE=basic
-          POSTGRES_POOL_PRE_PING=true
-          POSTGRES_USE_NULL_POOL=true
-          REQUIRE_EMAIL_VERIFICATION=false
-          DISABLE_TELEMETRY=true
-          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
-          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
-          INTEGRATION_TESTS_MODE=true
-          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
-          MCP_SERVER_ENABLED=true
-          EOF
-
      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
+          AUTH_TYPE=basic \
+          POSTGRES_POOL_PRE_PING=true \
+          POSTGRES_USE_NULL_POOL=true \
+          REQUIRE_EMAIL_VERIFICATION=false \
+          DISABLE_TELEMETRY=true \
+          ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
+          ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
+          INTEGRATION_TESTS_MODE=true \
+          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
            relational_db \
            index \
@@ -324,56 +201,43 @@ jobs:
            api_server \
            inference_model_server \
            indexing_model_server \
-            mcp_server \
            background \
            -d
        id: start_docker

-      - name: Wait for services to be ready
+      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

-          wait_for_service() {
-            local url=$1
-            local label=$2
-            local timeout=${3:-300}  # default 5 minutes
-            local start_time
-            start_time=$(date +%s)
+          docker logs -f onyx-api_server-1 &

-            while true; do
-              local current_time
-              current_time=$(date +%s)
-              local elapsed_time=$((current_time - start_time))
+          start_time=$(date +%s)
+          timeout=300  # 5 minutes in seconds

-              if [ $elapsed_time -ge $timeout ]; then
-                echo "Timeout reached. ${label} did not become ready in $timeout seconds."
-                exit 1
-              fi
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))

-              local response
-              response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
+            if [ $elapsed_time -ge $timeout ]; then
+              echo "Timeout reached. Service did not become ready in 5 minutes."
+              exit 1
+            fi

-              if [ "$response" = "200" ]; then
-                echo "${label} is ready!"
-                break
-              elif [ "$response" = "curl_error" ]; then
-                echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
-              else
-                echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
-              fi
+            # A failed curl (e.g. exit code 56) still prints "000" via -w, so set the sentinel outside $(...)
+            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health) || response="curl_error"

-              sleep 5
-            done
-          }
+            if [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            elif [ "$response" = "curl_error" ]; then
+              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
+            else
+              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
+            fi

-          wait_for_service "http://localhost:8080/health" "API server"
-          test_dir="${{ matrix.test-dir.path }}"
-          if [ "$test_dir" = "tests/mcp" ]; then
-            wait_for_service "http://localhost:8090/health" "MCP server"
-          else
-            echo "Skipping MCP server wait for non-MCP suite: $test_dir"
-          fi
-          echo "Finished waiting for services."
+            sleep 5
+          done
+          echo "Finished waiting for service."

      - name: Start Mock Services
        run: |
@@ -402,10 +266,7 @@ jobs:
              -e VESPA_HOST=index \
              -e REDIS_HOST=cache \
              -e API_SERVER_HOST=api_server \
-              -e MCP_SERVER_HOST=mcp_server \
-              -e MCP_SERVER_PORT=8090 \
              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
-              -e EXA_API_KEY=${EXA_API_KEY} \
              -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
              -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
              -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
@@ -419,11 +280,6 @@ jobs:
              -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
              -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
              -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
-              -e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN} \
-              -e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC} \
-              -e GITHUB_ADMIN_EMAIL=${GITHUB_ADMIN_EMAIL} \
-              -e GITHUB_TEST_USER_1_EMAIL=${GITHUB_TEST_USER_1_EMAIL} \
-              -e GITHUB_TEST_USER_2_EMAIL=${GITHUB_TEST_USER_2_EMAIL} \
              -e TEST_WEB_HOSTNAME=test-runner \
              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
              -e MOCK_CONNECTOR_SERVER_PORT=8001 \
@@ -452,24 +308,20 @@ jobs:
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

+
  multitenant-tests:
    needs:
-      [build-backend-image, build-model-server-image, build-integration-image]
-    runs-on:
      [
-        runs-on,
-        runner=8cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-multitenant-tests",
-        "extras=ecr-cache",
+        build-backend-image,
+        build-model-server-image,
+        build-integration-image,
      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -478,9 +330,6 @@ jobs:
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Start Docker containers for multi-tenant tests
-        env:
-          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
-          RUN_ID: ${{ github.run_id }}
        run: |
          cd deployment/docker_compose
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
@@ -488,10 +337,9 @@ jobs:
          AUTH_TYPE=cloud \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
-          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
-          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
+          ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
+          ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
          DEV_MODE=true \
-          MCP_SERVER_ENABLED=true \
          docker compose -f docker-compose.multitenant-dev.yml up \
            relational_db \
            index \
@@ -500,7 +348,6 @@ jobs:
            api_server \
            inference_model_server \
            indexing_model_server \
-            mcp_server \
            background \
            -d
        id: start_docker_multi_tenant
@@ -532,9 +379,6 @@ jobs:
          echo "Finished waiting for service."

      - name: Run Multi-Tenant Integration Tests
-        env:
-          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
-          RUN_ID: ${{ github.run_id }}
        run: |
          echo "Running multi-tenant integration tests..."
          docker run --rm --network onyx_default \
@@ -549,10 +393,7 @@ jobs:
            -e VESPA_HOST=index \
            -e REDIS_HOST=cache \
            -e API_SERVER_HOST=api_server \
-            -e MCP_SERVER_HOST=mcp_server \
-            -e MCP_SERVER_PORT=8090 \
            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
-            -e EXA_API_KEY=${EXA_API_KEY} \
            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
            -e TEST_WEB_HOSTNAME=test-runner \
            -e AUTH_TYPE=cloud \
@@ -561,7 +402,7 @@ jobs:
            -e REQUIRE_EMAIL_VERIFICATION=false \
            -e DISABLE_TELEMETRY=true \
            -e DEV_MODE=true \
-            ${ECR_CACHE}:integration-test-${RUN_ID} \
+            ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
            /app/tests/integration/multitenant_tests

      - name: Dump API server logs (multi-tenant)
@@ -592,10 +433,19 @@ jobs:
  required:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
-    timeout-minutes: 45
    needs: [integration-tests, multitenant-tests]
    if: ${{ always() }}
    steps:
+      # Any upstream result other than `success` (failure, cancelled, skipped)
+      # fails this job; `if: always()` above makes it run even in those cases.
      - name: Check job status
-        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
-        run: exit 1
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
+        with:
+          script: |
+            const needs = ${{ toJSON(needs) }};
+            const failed = Object.values(needs).some(n => n.result !== 'success');
+            if (failed) {
+              core.setFailed('One or more upstream jobs failed, were cancelled, or were skipped.');
+            } else {
+              core.notice('All required jobs succeeded.');
+            }
--- a/.github/workflows/pr-jest-tests.yml
+++ b/.github/workflows/pr-jest-tests.yml
@@ -3,35 +3,21 @@ concurrency:
  group: Run-Jest-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

-on:
-  merge_group:
-  pull_request:
-    branches:
-      - main
-      - "release/**"
-  push:
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read
+on: push

 jobs:
  jest-tests:
    name: Jest Tests
    runs-on: ubuntu-latest
-    timeout-minutes: 45
    steps:
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Setup node
-        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
+        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
        with:
          node-version: 22
-          cache: "npm"
+          cache: 'npm'
          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
--- a/.github/workflows/pr-labeler.yml
+++ b/.github/workflows/pr-labeler.yml
@@ -1,7 +1,7 @@
 name: PR Labeler

 on:
-  pull_request:
+  pull_request_target:
    branches:
      - main
    types:
@@ -12,11 +12,11 @@ on:

 permissions:
  contents: read
+  pull-requests: write

 jobs:
  validate_pr_title:
    runs-on: ubuntu-latest
-    timeout-minutes: 45
    steps:
      - name: Check PR title for Conventional Commits
        env:
--- a/.github/workflows/pr-linear-check.yml
+++ b/.github/workflows/pr-linear-check.yml
@@ -7,13 +7,9 @@ on:
  pull_request:
    types: [opened, edited, reopened, synchronize]

-permissions:
-  contents: read
-
 jobs:
  linear-check:
    runs-on: ubuntu-latest
-    timeout-minutes: 45
    steps:
      - name: Check PR body for Linear link or override
        env:
--- a/.github/workflows/pr-mit-integration-tests.yml
+++ b/.github/workflows/pr-mit-integration-tests.yml
@@ -6,18 +6,11 @@ concurrency:
 on:
  merge_group:
    types: [checks_requested]
-  push:
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read

 env:
  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
@@ -35,14 +28,11 @@ jobs:
  discover-test-dirs:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
-    timeout-minutes: 45
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Discover test directories
        id: set-matrix
@@ -65,34 +55,11 @@ jobs:
          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT

  build-backend-image:
-    runs-on:
-      [
-        runs-on,
-        runner=1cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-backend-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -113,46 +80,16 @@ jobs:
          file: ./backend/Dockerfile
          push: true
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
-          cache-from: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
-            type=registry,ref=onyxdotapp/onyx-backend:latest
-          cache-to: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-model-server-image:
-    runs-on:
-      [
-        runs-on,
-        runner=1cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-model-server-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -173,45 +110,15 @@ jobs:
          file: ./backend/Dockerfile.model_server
          push: true
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
-          cache-from: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
-            type=registry,ref=onyxdotapp/onyx-model-server:latest
-          cache-to: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max

  build-integration-image:
-    runs-on:
-      [
-        runs-on,
-        runner=2cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-integration-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -227,26 +134,9 @@ jobs:

      - name: Build and push integration test image with Docker Bake
        env:
-          INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
+          REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
          TAG: integration-test-${{ github.run_id }}
-          CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
-          HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-        run: |
-          docker buildx bake --push \
-            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
-            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
-            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
-            --set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
-            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
-            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
-            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
-            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
-            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
-            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
-            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
-            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
-            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
-            integration
+        run: cd backend && docker buildx bake --push integration

  integration-tests-mit:
    needs:
@@ -261,7 +151,6 @@ jobs:
      - runner=4cpu-linux-arm64
      - ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
      - extras=ecr-cache
-    timeout-minutes: 45

    strategy:
      fail-fast: false
@@ -271,9 +160,7 @@ jobs:
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
@@ -286,26 +173,17 @@ jobs:

      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
      # NOTE: don't need web server for integration tests
-      - name: Create .env file for Docker Compose
-        env:
-          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
-          RUN_ID: ${{ github.run_id }}
-        run: |
-          cat <<EOF > deployment/docker_compose/.env
-          AUTH_TYPE=basic
-          POSTGRES_POOL_PRE_PING=true
-          POSTGRES_USE_NULL_POOL=true
-          REQUIRE_EMAIL_VERIFICATION=false
-          DISABLE_TELEMETRY=true
-          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
-          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
-          INTEGRATION_TESTS_MODE=true
-          MCP_SERVER_ENABLED=true
-          EOF
-
      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
+          AUTH_TYPE=basic \
+          POSTGRES_POOL_PRE_PING=true \
+          POSTGRES_USE_NULL_POOL=true \
+          REQUIRE_EMAIL_VERIFICATION=false \
+          DISABLE_TELEMETRY=true \
+          ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
+          ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
+          INTEGRATION_TESTS_MODE=true \
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
            relational_db \
            index \
@@ -314,56 +192,43 @@ jobs:
            api_server \
            inference_model_server \
            indexing_model_server \
-            mcp_server \
            background \
            -d
        id: start_docker

-      - name: Wait for services to be ready
+      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

-          wait_for_service() {
-            local url=$1
-            local label=$2
-            local timeout=${3:-300}  # default 5 minutes
-            local start_time
-            start_time=$(date +%s)
+          docker logs -f onyx-api_server-1 &

-            while true; do
-              local current_time
-              current_time=$(date +%s)
-              local elapsed_time=$((current_time - start_time))
+          start_time=$(date +%s)
+          timeout=300  # 5 minutes in seconds

-              if [ $elapsed_time -ge $timeout ]; then
-                echo "Timeout reached. ${label} did not become ready in $timeout seconds."
-                exit 1
-              fi
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))

-              local response
-              response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
+            if [ "$elapsed_time" -ge "$timeout" ]; then  # quoted so an empty value fails loudly instead of mis-parsing
+              echo "Timeout reached. Service did not become ready in ${timeout} seconds."
+              exit 1
+            fi

-              if [ "$response" = "200" ]; then
-                echo "${label} is ready!"
-                break
-              elif [ "$response" = "curl_error" ]; then
-                echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
-              else
-                echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
-              fi
+            # Any curl failure (e.g. exit 56 while the server is still starting) is mapped to the "curl_error" sentinel so the loop retries; assigning after the substitution keeps curl's own "000" output out of the sentinel, and --max-time stops one hung request from outliving the overall timeout
+            response=$(curl -s --max-time 10 -o /dev/null -w "%{http_code}" http://localhost:8080/health) || response="curl_error"

-              sleep 5
-            done
-          }
+            if [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            elif [ "$response" = "curl_error" ]; then
+              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
+            else
+              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
+            fi

-          wait_for_service "http://localhost:8080/health" "API server"
-          test_dir="${{ matrix.test-dir.path }}"
-          if [ "$test_dir" = "tests/mcp" ]; then
-            wait_for_service "http://localhost:8090/health" "MCP server"
-          else
-            echo "Skipping MCP server wait for non-MCP suite: $test_dir"
-          fi
-          echo "Finished waiting for services."
+            sleep 5
+          done
+          echo "Finished waiting for service."

      - name: Start Mock Services
        run: |
@@ -393,10 +258,7 @@ jobs:
              -e VESPA_HOST=index \
              -e REDIS_HOST=cache \
              -e API_SERVER_HOST=api_server \
-              -e MCP_SERVER_HOST=mcp_server \
-              -e MCP_SERVER_PORT=8090 \
              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
-              -e EXA_API_KEY=${EXA_API_KEY} \
              -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
              -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
              -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
@@ -438,13 +300,20 @@ jobs:
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

+
  required:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
-    timeout-minutes: 45
    needs: [integration-tests-mit]
    if: ${{ always() }}
    steps:
-      - name: Check job status
-        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
-        run: exit 1
+      - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
+        with:
+          script: |
+            const needs = ${{ toJSON(needs) }};
+            const failed = Object.entries(needs).filter(([, job]) => job.result !== 'success').map(([name, job]) => `${name}: ${job.result}`);  // failure, cancelled and skipped all count as not-success
+            if (failed.length > 0) {
+              core.setFailed(`Required upstream jobs did not succeed (${failed.join(', ')}).`);
+            } else {
+              core.notice('All required jobs succeeded.');
+            }
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -3,18 +3,7 @@ concurrency:
  group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

-on:
-  merge_group:
-  pull_request:
-    branches:
-      - main
-      - "release/**"
-  push:
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read
+on: push

 env:
  # Test Environment Variables
@@ -35,13 +24,6 @@ env:
  MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
  MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}

-  # for MCP API Key tests
-  MCP_API_KEY: test-api-key-12345
-  MCP_API_KEY_TEST_PORT: 8005
-  MCP_API_KEY_TEST_URL: http://host.docker.internal:8005/mcp
-  MCP_API_KEY_SERVER_HOST: 0.0.0.0
-  MCP_API_KEY_SERVER_PUBLIC_HOST: host.docker.internal
-
  MOCK_LLM_RESPONSE: true
  MCP_TEST_SERVER_PORT: 8004
  MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
@@ -54,35 +36,12 @@ env:

 jobs:
  build-web-image:
-    runs-on:
-      [
-        runs-on,
-        runner=4cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-web-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -103,47 +62,17 @@ jobs:
          platforms: linux/arm64
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
          push: true
-          cache-from: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache
-            type=registry,ref=onyxdotapp/onyx-web-server:latest
-          cache-to: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-backend-image:
-    runs-on:
-      [
-        runs-on,
-        runner=1cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-backend-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -164,47 +93,17 @@ jobs:
          platforms: linux/arm64
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
          push: true
-          cache-from: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
-            type=registry,ref=onyxdotapp/onyx-backend:latest
-          cache-to: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-model-server-image:
-    runs-on:
-      [
-        runs-on,
-        runner=1cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-build-model-server-image",
-        "extras=ecr-cache",
-      ]
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Format branch name for cache
-        id: format-branch
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          REF_NAME: ${{ github.ref_name }}
-        run: |
-          if [ -n "${PR_NUMBER}" ]; then
-            CACHE_SUFFIX="${PR_NUMBER}"
-          else
-            # shellcheck disable=SC2001
-            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
-          fi
-          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -225,27 +124,14 @@ jobs:
          platforms: linux/arm64
          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
          push: true
-          cache-from: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
-            type=registry,ref=onyxdotapp/onyx-model-server:latest
-          cache-to: |
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
-            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  playwright-tests:
    needs: [build-web-image, build-backend-image, build-model-server-image]
    name: Playwright Tests (${{ matrix.project }})
-    runs-on:
-      - runs-on
-      - runner=8cpu-linux-arm64
-      - "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}"
-      - "extras=ecr-cache"
-      - volume=50gb
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}", "extras=ecr-cache"]
    strategy:
      fail-fast: false
      matrix:
@@ -254,15 +140,15 @@ jobs:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
        with:
-          persist-credentials: false
+          fetch-depth: 0

      - name: Setup node
-        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
+        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
        with:
          node-version: 22
-          cache: "npm"
+          cache: 'npm'
          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
@@ -282,26 +168,18 @@ jobs:
        run: npx playwright install --with-deps

      - name: Create .env file for Docker Compose
-        env:
-          OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
-          EXA_API_KEY_VALUE: ${{ env.EXA_API_KEY }}
-          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
-          RUN_ID: ${{ github.run_id }}
        run: |
          cat <<EOF > deployment/docker_compose/.env
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
          AUTH_TYPE=basic
-          GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
-          EXA_API_KEY=${EXA_API_KEY_VALUE}
+          GEN_AI_API_KEY=${OPENAI_API_KEY}
+          EXA_API_KEY=${EXA_API_KEY}
          REQUIRE_EMAIL_VERIFICATION=false
          DISABLE_TELEMETRY=true
-          ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
-          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
-          ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
+          ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
+          ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
+          ONYX_WEB_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
          EOF
-          if [ "${{ matrix.project }}" = "no-auth" ]; then
-            echo "PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true" >> deployment/docker_compose/.env
-          fi

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
@@ -315,7 +193,7 @@ jobs:
      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml -f docker-compose.mcp-api-key-test.yml up -d
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml up -d
        id: start_docker

      - name: Wait for service to be ready
@@ -375,65 +253,12 @@ jobs:
            sleep 3
          done

-      - name: Wait for MCP API Key mock server
-        run: |
-          echo "Waiting for MCP API Key mock server on port ${MCP_API_KEY_TEST_PORT:-8005}..."
-          start_time=$(date +%s)
-          timeout=120
-
-          while true; do
-            current_time=$(date +%s)
-            elapsed_time=$((current_time - start_time))
-
-            if [ $elapsed_time -ge $timeout ]; then
-              echo "Timeout reached. MCP API Key mock server did not become ready in ${timeout}s."
-              exit 1
-            fi
-
-            if curl -sf "http://localhost:${MCP_API_KEY_TEST_PORT:-8005}/healthz" > /dev/null; then
-              echo "MCP API Key mock server is ready!"
-              break
-            fi
-
-            sleep 3
-          done
-
-      - name: Wait for web server to be ready
-        run: |
-          echo "Waiting for web server on port 3000..."
-          start_time=$(date +%s)
-          timeout=120
-
-          while true; do
-            current_time=$(date +%s)
-            elapsed_time=$((current_time - start_time))
-
-            if [ $elapsed_time -ge $timeout ]; then
-              echo "Timeout reached. Web server did not become ready in ${timeout}s."
-              exit 1
-            fi
-
-            if curl -sf "http://localhost:3000/api/health" > /dev/null 2>&1 || \
-               curl -sf "http://localhost:3000/" > /dev/null 2>&1; then
-              echo "Web server is ready!"
-              break
-            fi
-
-            echo "Web server not ready yet. Retrying in 3 seconds..."
-            sleep 3
-          done
-
      - name: Run Playwright tests
        working-directory: ./web
-        env:
-          PROJECT: ${{ matrix.project }}
        run: |
          # Create test-results directory to ensure it exists for artifact upload
          mkdir -p test-results
-          if [ "${PROJECT}" = "no-auth" ]; then
-            export PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true
-          fi
-          npx playwright test --project ${PROJECT}
+          npx playwright test --project ${{ matrix.project }}

      - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
        if: always()
@@ -446,12 +271,10 @@ jobs:
      # save before stopping the containers so the logs can be captured
      - name: Save Docker logs
        if: success() || failure()
-        env:
-          WORKSPACE: ${{ github.workspace }}
        run: |
          cd deployment/docker_compose
          docker compose logs > docker-compose.log
-          mv docker-compose.log ${WORKSPACE}/docker-compose.log
+          mv docker-compose.log ${{ github.workspace }}/docker-compose.log

      - name: Upload logs
        if: success() || failure()
@@ -460,16 +283,6 @@ jobs:
          name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log

-  playwright-required:
-    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
-    runs-on: ubuntu-slim
-    timeout-minutes: 45
-    needs: [playwright-tests]
-    if: ${{ always() }}
-    steps:
-      - name: Check job status
-        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
-        run: exit 1

 # NOTE: Chromatic UI diff testing is currently disabled.
 # We are using Playwright for local and CI testing without visual regression checks.
@@ -488,12 +301,12 @@ jobs:
 #     ]
 #   steps:
 #     - name: Checkout code
-#       uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+#       uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
 #       with:
 #         fetch-depth: 0

 #     - name: Setup node
-#       uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
+#       uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
 #       with:
 #         node-version: 22

--- a/.github/workflows/pr-python-checks.yml
+++ b/.github/workflows/pr-python-checks.yml
@@ -9,12 +9,6 @@ on:
    branches:
      - main
      - 'release/**'
-  push:
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read

 jobs:
  mypy-check:
@@ -22,28 +16,26 @@ jobs:
    # Note: Mypy seems quite optimized for x64 compared to arm64.
    # Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.
    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]
-    timeout-minutes: 45

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

-      - name: Setup Python and Install Dependencies
-        uses: ./.github/actions/setup-python-and-install-dependencies
+      # needed for pulling openapitools/openapi-generator-cli
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
-          requirements: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt
-            backend/requirements/model_server.txt
-            backend/requirements/ee.txt
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Generate OpenAPI schema and Python client
-        shell: bash
-        run: |
-          ods openapi all
+      - name: Prepare build
+        uses: ./.github/actions/prepare-build
+        with:
+          docker-username: ${{ secrets.DOCKER_USERNAME }}
+          docker-password: ${{ secrets.DOCKER_TOKEN }}

      - name: Cache mypy cache
        if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
@@ -61,8 +53,11 @@ jobs:
          TERM: xterm-256color
        run: mypy .

-      - name: Run MyPy (tools/)
-        env:
-          MYPY_FORCE_COLOR: 1
-          TERM: xterm-256color
-        run: mypy tools/
+      - name: Check import order with reorder-python-imports
+        working-directory: ./backend
+        run: |
+          find ./onyx -name "*.py" -print0 | xargs -0 reorder-python-imports --py311-plus  # NUL-delimited so paths with spaces or quotes are passed intact
+
+      - name: Check code formatting with Black
+        working-directory: ./backend
+        run: black --check .
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -7,16 +7,10 @@ on:
  merge_group:
  pull_request:
    branches: [main]
-  push:
-    tags:
-      - "v*.*.*"
  schedule:
    # This cron expression runs the job daily at 16:00 UTC (9am PT)
    - cron: "0 16 * * *"

-permissions:
-  contents: read
-
 env:
  # AWS
  AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
@@ -129,26 +123,18 @@ jobs:
  connectors-check:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-connectors-check", "extras=s3-cache"]
-    timeout-minutes: 45

    env:
      PYTHONPATH: ./backend
-      DISABLE_TELEMETRY: "true"

    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Setup Python and Install Dependencies
        uses: ./.github/actions/setup-python-and-install-dependencies
-        with:
-          requirements: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt

      - name: Setup Playwright
        uses: ./.github/actions/setup-playwright
@@ -161,20 +147,16 @@ jobs:
            hubspot:
              - 'backend/onyx/connectors/hubspot/**'
              - 'backend/tests/daily/connectors/hubspot/**'
-              - 'uv.lock'
            salesforce:
              - 'backend/onyx/connectors/salesforce/**'
              - 'backend/tests/daily/connectors/salesforce/**'
-              - 'uv.lock'
            github:
              - 'backend/onyx/connectors/github/**'
              - 'backend/tests/daily/connectors/github/**'
-              - 'uv.lock'
            file_processing:
              - 'backend/onyx/file_processing/**'
-              - 'uv.lock'

-      - name: Run Tests (excluding HubSpot, Salesforce, GitHub, and Coda)
+      - name: Run Tests (excluding HubSpot, Salesforce, and GitHub)
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
          py.test \
@@ -187,8 +169,7 @@ jobs:
            backend/tests/daily/connectors \
            --ignore backend/tests/daily/connectors/hubspot \
            --ignore backend/tests/daily/connectors/salesforce \
-            --ignore backend/tests/daily/connectors/github \
-            --ignore backend/tests/daily/connectors/coda
+            --ignore backend/tests/daily/connectors/github

      - name: Run HubSpot Connector Tests
        if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}
@@ -233,10 +214,8 @@ jobs:
        if: failure() && github.event_name == 'schedule'
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-          REPO: ${{ github.repository }}
-          RUN_ID: ${{ github.run_id }}
        run: |
          curl -X POST \
            -H 'Content-type: application/json' \
-            --data "{\"text\":\"Scheduled Connector Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
+            --data '{"text":"Scheduled Connector Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
            $SLACK_WEBHOOK
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -11,9 +11,6 @@ on:
        required: false
        default: 'main'

-permissions:
-  contents: read
-
 env:
  # Bedrock
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -32,16 +29,13 @@ jobs:
  model-check:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
-    timeout-minutes: 45

    env:
      PYTHONPATH: ./backend

    steps:
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -61,7 +55,7 @@ jobs:
          docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test

      - name: Set up Python
-        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: "pip"
@@ -128,12 +122,10 @@ jobs:
        if: failure() && github.event_name == 'schedule'
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-          REPO: ${{ github.repository }}
-          RUN_ID: ${{ github.run_id }}
        run: |
          curl -X POST \
            -H 'Content-type: application/json' \
-            --data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
+            --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
            $SLACK_WEBHOOK

      - name: Dump all-container logs (optional)
--- a/.github/workflows/pr-python-tests.yml
+++ b/.github/workflows/pr-python-tests.yml
@@ -9,41 +9,28 @@ on:
    branches:
      - main
      - 'release/**'
-  push:
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read

 jobs:
  backend-check:
    # See https://runs-on.com/runners/linux/
    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
-    timeout-minutes: 45


    env:
      PYTHONPATH: ./backend
      REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
-      DISABLE_TELEMETRY: "true"
+      SF_USERNAME: ${{ secrets.SF_USERNAME }}
+      SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
+      SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}

    steps:
    - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

    - name: Checkout code
-      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-      with:
-        persist-credentials: false
+      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

    - name: Setup Python and Install Dependencies
      uses: ./.github/actions/setup-python-and-install-dependencies
-      with:
-        requirements: |
-          backend/requirements/default.txt
-          backend/requirements/dev.txt
-          backend/requirements/model_server.txt
-          backend/requirements/ee.txt

    - name: Run Tests
      shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
--- a/.github/workflows/pr-quality-checks.yml
+++ b/.github/workflows/pr-quality-checks.yml
@@ -6,44 +6,21 @@ concurrency:
 on:
  merge_group:
  pull_request: null
-  push:
-    branches:
-      - main
-    tags:
-      - "v*.*.*"
-
-permissions:
-  contents: read

 jobs:
  quality-checks:
-    runs-on: ubuntu-latest
-    timeout-minutes: 45
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-quality-checks"]
    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
        with:
          fetch-depth: 0
-          persist-credentials: false
-      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
+      - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
        with:
          python-version: "3.11"
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
-      - name: Setup node
-        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6
-        with: # zizmor: ignore[cache-poisoning]
-          node-version: 22
-          cache: "npm"
-          cache-dependency-path: ./web/package-lock.json
-      - name: Install node dependencies
-        working-directory: ./web
-        run: npm ci
-      - uses: j178/prek-action@91fd7d7cf70ae1dee9f4f44e7dfa5d1073fe6623 # ratchet:j178/prek-action@v1
+      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
        with:
-          prek-version: '0.2.21'
-          extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
-      - name: Check Actions
-        uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1
-        with:
-          check_permissions: false
-          check_versions: false
+          extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
--- a/.github/workflows/release-devtools.yml
+++ b/.github/workflows/release-devtools.yml
@@ -1,41 +0,0 @@
-name: Release Devtools
-
-on:
-  push:
-    tags:
-      - "ods/v*.*.*"
-
-jobs:
-  pypi:
-    runs-on: ubuntu-latest
-    environment:
-      name: release-devtools
-    permissions:
-      id-token: write
-    timeout-minutes: 10
-    strategy:
-      matrix:
-        os-arch:
-          - { goos: "linux", goarch: "amd64" }
-          - { goos: "linux", goarch: "arm64" }
-          - { goos: "windows", goarch: "amd64" }
-          - { goos: "windows", goarch: "arm64" }
-          - { goos: "darwin", goarch: "amd64" }
-          - { goos: "darwin", goarch: "arm64" }
-          - { goos: "", goarch: "" }
-    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-          fetch-depth: 0
-      - uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
-        with:
-          enable-cache: false
-          version: "0.9.9"
-      - run: |
-          GOOS="${{ matrix.os-arch.goos }}" \
-          GOARCH="${{ matrix.os-arch.goarch }}" \
-          uv build --wheel
-        working-directory: tools/ods
-      - run: uv publish
-        working-directory: tools/ods
--- a/.github/workflows/sync_foss.yml
+++ b/.github/workflows/sync_foss.yml
@@ -9,15 +9,13 @@ on:
 jobs:
  sync-foss:
    runs-on: ubuntu-latest
-    timeout-minutes: 45
    permissions:
      contents: read
    steps:
      - name: Checkout main Onyx repo
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
        with:
          fetch-depth: 0
-          persist-credentials: false

      - name: Install git-filter-repo
        run: |
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -3,30 +3,30 @@ name: Nightly Tag Push
 on:
  schedule:
    - cron: "0 10 * * *" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
-  workflow_dispatch:

 permissions:
  contents: write # Allows pushing tags to the repository

 jobs:
  create-and-push-tag:
-    runs-on: ubuntu-slim
-    timeout-minutes: 45
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-create-and-push-tag"]

    steps:
      # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
      # see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
      # implement here which needs an actual user's deploy key
+
+      # Additional NOTE: even though this is named "rkuo", the actual key is tied to the onyx repo
+      # and not rkuo's personal account. It is fine to leave this key as is!
      - name: Checkout code
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
        with:
-          ssh-key: "${{ secrets.DEPLOY_KEY }}"
-          persist-credentials: true
+          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"

      - name: Set up Git user
        run: |
-          git config user.name "Onyx Bot [bot]"
-          git config user.email "onyx-bot[bot]@onyx.app"
+          git config user.name "Richard Kuo [bot]"
+          git config user.email "rkuo[bot]@onyx.app"

      - name: Check for existing nightly tag
        id: check_tag
@@ -54,12 +54,3 @@ jobs:
        run: |
          TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
          git push origin $TAG_NAME
-
-      - name: Send Slack notification
-        if: failure()
-        uses: ./.github/actions/slack-notify
-        with:
-          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
-          title: "🚨 Nightly Tag Push Failed"
-          ref-name: ${{ github.ref_name }}
-          failed-jobs: "create-and-push-tag"
--- a/.github/workflows/zizmor.yml
+++ b/.github/workflows/zizmor.yml
@@ -1,39 +0,0 @@
-name: Run Zizmor
-
-on:
-  push:
-    branches: ["main"]
-  pull_request:
-    branches: ["**"]
-
-permissions: {}
-
-jobs:
-  zizmor:
-    name: zizmor
-    runs-on: ubuntu-slim
-    timeout-minutes: 45
-    permissions:
-      security-events: write # needed for SARIF uploads
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
-        with:
-          persist-credentials: false
-
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
-        with:
-          enable-cache: false
-          version: "0.9.9"
-
-      - name: Run zizmor
-        run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Upload SARIF file
-        uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5
-        with:
-          sarif_file: results.sarif
-          category: zizmor
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 # editors
 .vscode
 .zed
-.cursor

 # macos
 .DS_store
@@ -29,8 +28,6 @@ settings.json

 # others
 /deployment/data/nginx/app.conf
-/deployment/data/nginx/mcp.conf.inc
-/deployment/data/nginx/mcp_upstream.conf.inc
 *.sw?
 /backend/tests/regression/answer_quality/search_test_config.yaml
 *.egg-info
@@ -49,10 +46,5 @@ CLAUDE.md
 # Local .terraform.lock.hcl file
 .terraform.lock.hcl

-node_modules
-
 # MCP configs
 .playwright-mcp
-
-# plans
-plans/
--- a/.mcp.json.template
+++ b/.mcp.json.template
@@ -0,0 +1,8 @@
+{
+  "mcpServers": {
+    "onyx-mcp": {
+      "type": "http",
+      "url": "http://localhost:8000/mcp"
+    }
+  }
+}
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,142 +1,61 @@
-default_install_hook_types:
-  - pre-commit
-  - post-checkout
-  - post-merge
-  - post-rewrite
 repos:
-  - repo: https://github.com/astral-sh/uv-pre-commit
-    # From: https://github.com/astral-sh/uv-pre-commit/pull/53/commits/d30b4298e4fb63ce8609e29acdbcf4c9018a483c
-    rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
    hooks:
-      - id: uv-sync
-        args: ["--active", "--locked", "--all-extras"]
-      - id: uv-lock
-        files: ^pyproject\.toml$
-      - id: uv-export
-        name: uv-export default.txt
-        args:
-          [
-            "--no-emit-project",
-            "--no-default-groups",
-            "--no-hashes",
-            "--extra",
-            "backend",
-            "-o",
-            "backend/requirements/default.txt",
-          ]
-        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
-      - id: uv-export
-        name: uv-export dev.txt
-        args:
-          [
-            "--no-emit-project",
-            "--no-default-groups",
-            "--no-hashes",
-            "--extra",
-            "dev",
-            "-o",
-            "backend/requirements/dev.txt",
-          ]
-        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
-      - id: uv-export
-        name: uv-export ee.txt
-        args:
-          [
-            "--no-emit-project",
-            "--no-default-groups",
-            "--no-hashes",
-            "--extra",
-            "ee",
-            "-o",
-            "backend/requirements/ee.txt",
-          ]
-        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
-      - id: uv-export
-        name: uv-export model_server.txt
-        args:
-          [
-            "--no-emit-project",
-            "--no-default-groups",
-            "--no-hashes",
-            "--extra",
-            "model_server",
-            "-o",
-            "backend/requirements/model_server.txt",
-          ]
-        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
-      - id: uv-run
-        name: Check lazy imports
-        args: ["--active", "--with=onyx-devtools", "ods", "check-lazy-imports"]
-        files: ^backend/(?!\.venv/).*\.py$
-      # NOTE: This takes ~6s on a single, large module which is prohibitively slow.
-      # - id: uv-run
-      #   name: mypy
-      #   args: ["--all-extras", "mypy"]
-      #   pass_filenames: true
-      #   files: ^backend/.*\.py$
+      - id: check-yaml
+        files: ^.github/

  - repo: https://github.com/rhysd/actionlint
-    rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
+    rev: v1.7.8
    hooks:
      - id: actionlint

  - repo: https://github.com/psf/black
-    rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
+    rev: 25.1.0
    hooks:
-      - id: black
-        language_version: python3.11
+    - id: black
+      language_version: python3.11

  # this is a fork which keeps compatibility with black
  - repo: https://github.com/wimglenn/reorder-python-imports-black
-    rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d # frozen: v3.14.0
+    rev: v3.14.0
    hooks:
-      - id: reorder-python-imports
-        args: ["--py311-plus", "--application-directories=backend/"]
-        # need to ignore alembic files, since reorder-python-imports gets confused
-        # and thinks that alembic is a local package since there is a folder
-        # in the backend directory called `alembic`
-        exclude: ^backend/alembic/
+    - id: reorder-python-imports
+      args: ['--py311-plus', '--application-directories=backend/']
+      # need to ignore alembic files, since reorder-python-imports gets confused
+      # and thinks that alembic is a local package since there is a folder
+      # in the backend directory called `alembic`
+      exclude: ^backend/alembic/

  # These settings will remove unused imports with side effects
  # Note: The repo currently does not and should not have imports with side effects
  - repo: https://github.com/PyCQA/autoflake
-    rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1 # frozen: v2.3.1
+    rev: v2.3.1
    hooks:
      - id: autoflake
-        args:
-          [
-            "--remove-all-unused-imports",
-            "--remove-unused-variables",
-            "--in-place",
-            "--recursive",
-          ]
-
-  - repo: https://github.com/golangci/golangci-lint
-    rev: 9f61b0f53f80672872fced07b6874397c3ed197b # frozen: v2.7.2
-    hooks:
-      - id: golangci-lint
-        entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
+        args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']

  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
-    rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47 # frozen: v0.11.4
+    rev: v0.11.4
    hooks:
      - id: ruff

  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4 # frozen: v3.1.0
+    rev: v3.1.0
    hooks:
-      - id: prettier
-        types_or: [html, css, javascript, ts, tsx]
-        language_version: system
+    - id: prettier
+      types_or: [html, css, javascript, ts, tsx]
+      language_version: system

  - repo: https://github.com/sirwart/ripsecrets
-    rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86 # frozen: v0.1.11
+    rev: v0.1.11
    hooks:
      - id: ripsecrets
        args:
-          - --additional-pattern
-          - ^sk-[A-Za-z0-9_\-]{20,}$
+        - --additional-pattern
+        - ^sk-[A-Za-z0-9_\-]{20,}$
+

  - repo: local
    hooks:
@@ -147,13 +66,36 @@ repos:
        pass_filenames: false
        files: \.tf$

-      # Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
-      # This is a preview package - if it breaks:
-      #   1. Try updating: cd web && npm update @typescript/native-preview
-      #   2. Or fallback to tsc: replace 'tsgo' with 'tsc' below
-      - id: typescript-check
-        name: TypeScript type check
-        entry: bash -c 'cd web && npx tsgo --noEmit --project tsconfig.types.json'
+      - id: check-lazy-imports
+        name: Check lazy imports
+        entry: python3 backend/scripts/check_lazy_imports.py
        language: system
-        pass_filenames: false
-        files: ^web/.*\.(ts|tsx)$
+        files: ^backend/(?!\.venv/).*\.py$
+
+  # We would like to have a mypy pre-commit hook, but due to the fact that
+  # pre-commit runs in its own isolated environment, we would need to install
+  # and keep in sync all dependencies so mypy has access to the appropriate type
+  # stubs. This does not seem worth it at the moment, so for now we will stick to
+  # having mypy run via Github Actions / manually by contributors
+  # - repo: https://github.com/pre-commit/mirrors-mypy
+  #   rev: v1.1.1
+  #   hooks:
+  #     - id: mypy
+  #       exclude: ^tests/
+  #       # below are needed for type stubs since pre-commit runs in its own
+  #       # isolated environment. Unfortunately, this needs to be kept in sync
+  #       # with requirements/dev.txt + requirements/default.txt
+  #       additional_dependencies: [
+  #         alembic==1.10.4,
+  #         types-beautifulsoup4==4.12.0.3,
+  #         types-html5lib==1.1.11.13,
+  #         types-oauthlib==3.2.0.9,
+  #         types-psycopg2==2.9.21.10,
+  #         types-python-dateutil==2.8.19.13,
+  #         types-regex==2023.3.23.1,
+  #         types-requests==2.28.11.17,
+  #         types-retry==0.9.9.3,
+  #         types-urllib3==1.26.25.11
+  #       ]
+  #       # TODO: add back once errors are addressed
+  #       # args: [--strict]
--- a/.vscode/env_template.txt
+++ b/.vscode/env_template.txt
@@ -5,8 +5,11 @@
 # For local dev, often user Authentication is not needed
 AUTH_TYPE=disabled

+# Skip warm up for dev
+SKIP_WARM_UP=True
+
 # Always keep these on for Dev
-# Logs model prompts, reasoning, and answer to stdout
+# Logs all model prompts to stdout
 LOG_ONYX_MODEL_INTERACTIONS=True
 # More verbose logging
 LOG_LEVEL=debug
@@ -34,16 +37,31 @@ OPENAI_API_KEY=<REPLACE THIS>
 GEN_AI_MODEL_VERSION=gpt-4o
 FAST_GEN_AI_MODEL_VERSION=gpt-4o

+# For Onyx Slack Bot, overrides the UI values so no need to set this up via UI every time
+# Only needed if using OnyxBot
+#ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
+#ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
+

 # Python stuff
 PYTHONPATH=../backend
 PYTHONUNBUFFERED=1


+# Internet Search
+EXA_API_KEY=<REPLACE THIS>
+
+
 # Enable the full set of Danswer Enterprise Edition features
 # NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
 ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False

+# Agent Search configs  # TODO: Remove or give proper names
+AGENT_RETRIEVAL_STATS=False   # Note: This setting will incur substantial re-ranking effort
+AGENT_RERANKING_STATS=True
+AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
+AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20
+
 # S3 File Store Configuration (MinIO for local development)
 S3_ENDPOINT_URL=http://localhost:9004
 S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
--- a/.vscode/launch.template.jsonc
+++ b/.vscode/launch.template.jsonc
@@ -20,7 +20,6 @@
        "Web Server",
        "Model Server",
        "API Server",
-        "MCP Server",
        "Slack Bot",
        "Celery primary",
        "Celery light",
@@ -133,6 +132,8 @@
      },
      "consoleTitle": "API Server Console"
    },
+    // For the listener to access the Slack API,
+    // ONYX_BOT_SLACK_APP_TOKEN & ONYX_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
    {
      "name": "Slack Bot",
      "consoleName": "Slack Bot",
@@ -151,34 +152,6 @@
      },
      "consoleTitle": "Slack Bot Console"
    },
-    {
-      "name": "MCP Server",
-      "consoleName": "MCP Server",
-      "type": "debugpy",
-      "request": "launch",
-      "module": "uvicorn",
-      "cwd": "${workspaceFolder}/backend",
-      "envFile": "${workspaceFolder}/.vscode/.env",
-      "env": {
-        "MCP_SERVER_ENABLED": "true",
-        "MCP_SERVER_PORT": "8090",
-        "MCP_SERVER_CORS_ORIGINS": "http://localhost:*",
-        "LOG_LEVEL": "DEBUG",
-        "PYTHONUNBUFFERED": "1"
-      },
-      "args": [
-        "onyx.mcp_server.api:mcp_app",
-        "--reload",
-        "--port",
-        "8090",
-        "--timeout-graceful-shutdown",
-        "0"
-      ],
-      "presentation": {
-        "group": "2"
-      },
-      "consoleTitle": "MCP Server Console"
-    },
    {
      "name": "Celery primary",
      "type": "debugpy",
@@ -508,6 +481,7 @@
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
+      "stopOnEntry": true,
      "presentation": {
        "group": "3"
      }
@@ -553,10 +527,10 @@
      "name": "Install Python Requirements",
      "type": "node",
      "request": "launch",
-      "runtimeExecutable": "uv",
+      "runtimeExecutable": "bash",
      "runtimeArgs": [
-        "sync",
-        "--all-extras"
+        "-c",
+        "pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
      ],
      "cwd": "${workspaceFolder}",
      "console": "integratedTerminal",
@@ -569,14 +543,14 @@
      "name": "Onyx OpenAPI Schema Generator",
      "type": "debugpy",
      "request": "launch",
-      "program": "backend/scripts/onyx_openapi_schema.py",
-      "cwd": "${workspaceFolder}",
+      "program": "scripts/onyx_openapi_schema.py",
+      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.env",
      "env": {
        "PYTHONUNBUFFERED": "1",
-        "PYTHONPATH": "backend"
+        "PYTHONPATH": "."
      },
-      "args": ["--filename", "backend/generated/openapi.json", "--generate-python-client"]
+      "args": ["--filename", "generated/openapi.json"]
    },
    {
      // script to debug multi tenant db issues
--- a/CLAUDE.md.template
+++ b/CLAUDE.md.template
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

 ## KEY NOTES

- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
+- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
 to assume the python venv.
 - To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
 - If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -71,12 +71,12 @@ If using a higher version, sometimes some libraries will not be available (i.e.

 #### Backend: Python requirements

-Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
+Currently, we use pip and recommend creating a virtual environment.

 For convenience here's a command for it:

 ```bash
-uv venv .venv --python 3.11
+python -m venv .venv
 source .venv/bin/activate
 ```

@@ -95,15 +95,33 @@ If using PowerShell, the command slightly differs:
 Install the required python dependencies:

 ```bash
-uv sync --all-extras
+pip install -r backend/requirements/combined.txt
 ```

-Install Playwright for Python (headless browser required by the Web Connector):
+or

 ```bash
-uv run playwright install
+pip install -r backend/requirements/default.txt
+pip install -r backend/requirements/dev.txt
+pip install -r backend/requirements/ee.txt
+pip install -r backend/requirements/model_server.txt
 ```

+Fix vscode/cursor auto-imports:
+```bash
+pip install -e .
+```
+
+Install Playwright for Python (headless browser required by the Web Connector)
+
+In the activated Python virtualenv, install Playwright for Python by running:
+
+```bash
+playwright install
+```
+
+You may have to deactivate and reactivate your virtualenv for `playwright` to appear on your path.
+
 #### Frontend: Node dependencies

 Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
@@ -112,7 +130,7 @@ to manage your Node installations. Once installed, you can run
 ```bash
 nvm install 22 && nvm use 22
 node -v # verify your active version
-```
+``` 

 Navigate to `onyx/web` and run:

@@ -126,15 +144,21 @@ npm i

 For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).

-Then run:
+With the virtual environment active, install the pre-commit library with:

 ```bash
-uv run pre-commit install
+pip install pre-commit
+```
+
+Then, from the `onyx/backend` directory, run:
+
+```bash
+pre-commit install
 ```

 Additionally, we use `mypy` for static type checking.
 Onyx is fully type-annotated, and we want to keep it that way!
-To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
+To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend` directory.

 ### Web

@@ -161,7 +185,7 @@ You will need Docker installed to run these containers.
 First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:

 ```bash
-docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio
+docker compose up -d index relational_db cache minio
 ```

 (index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
--- a/backend/.dockerignore
+++ b/backend/.dockerignore
@@ -15,4 +15,3 @@ build/
 dist/
 .coverage
 htmlcov/
-model_server/legacy/
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -12,13 +12,6 @@ ENV DANSWER_RUNNING_IN_DOCKER="true" \
    DO_NOT_TRACK="true" \
    PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"

-# Create non-root user for security best practices
-RUN groupadd -g 1001 onyx && \
-    useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
-    mkdir -p /var/log/onyx && \
-    chmod 755 /var/log/onyx && \
-    chown onyx:onyx /var/log/onyx
-
 COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/

 # Install system dependencies
@@ -58,7 +51,6 @@ RUN uv pip install --system --no-cache-dir --upgrade \
    pip uninstall -y py && \
    playwright install chromium && \
    playwright install-deps chromium && \
-    chown -R onyx:onyx /app && \
    ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \
    # Cleanup for CVEs and size reduction
    # https://github.com/tornadoweb/tornado/issues/3107
@@ -102,6 +94,13 @@ tiktoken.get_encoding('cl100k_base')"
 # Set up application files
 WORKDIR /app

+# Create non-root user for security best practices
+RUN groupadd -g 1001 onyx && \
+    useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
+    mkdir -p /var/log/onyx && \
+    chmod 755 /var/log/onyx && \
+    chown onyx:onyx /var/log/onyx
+
 # Enterprise Version Files
 COPY --chown=onyx:onyx ./ee /app/ee
 COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -1,29 +1,4 @@
-# Base stage with dependencies
-FROM python:3.11.7-slim-bookworm AS base
-
-ENV DANSWER_RUNNING_IN_DOCKER="true" \
-    HF_HOME=/app/.cache/huggingface
-
-COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
-
-RUN mkdir -p /app/.cache/huggingface
-
-COPY ./requirements/model_server.txt /tmp/requirements.txt
-RUN uv pip install --system --no-cache-dir --upgrade \
-        -r /tmp/requirements.txt && \
-    rm -rf ~/.cache/uv /tmp/*.txt
-
-# Stage for downloading embedding models
-FROM base AS embedding-models
-RUN python -c "from huggingface_hub import snapshot_download; \
-snapshot_download('nomic-ai/nomic-embed-text-v1');"
-
-# Initialize SentenceTransformer to cache the custom architecture
-RUN python -c "from sentence_transformers import SentenceTransformer; \
-SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
-
-# Final stage - combine all downloads
-FROM base AS final
+FROM python:3.11.7-slim-bookworm

 LABEL com.danswer.maintainer="founders@onyx.app"
 LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
@@ -31,17 +6,44 @@ AI models for Onyx. This container and all the code is MIT Licensed and free for
 You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
 visit https://github.com/onyx-dot-app/onyx."

+ENV DANSWER_RUNNING_IN_DOCKER="true" \
+    HF_HOME=/app/.cache/huggingface
+
+COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
+
 # Create non-root user for security best practices
-RUN groupadd -g 1001 onyx && \
-    useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
+RUN mkdir -p /app && \
+    groupadd -g 1001 onyx && \
+    useradd -u 1001 -g onyx -m -s /bin/bash onyx  && \
+    chown -R onyx:onyx /app && \
    mkdir -p /var/log/onyx && \
    chmod 755 /var/log/onyx && \
    chown onyx:onyx /var/log/onyx

-# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
-# running Onyx, move the current contents of the cache folder to a temporary location to ensure
-# it's preserved in order to combine with the user's cache contents
-COPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface
+COPY ./requirements/model_server.txt /tmp/requirements.txt
+RUN uv pip install --system --no-cache-dir --upgrade \
+        -r /tmp/requirements.txt && \
+    rm -rf ~/.cache/uv /tmp/*.txt
+
+# Pre-downloading models for setups with limited egress
+# Download tokenizers, distilbert for the Onyx model
+# Download model weights
+# Run Nomic to pull in the custom architecture and have it cached locally
+RUN python -c "from transformers import AutoTokenizer; \
+AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
+AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
+from huggingface_hub import snapshot_download; \
+snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
+snapshot_download(repo_id='onyx-dot-app/information-content-model'); \
+snapshot_download('nomic-ai/nomic-embed-text-v1'); \
+snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
+from sentence_transformers import SentenceTransformer; \
+SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);" && \
+    # In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
+    # running Onyx, move the current contents of the cache folder to a temporary location to ensure
+    # it's preserved in order to combine with the user's cache contents
+    mv /app/.cache/huggingface /app/.cache/temp_huggingface && \
+    chown -R onyx:onyx /app

 WORKDIR /app

--- a/backend/alembic/README.md
+++ b/backend/alembic/README.md
@@ -7,12 +7,8 @@ Onyx migrations use a generic single-database configuration with an async dbapi.

 ## To generate new migrations:

-From onyx/backend, run:
-`alembic revision -m <DESCRIPTION_OF_MIGRATION>`
-
-Note: you cannot use the `--autogenerate` flag as the automatic schema parsing does not work.
-
-Manually populate the upgrade and downgrade in your new migration.
+From onyx/backend, run:
+`alembic revision --autogenerate -m <DESCRIPTION_OF_MIGRATION>`

 More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html

--- a/backend/alembic/env.py
+++ b/backend/alembic/env.py
@@ -39,9 +39,7 @@ config = context.config
 if config.config_file_name is not None and config.attributes.get(
    "configure_logger", True
 ):
-    # disable_existing_loggers=False prevents breaking pytest's caplog fixture
-    # See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
-    fileConfig(config.config_file_name, disable_existing_loggers=False)
+    fileConfig(config.config_file_name)

 target_metadata = [Base.metadata, ResultModelBase.metadata]

@@ -462,49 +460,8 @@ def run_migrations_offline() -> None:


 def run_migrations_online() -> None:
-    """Run migrations in 'online' mode.
-
-    Supports pytest-alembic by checking for a pre-configured connection
-    in context.config.attributes["connection"]. If present, uses that
-    connection/engine directly instead of creating a new async engine.
-    """
-    # Check if pytest-alembic is providing a connection/engine
-    connectable = context.config.attributes.get("connection", None)
-
-    if connectable is not None:
-        # pytest-alembic is providing an engine - use it directly
-        logger.info("run_migrations_online starting (pytest-alembic mode).")
-
-        # For pytest-alembic, we use the default schema (public)
-        schema_name = context.config.attributes.get(
-            "schema_name", POSTGRES_DEFAULT_SCHEMA
-        )
-
-        # pytest-alembic passes an Engine, we need to get a connection from it
-        with connectable.connect() as connection:
-            # Set search path for the schema
-            connection.execute(text(f'SET search_path TO "{schema_name}"'))
-
-            context.configure(
-                connection=connection,
-                target_metadata=target_metadata,  # type: ignore
-                include_object=include_object,
-                version_table_schema=schema_name,
-                include_schemas=True,
-                compare_type=True,
-                compare_server_default=True,
-                script_location=config.get_main_option("script_location"),
-            )
-
-            with context.begin_transaction():
-                context.run_migrations()
-
-            # Commit the transaction to ensure changes are visible to next migration
-            connection.commit()
-    else:
-        # Normal operation - use async migrations
-        logger.info("run_migrations_online starting.")
-        asyncio.run(run_async_migrations())
+    logger.info("run_migrations_online starting.")
+    asyncio.run(run_async_migrations())


 if context.is_offline_mode():
--- a/backend/alembic/versions/18b5b2524446_add_is_clarification_to_chat_message.py
+++ b/backend/alembic/versions/18b5b2524446_add_is_clarification_to_chat_message.py
@@ -1,29 +0,0 @@
-"""add is_clarification to chat_message
-
-Revision ID: 18b5b2524446
-Revises: 87c52ec39f84
-Create Date: 2025-01-16
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "18b5b2524446"
-down_revision = "87c52ec39f84"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.add_column(
-        "chat_message",
-        sa.Column(
-            "is_clarification", sa.Boolean(), nullable=False, server_default="false"
-        ),
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("chat_message", "is_clarification")
--- a/backend/alembic/versions/1f2a3b4c5d6e_add_internet_search_and_content_providers.py
+++ b/backend/alembic/versions/1f2a3b4c5d6e_add_internet_search_and_content_providers.py
@@ -1,89 +0,0 @@
-"""add internet search and content provider tables
-
-Revision ID: 1f2a3b4c5d6e
-Revises: 9drpiiw74ljy
-Create Date: 2025-11-10 19:45:00.000000
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-
-# revision identifiers, used by Alembic.
-revision = "1f2a3b4c5d6e"
-down_revision = "9drpiiw74ljy"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.create_table(
-        "internet_search_provider",
-        sa.Column("id", sa.Integer(), primary_key=True),
-        sa.Column("name", sa.String(), nullable=False, unique=True),
-        sa.Column("provider_type", sa.String(), nullable=False),
-        sa.Column("api_key", sa.LargeBinary(), nullable=True),
-        sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
-        sa.Column(
-            "is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
-        ),
-        sa.Column(
-            "time_created",
-            sa.DateTime(timezone=True),
-            nullable=False,
-            server_default=sa.text("now()"),
-        ),
-        sa.Column(
-            "time_updated",
-            sa.DateTime(timezone=True),
-            nullable=False,
-            server_default=sa.text("now()"),
-        ),
-    )
-    op.create_index(
-        "ix_internet_search_provider_is_active",
-        "internet_search_provider",
-        ["is_active"],
-    )
-
-    op.create_table(
-        "internet_content_provider",
-        sa.Column("id", sa.Integer(), primary_key=True),
-        sa.Column("name", sa.String(), nullable=False, unique=True),
-        sa.Column("provider_type", sa.String(), nullable=False),
-        sa.Column("api_key", sa.LargeBinary(), nullable=True),
-        sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
-        sa.Column(
-            "is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
-        ),
-        sa.Column(
-            "time_created",
-            sa.DateTime(timezone=True),
-            nullable=False,
-            server_default=sa.text("now()"),
-        ),
-        sa.Column(
-            "time_updated",
-            sa.DateTime(timezone=True),
-            nullable=False,
-            server_default=sa.text("now()"),
-        ),
-    )
-    op.create_index(
-        "ix_internet_content_provider_is_active",
-        "internet_content_provider",
-        ["is_active"],
-    )
-
-
-def downgrade() -> None:
-    op.drop_index(
-        "ix_internet_content_provider_is_active", table_name="internet_content_provider"
-    )
-    op.drop_table("internet_content_provider")
-    op.drop_index(
-        "ix_internet_search_provider_is_active", table_name="internet_search_provider"
-    )
-    op.drop_table("internet_search_provider")
--- a/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py
+++ b/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py
@@ -12,8 +12,8 @@ import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = "23957775e5f5"
 down_revision = "bc9771dccadf"
-branch_labels = None
-depends_on = None
+branch_labels = None  # type: ignore
+depends_on = None  # type: ignore


 def upgrade() -> None:
--- a/backend/alembic/versions/2a391f840e85_add_last_refreshed_at_mcp_server.py
+++ b/backend/alembic/versions/2a391f840e85_add_last_refreshed_at_mcp_server.py
@@ -1,27 +0,0 @@
-"""add last refreshed at mcp server
-
-Revision ID: 2a391f840e85
-Revises: 4cebcbc9b2ae
-Create Date: 2025-12-06 15:19:59.766066
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembi.
-revision = "2a391f840e85"
-down_revision = "4cebcbc9b2ae"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.add_column(
-        "mcp_server",
-        sa.Column("last_refreshed_at", sa.DateTime(timezone=True), nullable=True),
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("mcp_server", "last_refreshed_at")
--- a/backend/alembic/versions/3c9a65f1207f_seed_exa_provider_from_env.py
+++ b/backend/alembic/versions/3c9a65f1207f_seed_exa_provider_from_env.py
@@ -1,89 +0,0 @@
-"""seed_exa_provider_from_env
-
-Revision ID: 3c9a65f1207f
-Revises: 1f2a3b4c5d6e
-Create Date: 2025-11-20 19:18:00.000000
-
-"""
-
-from __future__ import annotations
-
-import os
-
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-from dotenv import load_dotenv, find_dotenv
-
-from onyx.utils.encryption import encrypt_string_to_bytes
-
-revision = "3c9a65f1207f"
-down_revision = "1f2a3b4c5d6e"
-branch_labels = None
-depends_on = None
-
-
-EXA_PROVIDER_NAME = "Exa"
-
-
-def _get_internet_search_table(metadata: sa.MetaData) -> sa.Table:
-    return sa.Table(
-        "internet_search_provider",
-        metadata,
-        sa.Column("id", sa.Integer, primary_key=True),
-        sa.Column("name", sa.String),
-        sa.Column("provider_type", sa.String),
-        sa.Column("api_key", sa.LargeBinary),
-        sa.Column("config", postgresql.JSONB),
-        sa.Column("is_active", sa.Boolean),
-        sa.Column(
-            "time_created",
-            sa.DateTime(timezone=True),
-            nullable=False,
-            server_default=sa.text("now()"),
-        ),
-        sa.Column(
-            "time_updated",
-            sa.DateTime(timezone=True),
-            nullable=False,
-            server_default=sa.text("now()"),
-        ),
-    )
-
-
-def upgrade() -> None:
-    load_dotenv(find_dotenv())
-
-    exa_api_key = os.environ.get("EXA_API_KEY")
-    if not exa_api_key:
-        return
-
-    bind = op.get_bind()
-    metadata = sa.MetaData()
-    table = _get_internet_search_table(metadata)
-
-    existing = bind.execute(
-        sa.select(table.c.id).where(table.c.name == EXA_PROVIDER_NAME)
-    ).first()
-    if existing:
-        return
-
-    encrypted_key = encrypt_string_to_bytes(exa_api_key)
-
-    has_active_provider = bind.execute(
-        sa.select(table.c.id).where(table.c.is_active.is_(True))
-    ).first()
-
-    bind.execute(
-        table.insert().values(
-            name=EXA_PROVIDER_NAME,
-            provider_type="exa",
-            api_key=encrypted_key,
-            config=None,
-            is_active=not bool(has_active_provider),
-        )
-    )
-
-
-def downgrade() -> None:
-    return
--- a/backend/alembic/versions/4cebcbc9b2ae_add_tab_index_to_tool_call.py
+++ b/backend/alembic/versions/4cebcbc9b2ae_add_tab_index_to_tool_call.py
@@ -1,27 +0,0 @@
-"""add tab_index to tool_call
-
-Revision ID: 4cebcbc9b2ae
-Revises: a1b2c3d4e5f6
-Create Date: 2025-12-16
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "4cebcbc9b2ae"
-down_revision = "a1b2c3d4e5f6"
-branch_labels: None = None
-depends_on: None = None
-
-
-def upgrade() -> None:
-    op.add_column(
-        "tool_call",
-        sa.Column("tab_index", sa.Integer(), nullable=False, server_default="0"),
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("tool_call", "tab_index")
--- a/backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py
+++ b/backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py
@@ -62,11 +62,6 @@ def upgrade() -> None:
    )
    """
    )
-
-    # Drop the temporary table to avoid conflicts if migration runs again
-    # (e.g., during upgrade -> downgrade -> upgrade cycles in tests)
-    op.execute("DROP TABLE IF EXISTS temp_connector_credential")
-
    # If no exception was raised, alter the column
    op.alter_column("credential", "source", nullable=True)  # TODO modify
    # # ### end Alembic commands ###
--- a/backend/alembic/versions/4f8a2b3c1d9e_add_open_url_tool.py
+++ b/backend/alembic/versions/4f8a2b3c1d9e_add_open_url_tool.py
@@ -1,104 +0,0 @@
-"""add_open_url_tool
-
-Revision ID: 4f8a2b3c1d9e
-Revises: a852cbe15577
-Create Date: 2025-11-24 12:00:00.000000
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "4f8a2b3c1d9e"
-down_revision = "a852cbe15577"
-branch_labels = None
-depends_on = None
-
-
-OPEN_URL_TOOL = {
-    "name": "OpenURLTool",
-    "display_name": "Open URL",
-    "description": (
-        "The Open URL Action allows the agent to fetch and read contents of web pages."
-    ),
-    "in_code_tool_id": "OpenURLTool",
-    "enabled": True,
-}
-
-
-def upgrade() -> None:
-    conn = op.get_bind()
-
-    # Check if tool already exists
-    existing = conn.execute(
-        sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
-        {"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]},
-    ).fetchone()
-
-    if existing:
-        tool_id = existing[0]
-        # Update existing tool
-        conn.execute(
-            sa.text(
-                """
-                UPDATE tool
-                SET name = :name,
-                    display_name = :display_name,
-                    description = :description
-                WHERE in_code_tool_id = :in_code_tool_id
-                """
-            ),
-            OPEN_URL_TOOL,
-        )
-    else:
-        # Insert new tool
-        conn.execute(
-            sa.text(
-                """
-                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
-                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
-                """
-            ),
-            OPEN_URL_TOOL,
-        )
-        # Get the newly inserted tool's id
-        result = conn.execute(
-            sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
-            {"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]},
-        ).fetchone()
-        tool_id = result[0]  # type: ignore
-
-    # Associate the tool with all existing personas
-    # Get all persona IDs
-    persona_ids = conn.execute(sa.text("SELECT id FROM persona")).fetchall()
-
-    for (persona_id,) in persona_ids:
-        # Check if association already exists
-        exists = conn.execute(
-            sa.text(
-                """
-                SELECT 1 FROM persona__tool
-                WHERE persona_id = :persona_id AND tool_id = :tool_id
-                """
-            ),
-            {"persona_id": persona_id, "tool_id": tool_id},
-        ).fetchone()
-
-        if not exists:
-            conn.execute(
-                sa.text(
-                    """
-                    INSERT INTO persona__tool (persona_id, tool_id)
-                    VALUES (:persona_id, :tool_id)
-                    """
-                ),
-                {"persona_id": persona_id, "tool_id": tool_id},
-            )
-
-
-def downgrade() -> None:
-    # We don't remove the tool on downgrade since it's fine to have it around.
-    # If we upgrade again, it will be a no-op.
-    pass
--- a/backend/alembic/versions/5e6f7a8b9c0d_update_default_persona_prompt.py
+++ b/backend/alembic/versions/5e6f7a8b9c0d_update_default_persona_prompt.py
@@ -1,55 +0,0 @@
-"""update_default_persona_prompt
-
-Revision ID: 5e6f7a8b9c0d
-Revises: 4f8a2b3c1d9e
-Create Date: 2025-11-30 12:00:00.000000
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "5e6f7a8b9c0d"
-down_revision = "4f8a2b3c1d9e"
-branch_labels = None
-depends_on = None
-
-
-DEFAULT_PERSONA_ID = 0
-
-# ruff: noqa: E501, W605 start
-DEFAULT_SYSTEM_PROMPT = """
-You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
-
-The current date is [[CURRENT_DATETIME]].{citation_reminder_or_empty}
-
-# Response Style
-You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
-You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
-For code you prefer to use Markdown and specify the language.
-You can use horizontal rules (---) to separate sections of your responses.
-You can use Markdown tables to format your responses for data, lists, and other structured information.
-""".lstrip()
-# ruff: noqa: E501, W605 end
-
-
-def upgrade() -> None:
-    conn = op.get_bind()
-    conn.execute(
-        sa.text(
-            """
-            UPDATE persona
-            SET system_prompt = :system_prompt
-            WHERE id = :persona_id
-            """
-        ),
-        {"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
-    )
-
-
-def downgrade() -> None:
-    # We don't revert the system prompt on downgrade since we don't know
-    # what the previous value was. The new prompt is a reasonable default.
-    pass
--- a/backend/alembic/versions/6436661d5b65_add_created_at_in_project_userfile.py
+++ b/backend/alembic/versions/6436661d5b65_add_created_at_in_project_userfile.py
@@ -1,44 +0,0 @@
-"""add_created_at_in_project_userfile
-
-Revision ID: 6436661d5b65
-Revises: c7e9f4a3b2d1
-Create Date: 2025-11-24 11:50:24.536052
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "6436661d5b65"
-down_revision = "c7e9f4a3b2d1"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # Add created_at column to project__user_file table
-    op.add_column(
-        "project__user_file",
-        sa.Column(
-            "created_at",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-    )
-    # Add composite index on (project_id, created_at DESC)
-    op.create_index(
-        "ix_project__user_file_project_id_created_at",
-        "project__user_file",
-        ["project_id", sa.text("created_at DESC")],
-    )
-
-
-def downgrade() -> None:
-    # Remove composite index on (project_id, created_at)
-    op.drop_index(
-        "ix_project__user_file_project_id_created_at", table_name="project__user_file"
-    )
-    # Remove created_at column from project__user_file table
-    op.drop_column("project__user_file", "created_at")
--- a/backend/alembic/versions/7bd55f264e1b_add_display_name_to_model_configuration.py
+++ b/backend/alembic/versions/7bd55f264e1b_add_display_name_to_model_configuration.py
@@ -1,27 +0,0 @@
-"""Add display_name to model_configuration
-
-Revision ID: 7bd55f264e1b
-Revises: e8f0d2a38171
-Create Date: 2025-12-04
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "7bd55f264e1b"
-down_revision = "e8f0d2a38171"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.add_column(
-        "model_configuration",
-        sa.Column("display_name", sa.String(), nullable=True),
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("model_configuration", "display_name")
--- a/backend/alembic/versions/7ed603b64d5a_add_mcp_server_and_connection_config_.py
+++ b/backend/alembic/versions/7ed603b64d5a_add_mcp_server_and_connection_config_.py
@@ -42,13 +42,13 @@ def upgrade() -> None:
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
+            server_default=sa.text("now()"),  # type: ignore
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
+            server_default=sa.text("now()"),  # type: ignore
            nullable=False,
        ),
    )
@@ -63,13 +63,13 @@ def upgrade() -> None:
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
+            server_default=sa.text("now()"),  # type: ignore
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
+            server_default=sa.text("now()"),  # type: ignore
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
--- a/backend/alembic/versions/87c52ec39f84_update_default_system_prompt.py
+++ b/backend/alembic/versions/87c52ec39f84_update_default_system_prompt.py
@@ -1,55 +0,0 @@
-"""update_default_system_prompt
-
-Revision ID: 87c52ec39f84
-Revises: 7bd55f264e1b
-Create Date: 2025-12-05 15:54:06.002452
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "87c52ec39f84"
-down_revision = "7bd55f264e1b"
-branch_labels = None
-depends_on = None
-
-
-DEFAULT_PERSONA_ID = 0
-
-# ruff: noqa: E501, W605 start
-DEFAULT_SYSTEM_PROMPT = """
-You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
-
-The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
-
-# Response Style
-You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
-You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
-For code you prefer to use Markdown and specify the language.
-You can use horizontal rules (---) to separate sections of your responses.
-You can use Markdown tables to format your responses for data, lists, and other structured information.
-""".lstrip()
-# ruff: noqa: E501, W605 end
-
-
-def upgrade() -> None:
-    conn = op.get_bind()
-    conn.execute(
-        sa.text(
-            """
-            UPDATE persona
-            SET system_prompt = :system_prompt
-            WHERE id = :persona_id
-            """
-        ),
-        {"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
-    )
-
-
-def downgrade() -> None:
-    # We don't revert the system prompt on downgrade since we don't know
-    # what the previous value was. The new prompt is a reasonable default.
-    pass
--- a/backend/alembic/versions/a01bf2971c5d_update_default_tool_descriptions.py
+++ b/backend/alembic/versions/a01bf2971c5d_update_default_tool_descriptions.py
@@ -1,62 +0,0 @@
-"""update_default_tool_descriptions
-
-Revision ID: a01bf2971c5d
-Revises: 87c52ec39f84
-Create Date: 2025-12-16 15:21:25.656375
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "a01bf2971c5d"
-down_revision = "18b5b2524446"
-branch_labels = None
-depends_on = None
-
-# new tool descriptions (12/2025)
-TOOL_DESCRIPTIONS = {
-    "SearchTool": "The Search Action allows the agent to search through connected knowledge to help build an answer.",
-    "ImageGenerationTool": (
-        "The Image Generation Action allows the agent to use DALL-E 3 or GPT-IMAGE-1 to generate images. "
-        "The action will be used when the user asks the agent to generate an image."
-    ),
-    "WebSearchTool": (
-        "The Web Search Action allows the agent "
-        "to perform internet searches for up-to-date information."
-    ),
-    "KnowledgeGraphTool": (
-        "The Knowledge Graph Search Action allows the agent to search the "
-        "Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Agent, "
-        "and it requires the Knowledge Graph to be enabled."
-    ),
-    "OktaProfileTool": (
-        "The Okta Profile Action allows the agent to fetch the current user's information from Okta. "
-        "This may include the user's name, email, phone number, address, and other details such as their "
-        "manager and direct reports."
-    ),
-}
-
-
-def upgrade() -> None:
-    conn = op.get_bind()
-    conn.execute(sa.text("BEGIN"))
-
-    try:
-        for tool_id, description in TOOL_DESCRIPTIONS.items():
-            conn.execute(
-                sa.text(
-                    "UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
-                ),
-                {"description": description, "tool_id": tool_id},
-            )
-        conn.execute(sa.text("COMMIT"))
-    except Exception as e:
-        conn.execute(sa.text("ROLLBACK"))
-        raise e
-
-
-def downgrade() -> None:
-    pass
--- a/backend/alembic/versions/a1b2c3d4e5f6_add_license_table.py
+++ b/backend/alembic/versions/a1b2c3d4e5f6_add_license_table.py
@@ -1,49 +0,0 @@
-"""add license table
-
-Revision ID: a1b2c3d4e5f6
-Revises: a01bf2971c5d
-Create Date: 2025-12-04 10:00:00.000000
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "a1b2c3d4e5f6"
-down_revision = "a01bf2971c5d"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.create_table(
-        "license",
-        sa.Column("id", sa.Integer(), primary_key=True),
-        sa.Column("license_data", sa.Text(), nullable=False),
-        sa.Column(
-            "created_at",
-            sa.DateTime(timezone=True),
-            server_default=sa.func.now(),
-            nullable=False,
-        ),
-        sa.Column(
-            "updated_at",
-            sa.DateTime(timezone=True),
-            server_default=sa.func.now(),
-            nullable=False,
-        ),
-    )
-
-    # Singleton pattern - only ever one row in this table
-    op.create_index(
-        "idx_license_singleton",
-        "license",
-        [sa.text("(true)")],
-        unique=True,
-    )
-
-
-def downgrade() -> None:
-    op.drop_index("idx_license_singleton", table_name="license")
-    op.drop_table("license")
--- a/backend/alembic/versions/a2b3c4d5e6f7_remove_fast_default_model_name.py
+++ b/backend/alembic/versions/a2b3c4d5e6f7_remove_fast_default_model_name.py
@@ -1,27 +0,0 @@
-"""Remove fast_default_model_name from llm_provider
-
-Revision ID: a2b3c4d5e6f7
-Revises: 2a391f840e85
-Create Date: 2024-12-17
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "a2b3c4d5e6f7"
-down_revision = "2a391f840e85"
-branch_labels: None = None
-depends_on: None = None
-
-
-def upgrade() -> None:
-    op.drop_column("llm_provider", "fast_default_model_name")
-
-
-def downgrade() -> None:
-    op.add_column(
-        "llm_provider",
-        sa.Column("fast_default_model_name", sa.String(), nullable=True),
-    )
--- a/backend/alembic/versions/a852cbe15577_new_chat_history.py
+++ b/backend/alembic/versions/a852cbe15577_new_chat_history.py
@@ -1,425 +0,0 @@
-"""New Chat History
-
-Revision ID: a852cbe15577
-Revises: 6436661d5b65
-Create Date: 2025-11-08 15:16:37.781308
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = "a852cbe15577"
-down_revision = "6436661d5b65"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # 1. Drop old research/agent tables (CASCADE handles dependencies)
-    op.execute("DROP TABLE IF EXISTS research_agent_iteration_sub_step CASCADE")
-    op.execute("DROP TABLE IF EXISTS research_agent_iteration CASCADE")
-    op.execute("DROP TABLE IF EXISTS agent__sub_query__search_doc CASCADE")
-    op.execute("DROP TABLE IF EXISTS agent__sub_query CASCADE")
-    op.execute("DROP TABLE IF EXISTS agent__sub_question CASCADE")
-
-    # 2. ChatMessage table changes
-    # Rename columns and add FKs
-    op.alter_column(
-        "chat_message", "parent_message", new_column_name="parent_message_id"
-    )
-    op.create_foreign_key(
-        "fk_chat_message_parent_message_id",
-        "chat_message",
-        "chat_message",
-        ["parent_message_id"],
-        ["id"],
-    )
-    op.alter_column(
-        "chat_message",
-        "latest_child_message",
-        new_column_name="latest_child_message_id",
-    )
-    op.create_foreign_key(
-        "fk_chat_message_latest_child_message_id",
-        "chat_message",
-        "chat_message",
-        ["latest_child_message_id"],
-        ["id"],
-    )
-
-    # Add new column
-    op.add_column(
-        "chat_message", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
-    )
-
-    # Drop old columns
-    op.drop_column("chat_message", "rephrased_query")
-    op.drop_column("chat_message", "alternate_assistant_id")
-    op.drop_column("chat_message", "overridden_model")
-    op.drop_column("chat_message", "is_agentic")
-    op.drop_column("chat_message", "refined_answer_improvement")
-    op.drop_column("chat_message", "research_type")
-    op.drop_column("chat_message", "research_plan")
-    op.drop_column("chat_message", "research_answer_purpose")
-
-    # 3. ToolCall table changes
-    # Drop the unique constraint first
-    op.drop_constraint("uq_tool_call_message_id", "tool_call", type_="unique")
-
-    # Delete orphaned tool_call rows (those without valid chat_message)
-    op.execute(
-        "DELETE FROM tool_call WHERE message_id NOT IN (SELECT id FROM chat_message)"
-    )
-
-    # Add chat_session_id as nullable first, populate, then make NOT NULL
-    op.add_column(
-        "tool_call",
-        sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=True),
-    )
-
-    # Populate chat_session_id from the related chat_message
-    op.execute(
-        """
-        UPDATE tool_call
-        SET chat_session_id = chat_message.chat_session_id
-        FROM chat_message
-        WHERE tool_call.message_id = chat_message.id
-    """
-    )
-
-    # Now make it NOT NULL and add FK
-    op.alter_column("tool_call", "chat_session_id", nullable=False)
-    op.create_foreign_key(
-        "fk_tool_call_chat_session_id",
-        "tool_call",
-        "chat_session",
-        ["chat_session_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-
-    # Rename message_id and make nullable, recreate FK with CASCADE
-    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
-    op.alter_column(
-        "tool_call",
-        "message_id",
-        new_column_name="parent_chat_message_id",
-        nullable=True,
-    )
-    op.create_foreign_key(
-        "fk_tool_call_parent_chat_message_id",
-        "tool_call",
-        "chat_message",
-        ["parent_chat_message_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-
-    # Add parent_tool_call_id with FK
-    op.add_column(
-        "tool_call", sa.Column("parent_tool_call_id", sa.Integer(), nullable=True)
-    )
-    op.create_foreign_key(
-        "fk_tool_call_parent_tool_call_id",
-        "tool_call",
-        "tool_call",
-        ["parent_tool_call_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-
-    # Add other new columns
-    op.add_column(
-        "tool_call",
-        sa.Column("turn_number", sa.Integer(), nullable=False, server_default="0"),
-    )
-    op.add_column(
-        "tool_call",
-        sa.Column("tool_call_id", sa.String(), nullable=False, server_default=""),
-    )
-    op.add_column("tool_call", sa.Column("reasoning_tokens", sa.Text(), nullable=True))
-    op.add_column(
-        "tool_call",
-        sa.Column("tool_call_tokens", sa.Integer(), nullable=False, server_default="0"),
-    )
-    op.add_column(
-        "tool_call",
-        sa.Column("generated_images", postgresql.JSONB(), nullable=True),
-    )
-
-    # Rename columns
-    op.alter_column(
-        "tool_call", "tool_arguments", new_column_name="tool_call_arguments"
-    )
-    op.alter_column("tool_call", "tool_result", new_column_name="tool_call_response")
-
-    # Change tool_call_response type from JSONB to Text
-    op.execute(
-        """
-        ALTER TABLE tool_call
-        ALTER COLUMN tool_call_response TYPE TEXT
-        USING tool_call_response::text
-    """
-    )
-
-    # Drop old columns
-    op.drop_column("tool_call", "tool_name")
-
-    # 4. Create new association table
-    op.create_table(
-        "tool_call__search_doc",
-        sa.Column("tool_call_id", sa.Integer(), nullable=False),
-        sa.Column("search_doc_id", sa.Integer(), nullable=False),
-        sa.ForeignKeyConstraint(["tool_call_id"], ["tool_call.id"], ondelete="CASCADE"),
-        sa.ForeignKeyConstraint(
-            ["search_doc_id"], ["search_doc.id"], ondelete="CASCADE"
-        ),
-        sa.PrimaryKeyConstraint("tool_call_id", "search_doc_id"),
-    )
-
-    # 5. Persona table change
-    op.add_column(
-        "persona",
-        sa.Column(
-            "replace_base_system_prompt",
-            sa.Boolean(),
-            nullable=False,
-            server_default="false",
-        ),
-    )
-
-
-def downgrade() -> None:
-    # Reverse persona changes
-    op.drop_column("persona", "replace_base_system_prompt")
-
-    # Drop new association table
-    op.drop_table("tool_call__search_doc")
-
-    # Reverse ToolCall changes
-    op.add_column(
-        "tool_call",
-        sa.Column("tool_name", sa.String(), nullable=False, server_default=""),
-    )
-
-    # Change tool_call_response back to JSONB
-    op.execute(
-        """
-        ALTER TABLE tool_call
-        ALTER COLUMN tool_call_response TYPE JSONB
-        USING tool_call_response::jsonb
-    """
-    )
-
-    op.alter_column("tool_call", "tool_call_response", new_column_name="tool_result")
-    op.alter_column(
-        "tool_call", "tool_call_arguments", new_column_name="tool_arguments"
-    )
-
-    op.drop_column("tool_call", "generated_images")
-    op.drop_column("tool_call", "tool_call_tokens")
-    op.drop_column("tool_call", "reasoning_tokens")
-    op.drop_column("tool_call", "tool_call_id")
-    op.drop_column("tool_call", "turn_number")
-
-    op.drop_constraint(
-        "fk_tool_call_parent_tool_call_id", "tool_call", type_="foreignkey"
-    )
-    op.drop_column("tool_call", "parent_tool_call_id")
-
-    op.drop_constraint(
-        "fk_tool_call_parent_chat_message_id", "tool_call", type_="foreignkey"
-    )
-    op.alter_column(
-        "tool_call",
-        "parent_chat_message_id",
-        new_column_name="message_id",
-        nullable=False,
-    )
-    op.create_foreign_key(
-        "tool_call_message_id_fkey",
-        "tool_call",
-        "chat_message",
-        ["message_id"],
-        ["id"],
-    )
-
-    op.drop_constraint("fk_tool_call_chat_session_id", "tool_call", type_="foreignkey")
-    op.drop_column("tool_call", "chat_session_id")
-
-    op.create_unique_constraint("uq_tool_call_message_id", "tool_call", ["message_id"])
-
-    # Reverse ChatMessage changes
-    # Note: research_answer_purpose and research_type were originally String columns,
-    # not Enum types (see migrations 5ae8240accb3 and f8a9b2c3d4e5)
-    op.add_column(
-        "chat_message",
-        sa.Column("research_answer_purpose", sa.String(), nullable=True),
-    )
-    op.add_column(
-        "chat_message", sa.Column("research_plan", postgresql.JSONB(), nullable=True)
-    )
-    op.add_column(
-        "chat_message",
-        sa.Column("research_type", sa.String(), nullable=True),
-    )
-    op.add_column(
-        "chat_message",
-        sa.Column("refined_answer_improvement", sa.Boolean(), nullable=True),
-    )
-    op.add_column(
-        "chat_message",
-        sa.Column("is_agentic", sa.Boolean(), nullable=False, server_default="false"),
-    )
-    op.add_column(
-        "chat_message", sa.Column("overridden_model", sa.String(), nullable=True)
-    )
-    op.add_column(
-        "chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True)
-    )
-    # Recreate the FK constraint that was implicitly dropped when the column was dropped
-    op.create_foreign_key(
-        "fk_chat_message_persona",
-        "chat_message",
-        "persona",
-        ["alternate_assistant_id"],
-        ["id"],
-    )
-    op.add_column(
-        "chat_message", sa.Column("rephrased_query", sa.Text(), nullable=True)
-    )
-
-    op.drop_column("chat_message", "reasoning_tokens")
-
-    op.drop_constraint(
-        "fk_chat_message_latest_child_message_id", "chat_message", type_="foreignkey"
-    )
-    op.alter_column(
-        "chat_message",
-        "latest_child_message_id",
-        new_column_name="latest_child_message",
-    )
-
-    op.drop_constraint(
-        "fk_chat_message_parent_message_id", "chat_message", type_="foreignkey"
-    )
-    op.alter_column(
-        "chat_message", "parent_message_id", new_column_name="parent_message"
-    )
-
-    # Recreate agent sub question and sub query tables
-    op.create_table(
-        "agent__sub_question",
-        sa.Column("id", sa.Integer(), primary_key=True),
-        sa.Column("primary_question_id", sa.Integer(), nullable=False),
-        sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
-        sa.Column("sub_question", sa.Text(), nullable=False),
-        sa.Column("level", sa.Integer(), nullable=False),
-        sa.Column("level_question_num", sa.Integer(), nullable=False),
-        sa.Column(
-            "time_created",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.Column("sub_answer", sa.Text(), nullable=False),
-        sa.Column("sub_question_doc_results", postgresql.JSONB(), nullable=False),
-        sa.ForeignKeyConstraint(
-            ["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
-        ),
-        sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
-        sa.PrimaryKeyConstraint("id"),
-    )
-
-    op.create_table(
-        "agent__sub_query",
-        sa.Column("id", sa.Integer(), primary_key=True),
-        sa.Column("parent_question_id", sa.Integer(), nullable=False),
-        sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
-        sa.Column("sub_query", sa.Text(), nullable=False),
-        sa.Column(
-            "time_created",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.ForeignKeyConstraint(
-            ["parent_question_id"], ["agent__sub_question.id"], ondelete="CASCADE"
-        ),
-        sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
-        sa.PrimaryKeyConstraint("id"),
-    )
-
-    op.create_table(
-        "agent__sub_query__search_doc",
-        sa.Column("sub_query_id", sa.Integer(), nullable=False),
-        sa.Column("search_doc_id", sa.Integer(), nullable=False),
-        sa.ForeignKeyConstraint(
-            ["sub_query_id"], ["agent__sub_query.id"], ondelete="CASCADE"
-        ),
-        sa.ForeignKeyConstraint(["search_doc_id"], ["search_doc.id"]),
-        sa.PrimaryKeyConstraint("sub_query_id", "search_doc_id"),
-    )
-
-    # Recreate research agent tables
-    op.create_table(
-        "research_agent_iteration",
-        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
-        sa.Column("primary_question_id", sa.Integer(), nullable=False),
-        sa.Column("iteration_nr", sa.Integer(), nullable=False),
-        sa.Column(
-            "created_at",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.Column("purpose", sa.String(), nullable=True),
-        sa.Column("reasoning", sa.String(), nullable=True),
-        sa.ForeignKeyConstraint(
-            ["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
-        ),
-        sa.PrimaryKeyConstraint("id"),
-        sa.UniqueConstraint(
-            "primary_question_id",
-            "iteration_nr",
-            name="_research_agent_iteration_unique_constraint",
-        ),
-    )
-
-    op.create_table(
-        "research_agent_iteration_sub_step",
-        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
-        sa.Column("primary_question_id", sa.Integer(), nullable=False),
-        sa.Column("iteration_nr", sa.Integer(), nullable=False),
-        sa.Column("iteration_sub_step_nr", sa.Integer(), nullable=False),
-        sa.Column(
-            "created_at",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.Column("sub_step_instructions", sa.String(), nullable=True),
-        sa.Column("sub_step_tool_id", sa.Integer(), nullable=True),
-        sa.Column("reasoning", sa.String(), nullable=True),
-        sa.Column("sub_answer", sa.String(), nullable=True),
-        sa.Column("cited_doc_results", postgresql.JSONB(), nullable=False),
-        sa.Column("claims", postgresql.JSONB(), nullable=True),
-        sa.Column("is_web_fetch", sa.Boolean(), nullable=True),
-        sa.Column("queries", postgresql.JSONB(), nullable=True),
-        sa.Column("generated_images", postgresql.JSONB(), nullable=True),
-        sa.Column("additional_data", postgresql.JSONB(), nullable=True),
-        sa.Column("file_ids", postgresql.JSONB(), nullable=True),
-        sa.ForeignKeyConstraint(
-            ["primary_question_id", "iteration_nr"],
-            [
-                "research_agent_iteration.primary_question_id",
-                "research_agent_iteration.iteration_nr",
-            ],
-            ondelete="CASCADE",
-        ),
-        sa.ForeignKeyConstraint(["sub_step_tool_id"], ["tool.id"], ondelete="SET NULL"),
-        sa.PrimaryKeyConstraint("id"),
-    )
--- a/backend/alembic/versions/b8c9d0e1f2a3_drop_milestone_table.py
+++ b/backend/alembic/versions/b8c9d0e1f2a3_drop_milestone_table.py
@@ -1,46 +0,0 @@
-"""Drop milestone table
-
-Revision ID: b8c9d0e1f2a3
-Revises: a2b3c4d5e6f7
-Create Date: 2025-12-18
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-import fastapi_users_db_sqlalchemy
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = "b8c9d0e1f2a3"
-down_revision = "a2b3c4d5e6f7"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.drop_table("milestone")
-
-
-def downgrade() -> None:
-    op.create_table(
-        "milestone",
-        sa.Column("id", sa.UUID(), nullable=False),
-        sa.Column("tenant_id", sa.String(), nullable=True),
-        sa.Column(
-            "user_id",
-            fastapi_users_db_sqlalchemy.generics.GUID(),
-            nullable=True,
-        ),
-        sa.Column("event_type", sa.String(), nullable=False),
-        sa.Column(
-            "time_created",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.Column("event_tracker", postgresql.JSONB(), nullable=True),
-        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
-        sa.PrimaryKeyConstraint("id"),
-        sa.UniqueConstraint("event_type", name="uq_milestone_event_type"),
-    )
--- a/backend/alembic/versions/c1d2e3f4a5b6_add_deep_research_tool.py
+++ b/backend/alembic/versions/c1d2e3f4a5b6_add_deep_research_tool.py
@@ -1,52 +0,0 @@
-"""add_deep_research_tool
-
-Revision ID: c1d2e3f4a5b6
-Revises: b8c9d0e1f2a3
-Create Date: 2025-12-18 16:00:00.000000
-
-"""
-
-from alembic import op
-from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "c1d2e3f4a5b6"
-down_revision = "b8c9d0e1f2a3"
-branch_labels = None
-depends_on = None
-
-
-DEEP_RESEARCH_TOOL = {
-    "name": RESEARCH_AGENT_DB_NAME,
-    "display_name": "Research Agent",
-    "description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
-    "in_code_tool_id": "ResearchAgent",
-}
-
-
-def upgrade() -> None:
-    conn = op.get_bind()
-    conn.execute(
-        sa.text(
-            """
-            INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
-            VALUES (:name, :display_name, :description, :in_code_tool_id, false)
-            """
-        ),
-        DEEP_RESEARCH_TOOL,
-    )
-
-
-def downgrade() -> None:
-    conn = op.get_bind()
-    conn.execute(
-        sa.text(
-            """
-            DELETE FROM tool
-            WHERE in_code_tool_id = :in_code_tool_id
-            """
-        ),
-        {"in_code_tool_id": DEEP_RESEARCH_TOOL["in_code_tool_id"]},
-    )
--- a/backend/alembic/versions/c7e9f4a3b2d1_add_python_tool.py
+++ b/backend/alembic/versions/c7e9f4a3b2d1_add_python_tool.py
@@ -1,73 +0,0 @@
-"""add_python_tool
-
-Revision ID: c7e9f4a3b2d1
-Revises: 3c9a65f1207f
-Create Date: 2025-11-08 00:00:00.000000
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-
-# revision identifiers, used by Alembic.
-revision = "c7e9f4a3b2d1"
-down_revision = "3c9a65f1207f"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    """Add PythonTool to built-in tools"""
-    conn = op.get_bind()
-
-    conn.execute(
-        sa.text(
-            """
-            INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
-            VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
-            """
-        ),
-        {
-            "name": "PythonTool",
-            # in the UI, call it `Code Interpreter` since this is a well known term for this tool
-            "display_name": "Code Interpreter",
-            "description": (
-                "The Code Interpreter Action allows the assistant to execute "
-                "Python code in a secure, isolated environment for data analysis, "
-                "computation, visualization, and file processing."
-            ),
-            "in_code_tool_id": "PythonTool",
-            "enabled": True,
-        },
-    )
-
-    # needed to store files generated by the python tool
-    op.add_column(
-        "research_agent_iteration_sub_step",
-        sa.Column(
-            "file_ids",
-            postgresql.JSONB(astext_type=sa.Text()),
-            nullable=True,
-        ),
-    )
-
-
-def downgrade() -> None:
-    """Remove PythonTool from built-in tools"""
-    conn = op.get_bind()
-
-    conn.execute(
-        sa.text(
-            """
-            DELETE FROM tool
-            WHERE in_code_tool_id = :in_code_tool_id
-            """
-        ),
-        {
-            "in_code_tool_id": "PythonTool",
-        },
-    )
-
-    op.drop_column("research_agent_iteration_sub_step", "file_ids")
--- a/backend/alembic/versions/c9e2cd766c29_add_s3_file_store_table.py
+++ b/backend/alembic/versions/c9e2cd766c29_add_s3_file_store_table.py
@@ -257,8 +257,8 @@ def _migrate_files_to_external_storage() -> None:
            print(f"File {file_id} not found in PostgreSQL storage.")
            continue

-        lobj_id = cast(int, file_record.lobj_oid)
-        file_metadata = cast(Any, file_record.file_metadata)
+        lobj_id = cast(int, file_record.lobj_oid)  # type: ignore
+        file_metadata = cast(Any, file_record.file_metadata)  # type: ignore

        # Read file content from PostgreSQL
        try:
@@ -280,7 +280,7 @@ def _migrate_files_to_external_storage() -> None:
            else:
                # Convert other types to dict if possible, otherwise None
                try:
-                    file_metadata = dict(file_record.file_metadata)
+                    file_metadata = dict(file_record.file_metadata)  # type: ignore
                except (TypeError, ValueError):
                    file_metadata = None

--- a/backend/alembic/versions/e209dc5a8156_added_prune_frequency.py
+++ b/backend/alembic/versions/e209dc5a8156_added_prune_frequency.py
@@ -11,8 +11,8 @@ import sqlalchemy as sa

 revision = "e209dc5a8156"
 down_revision = "48d14957fe80"
-branch_labels = None
-depends_on = None
+branch_labels = None  # type: ignore
+depends_on = None  # type: ignore


 def upgrade() -> None:
--- a/backend/alembic/versions/e8f0d2a38171_add_status_to_mcp_server_and_make_auth_.py
+++ b/backend/alembic/versions/e8f0d2a38171_add_status_to_mcp_server_and_make_auth_.py
@@ -1,115 +0,0 @@
-"""add status to mcp server and make auth fields nullable
-
-Revision ID: e8f0d2a38171
-Revises: ed9e44312505
-Create Date: 2025-11-28 11:15:37.667340
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-from onyx.db.enums import (
-    MCPTransport,
-    MCPAuthenticationType,
-    MCPAuthenticationPerformer,
-    MCPServerStatus,
-)
-
-# revision identifiers, used by Alembic.
-revision = "e8f0d2a38171"
-down_revision = "ed9e44312505"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # Make auth fields nullable
-    op.alter_column(
-        "mcp_server",
-        "transport",
-        existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
-        nullable=True,
-    )
-
-    op.alter_column(
-        "mcp_server",
-        "auth_type",
-        existing_type=sa.Enum(
-            MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
-        ),
-        nullable=True,
-    )
-
-    op.alter_column(
-        "mcp_server",
-        "auth_performer",
-        existing_type=sa.Enum(
-            MCPAuthenticationPerformer,
-            name="mcp_authentication_performer",
-            native_enum=False,
-        ),
-        nullable=True,
-    )
-
-    # Add status column with default
-    op.add_column(
-        "mcp_server",
-        sa.Column(
-            "status",
-            sa.Enum(MCPServerStatus, name="mcp_server_status", native_enum=False),
-            nullable=False,
-            server_default="CREATED",
-        ),
-    )
-
-    # For existing records, mark status as CONNECTED
-    bind = op.get_bind()
-    bind.execute(
-        sa.text(
-            """
-        UPDATE mcp_server
-        SET status = 'CONNECTED'
-        WHERE status != 'CONNECTED'
-        and admin_connection_config_id IS NOT NULL
-        """
-        )
-    )
-
-
-def downgrade() -> None:
-    # Remove status column
-    op.drop_column("mcp_server", "status")
-
-    # Make auth fields non-nullable (set defaults first)
-    op.execute(
-        "UPDATE mcp_server SET transport = 'STREAMABLE_HTTP' WHERE transport IS NULL"
-    )
-    op.execute("UPDATE mcp_server SET auth_type = 'NONE' WHERE auth_type IS NULL")
-    op.execute(
-        "UPDATE mcp_server SET auth_performer = 'ADMIN' WHERE auth_performer IS NULL"
-    )
-
-    op.alter_column(
-        "mcp_server",
-        "transport",
-        existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
-        nullable=False,
-    )
-    op.alter_column(
-        "mcp_server",
-        "auth_type",
-        existing_type=sa.Enum(
-            MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
-        ),
-        nullable=False,
-    )
-    op.alter_column(
-        "mcp_server",
-        "auth_performer",
-        existing_type=sa.Enum(
-            MCPAuthenticationPerformer,
-            name="mcp_authentication_performer",
-            native_enum=False,
-        ),
-        nullable=False,
-    )
--- a/backend/alembic/versions/ed9e44312505_add_icon_name_field.py
+++ b/backend/alembic/versions/ed9e44312505_add_icon_name_field.py
@@ -1,34 +0,0 @@
-"""Add icon_name field
-
-Revision ID: ed9e44312505
-Revises: 5e6f7a8b9c0d
-Create Date: 2025-12-03 16:35:07.828393
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "ed9e44312505"
-down_revision = "5e6f7a8b9c0d"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # Add icon_name column
-    op.add_column("persona", sa.Column("icon_name", sa.String(), nullable=True))
-
-    # Remove old icon columns
-    op.drop_column("persona", "icon_shape")
-    op.drop_column("persona", "icon_color")
-
-
-def downgrade() -> None:
-    # Re-add old icon columns
-    op.add_column("persona", sa.Column("icon_color", sa.String(), nullable=True))
-    op.add_column("persona", sa.Column("icon_shape", sa.Integer(), nullable=True))
-
-    # Remove icon_name column
-    op.drop_column("persona", "icon_name")
--- a/backend/alembic_tenants/env.py
+++ b/backend/alembic_tenants/env.py
@@ -20,9 +20,7 @@ config = context.config
 if config.config_file_name is not None and config.attributes.get(
    "configure_logger", True
 ):
-    # disable_existing_loggers=False prevents breaking pytest's caplog fixture
-    # See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
-    fileConfig(config.config_file_name, disable_existing_loggers=False)
+    fileConfig(config.config_file_name)

 # add your model's MetaData object here
 # for 'autogenerate' support
@@ -84,9 +82,9 @@ def run_migrations_offline() -> None:
 def do_run_migrations(connection: Connection) -> None:
    context.configure(
        connection=connection,
-        target_metadata=target_metadata,  # type: ignore[arg-type]
+        target_metadata=target_metadata,  # type: ignore
        include_object=include_object,
-    )
+    )  # type: ignore

    with context.begin_transaction():
        context.run_migrations()
@@ -110,24 +108,9 @@ async def run_async_migrations() -> None:


 def run_migrations_online() -> None:
-    """Run migrations in 'online' mode.
+    """Run migrations in 'online' mode."""

-    Supports pytest-alembic by checking for a pre-configured connection
-    in context.config.attributes["connection"]. If present, uses that
-    connection/engine directly instead of creating a new async engine.
-    """
-    # Check if pytest-alembic is providing a connection/engine
-    connectable = context.config.attributes.get("connection", None)
-
-    if connectable is not None:
-        # pytest-alembic is providing an engine - use it directly
-        with connectable.connect() as connection:
-            do_run_migrations(connection)
-            # Commit to ensure changes are visible to next migration
-            connection.commit()
-    else:
-        # Normal operation - use async migrations
-        asyncio.run(run_async_migrations())
+    asyncio.run(run_async_migrations())


 if context.is_offline_mode():
--- a/backend/docker-bake.hcl
+++ b/backend/docker-bake.hcl
@@ -0,0 +1,27 @@
+variable "REPOSITORY" {
+  default = "onyxdotapp/onyx-integration"
+}
+
+variable "TAG" {
+  default = "latest"
+}
+
+target "backend" {
+  context    = "."
+  dockerfile = "Dockerfile"
+}
+
+target "integration" {
+  context    = "."
+  dockerfile = "tests/integration/Dockerfile"
+
+  // Provide the base image via build context from the backend target
+  contexts = {
+    base = "target:backend"
+  }
+
+  cache-from = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache"]
+  cache-to   = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache,mode=max"]
+
+  tags      = ["${REPOSITORY}:${TAG}"]
+}
--- a/backend/ee/onyx/configs/app_configs.py
+++ b/backend/ee/onyx/configs/app_configs.py
@@ -41,10 +41,6 @@ CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (
 JIRA_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.environ.get("JIRA_PERMISSION_DOC_SYNC_FREQUENCY") or 30 * 60
 )
-# In seconds, default is 30 minutes
-JIRA_PERMISSION_GROUP_SYNC_FREQUENCY = int(
-    os.environ.get("JIRA_PERMISSION_GROUP_SYNC_FREQUENCY") or 30 * 60
-)


 #####
@@ -128,8 +124,6 @@ SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")
 POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
 POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"

-MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")
-
 HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")

 GATED_TENANTS_KEY = "gated_tenants"
--- a/backend/ee/onyx/db/analytics.py
+++ b/backend/ee/onyx/db/analytics.py
@@ -199,7 +199,10 @@ def fetch_persona_message_analytics(
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
-            ChatSession.persona_id == persona_id,
+            or_(
+                ChatMessage.alternate_assistant_id == persona_id,
+                ChatSession.persona_id == persona_id,
+            ),
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
@@ -228,7 +231,10 @@ def fetch_persona_unique_users(
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
-            ChatSession.persona_id == persona_id,
+            or_(
+                ChatMessage.alternate_assistant_id == persona_id,
+                ChatSession.persona_id == persona_id,
+            ),
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
@@ -259,7 +265,10 @@ def fetch_assistant_message_analytics(
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
-            ChatSession.persona_id == assistant_id,
+            or_(
+                ChatMessage.alternate_assistant_id == assistant_id,
+                ChatSession.persona_id == assistant_id,
+            ),
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
@@ -290,7 +299,10 @@ def fetch_assistant_unique_users(
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
-            ChatSession.persona_id == assistant_id,
+            or_(
+                ChatMessage.alternate_assistant_id == assistant_id,
+                ChatSession.persona_id == assistant_id,
+            ),
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
@@ -320,7 +332,10 @@ def fetch_assistant_unique_users_total(
            ChatMessage.chat_session_id == ChatSession.id,
        )
        .where(
-            ChatSession.persona_id == assistant_id,
+            or_(
+                ChatMessage.alternate_assistant_id == assistant_id,
+                ChatSession.persona_id == assistant_id,
+            ),
            ChatMessage.time_sent >= start,
            ChatMessage.time_sent <= end,
            ChatMessage.message_type == MessageType.ASSISTANT,
--- a/backend/ee/onyx/db/document_set.py
+++ b/backend/ee/onyx/db/document_set.py
@@ -118,6 +118,6 @@ def fetch_document_sets(
            .all()
        )

-        document_set_with_cc_pairs.append((document_set, cc_pairs))
+        document_set_with_cc_pairs.append((document_set, cc_pairs))  # type: ignore

    return document_set_with_cc_pairs
--- a/backend/ee/onyx/db/license.py
+++ b/backend/ee/onyx/db/license.py
@@ -1,278 +0,0 @@
-"""Database and cache operations for the license table."""
-
-from datetime import datetime
-
-from sqlalchemy import func
-from sqlalchemy import select
-from sqlalchemy.orm import Session
-
-from ee.onyx.server.license.models import LicenseMetadata
-from ee.onyx.server.license.models import LicensePayload
-from ee.onyx.server.license.models import LicenseSource
-from onyx.db.models import License
-from onyx.db.models import User
-from onyx.redis.redis_pool import get_redis_client
-from onyx.redis.redis_pool import get_redis_replica_client
-from onyx.utils.logger import setup_logger
-from shared_configs.configs import MULTI_TENANT
-from shared_configs.contextvars import get_current_tenant_id
-
-logger = setup_logger()
-
-LICENSE_METADATA_KEY = "license:metadata"
-LICENSE_CACHE_TTL_SECONDS = 86400  # 24 hours
-
-
-# -----------------------------------------------------------------------------
-# Database CRUD Operations
-# -----------------------------------------------------------------------------
-
-
-def get_license(db_session: Session) -> License | None:
-    """
-    Get the current license (singleton pattern - only one row).
-
-    Args:
-        db_session: Database session
-
-    Returns:
-        License object if exists, None otherwise
-    """
-    return db_session.execute(select(License)).scalars().first()
-
-
-def upsert_license(db_session: Session, license_data: str) -> License:
-    """
-    Insert or update the license (singleton pattern).
-
-    Args:
-        db_session: Database session
-        license_data: Base64-encoded signed license blob
-
-    Returns:
-        The created or updated License object
-    """
-    existing = get_license(db_session)
-
-    if existing:
-        existing.license_data = license_data
-        db_session.commit()
-        db_session.refresh(existing)
-        logger.info("License updated")
-        return existing
-
-    new_license = License(license_data=license_data)
-    db_session.add(new_license)
-    db_session.commit()
-    db_session.refresh(new_license)
-    logger.info("License created")
-    return new_license
-
-
-def delete_license(db_session: Session) -> bool:
-    """
-    Delete the current license.
-
-    Args:
-        db_session: Database session
-
-    Returns:
-        True if deleted, False if no license existed
-    """
-    existing = get_license(db_session)
-    if existing:
-        db_session.delete(existing)
-        db_session.commit()
-        logger.info("License deleted")
-        return True
-    return False
-
-
-# -----------------------------------------------------------------------------
-# Seat Counting
-# -----------------------------------------------------------------------------
-
-
-def get_used_seats(tenant_id: str | None = None) -> int:
-    """
-    Get current seat usage.
-
-    For multi-tenant: counts users in UserTenantMapping for this tenant.
-    For self-hosted: counts all active users (includes both Onyx UI users
-    and Slack users who have been converted to Onyx users).
-    """
-    if MULTI_TENANT:
-        from ee.onyx.server.tenants.user_mapping import get_tenant_count
-
-        return get_tenant_count(tenant_id or get_current_tenant_id())
-    else:
-        # Self-hosted: count all active users (Onyx + converted Slack users)
-        from onyx.db.engine.sql_engine import get_session_with_current_tenant
-
-        with get_session_with_current_tenant() as db_session:
-            result = db_session.execute(
-                select(func.count()).select_from(User).where(User.is_active)  # type: ignore
-            )
-            return result.scalar() or 0
-
-
-# -----------------------------------------------------------------------------
-# Redis Cache Operations
-# -----------------------------------------------------------------------------
-
-
-def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:
-    """
-    Get license metadata from Redis cache.
-
-    Args:
-        tenant_id: Tenant ID (for multi-tenant deployments)
-
-    Returns:
-        LicenseMetadata if cached, None otherwise
-    """
-    tenant = tenant_id or get_current_tenant_id()
-    redis_client = get_redis_replica_client(tenant_id=tenant)
-
-    cached = redis_client.get(LICENSE_METADATA_KEY)
-    if cached:
-        try:
-            cached_str: str
-            if isinstance(cached, bytes):
-                cached_str = cached.decode("utf-8")
-            else:
-                cached_str = str(cached)
-            return LicenseMetadata.model_validate_json(cached_str)
-        except Exception as e:
-            logger.warning(f"Failed to parse cached license metadata: {e}")
-            return None
-    return None
-
-
-def invalidate_license_cache(tenant_id: str | None = None) -> None:
-    """
-    Invalidate the license metadata cache (not the license itself).
-
-    This deletes the cached LicenseMetadata from Redis. The actual license
-    in the database is not affected. Redis delete is idempotent - if the
-    key doesn't exist, this is a no-op.
-
-    Args:
-        tenant_id: Tenant ID (for multi-tenant deployments)
-    """
-    tenant = tenant_id or get_current_tenant_id()
-    redis_client = get_redis_client(tenant_id=tenant)
-    redis_client.delete(LICENSE_METADATA_KEY)
-    logger.info("License cache invalidated")
-
-
-def update_license_cache(
-    payload: LicensePayload,
-    source: LicenseSource | None = None,
-    grace_period_end: datetime | None = None,
-    tenant_id: str | None = None,
-) -> LicenseMetadata:
-    """
-    Update the Redis cache with license metadata.
-
-    We cache all license statuses (ACTIVE, GRACE_PERIOD, GATED_ACCESS) because:
-    1. Frontend needs status to show appropriate UI/banners
-    2. Caching avoids repeated DB + crypto verification on every request
-    3. Status enforcement happens at the feature level, not here
-
-    Args:
-        payload: Verified license payload
-        source: How the license was obtained
-        grace_period_end: Optional grace period end time
-        tenant_id: Tenant ID (for multi-tenant deployments)
-
-    Returns:
-        The cached LicenseMetadata
-    """
-    from ee.onyx.utils.license import get_license_status
-
-    tenant = tenant_id or get_current_tenant_id()
-    redis_client = get_redis_client(tenant_id=tenant)
-
-    used_seats = get_used_seats(tenant)
-    status = get_license_status(payload, grace_period_end)
-
-    metadata = LicenseMetadata(
-        tenant_id=payload.tenant_id,
-        organization_name=payload.organization_name,
-        seats=payload.seats,
-        used_seats=used_seats,
-        plan_type=payload.plan_type,
-        issued_at=payload.issued_at,
-        expires_at=payload.expires_at,
-        grace_period_end=grace_period_end,
-        status=status,
-        source=source,
-        stripe_subscription_id=payload.stripe_subscription_id,
-    )
-
-    redis_client.setex(
-        LICENSE_METADATA_KEY,
-        LICENSE_CACHE_TTL_SECONDS,
-        metadata.model_dump_json(),
-    )
-
-    logger.info(f"License cache updated: {metadata.seats} seats, status={status.value}")
-    return metadata
-
-
-def refresh_license_cache(
-    db_session: Session,
-    tenant_id: str | None = None,
-) -> LicenseMetadata | None:
-    """
-    Refresh the license cache from the database.
-
-    Args:
-        db_session: Database session
-        tenant_id: Tenant ID (for multi-tenant deployments)
-
-    Returns:
-        LicenseMetadata if license exists, None otherwise
-    """
-    from ee.onyx.utils.license import verify_license_signature
-
-    license_record = get_license(db_session)
-    if not license_record:
-        invalidate_license_cache(tenant_id)
-        return None
-
-    try:
-        payload = verify_license_signature(license_record.license_data)
-        return update_license_cache(
-            payload,
-            source=LicenseSource.AUTO_FETCH,
-            tenant_id=tenant_id,
-        )
-    except ValueError as e:
-        logger.error(f"Failed to verify license during cache refresh: {e}")
-        invalidate_license_cache(tenant_id)
-        return None
-
-
-def get_license_metadata(
-    db_session: Session,
-    tenant_id: str | None = None,
-) -> LicenseMetadata | None:
-    """
-    Get license metadata, using cache if available.
-
-    Args:
-        db_session: Database session
-        tenant_id: Tenant ID (for multi-tenant deployments)
-
-    Returns:
-        LicenseMetadata if license exists, None otherwise
-    """
-    # Try cache first
-    cached = get_cached_license_metadata(tenant_id)
-    if cached:
-        return cached
-
-    # Refresh from database
-    return refresh_license_cache(db_session, tenant_id)
--- a/backend/ee/onyx/db/usage_export.py
+++ b/backend/ee/onyx/db/usage_export.py
@@ -55,7 +55,18 @@ def get_empty_chat_messages_entries__paginated(

            # Get assistant name (from session persona, or alternate if specified)
            assistant_name = None
-            if chat_session.persona:
+            if message.alternate_assistant_id:
+                # If there's an alternate assistant, we need to fetch it
+                from onyx.db.models import Persona
+
+                alternate_persona = (
+                    db_session.query(Persona)
+                    .filter(Persona.id == message.alternate_assistant_id)
+                    .first()
+                )
+                if alternate_persona:
+                    assistant_name = alternate_persona.name
+            elif chat_session.persona:
                assistant_name = chat_session.persona.name

            message_skeletons.append(
--- a/backend/ee/onyx/db/user_group.py
+++ b/backend/ee/onyx/db/user_group.py
@@ -8,7 +8,6 @@ from sqlalchemy import func
 from sqlalchemy import Select
 from sqlalchemy import select
 from sqlalchemy import update
-from sqlalchemy.dialects.postgresql import insert
 from sqlalchemy.orm import Session

 from ee.onyx.server.user_group.models import SetCuratorRequest
@@ -363,29 +362,14 @@ def _check_user_group_is_modifiable(user_group: UserGroup) -> None:

 def _add_user__user_group_relationships__no_commit(
    db_session: Session, user_group_id: int, user_ids: list[UUID]
-) -> None:
-    """NOTE: does not commit the transaction.
-
-    This function is idempotent - it will skip users who are already in the group
-    to avoid duplicate key violations during concurrent operations or re-syncs.
-    Uses ON CONFLICT DO NOTHING to keep inserts atomic under concurrency.
-    """
-    if not user_ids:
-        return
-
-    insert_stmt = (
-        insert(User__UserGroup)
-        .values(
-            [
-                {"user_id": user_id, "user_group_id": user_group_id}
-                for user_id in user_ids
-            ]
-        )
-        .on_conflict_do_nothing(
-            index_elements=[User__UserGroup.user_group_id, User__UserGroup.user_id]
-        )
-    )
-    db_session.execute(insert_stmt)
+) -> list[User__UserGroup]:
+    """Stage one User__UserGroup row per user id. NOTE: does not commit.
+
+    Rows are added unconditionally (no check for existing memberships).
+    """
+    rows = [User__UserGroup(user_id=u, user_group_id=user_group_id) for u in user_ids]
+    db_session.add_all(rows)
+    return rows


 def _add_user_group__cc_pair_relationships__no_commit(
@@ -597,48 +581,6 @@ def update_user_curator_relationship(
    db_session.commit()


-def add_users_to_user_group(
-    db_session: Session,
-    user: User | None,
-    user_group_id: int,
-    user_ids: list[UUID],
-) -> UserGroup:
-    db_user_group = fetch_user_group(db_session=db_session, user_group_id=user_group_id)
-    if db_user_group is None:
-        raise ValueError(f"UserGroup with id '{user_group_id}' not found")
-
-    missing_users = [
-        user_id for user_id in user_ids if fetch_user_by_id(db_session, user_id) is None
-    ]
-    if missing_users:
-        raise ValueError(
-            f"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}"
-        )
-
-    _check_user_group_is_modifiable(db_user_group)
-
-    current_user_ids = [user.id for user in db_user_group.users]
-    current_user_ids_set = set(current_user_ids)
-    new_user_ids = [
-        user_id for user_id in user_ids if user_id not in current_user_ids_set
-    ]
-
-    if not new_user_ids:
-        return db_user_group
-
-    user_group_update = UserGroupUpdate(
-        user_ids=current_user_ids + new_user_ids,
-        cc_pair_ids=[cc_pair.id for cc_pair in db_user_group.cc_pairs],
-    )
-
-    return update_user_group(
-        db_session=db_session,
-        user=user,
-        user_group_id=user_group_id,
-        user_group_update=user_group_update,
-    )
-
-
 def update_user_group(
    db_session: Session,
    user: User | None,
@@ -661,17 +603,6 @@ def update_user_group(
    added_user_ids = list(updated_user_ids - current_user_ids)
    removed_user_ids = list(current_user_ids - updated_user_ids)

-    if added_user_ids:
-        missing_users = [
-            user_id
-            for user_id in added_user_ids
-            if fetch_user_by_id(db_session, user_id) is None
-        ]
-        if missing_users:
-            raise ValueError(
-                f"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}"
-            )
-
    # LEAVING THIS HERE FOR NOW FOR GIVING DIFFERENT ROLES
    # ACCESS TO DIFFERENT PERMISSIONS
    # if (removed_user_ids or added_user_ids) and (
--- a/backend/ee/onyx/external_permissions/confluence/group_sync.py
+++ b/backend/ee/onyx/external_permissions/confluence/group_sync.py
@@ -3,15 +3,12 @@ from collections.abc import Generator
 from ee.onyx.db.external_perm import ExternalUserGroup
 from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
 from onyx.background.error_logging import emit_background_error
-from onyx.configs.app_configs import CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC
 from onyx.connectors.confluence.onyx_confluence import (
    get_user_email_from_username__server,
 )
 from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
 from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
-from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.db.models import ConnectorCredentialPair
-from onyx.db.users import get_all_users
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -22,7 +19,7 @@ def _build_group_member_email_map(
 ) -> dict[str, set[str]]:
    group_member_emails: dict[str, set[str]] = {}
    for user in confluence_client.paginated_cql_user_retrieval():
-        logger.info(f"Processing groups for user: {user}")
+        logger.debug(f"Processing groups for user: {user}")

        email = user.email
        if not email:
@@ -34,8 +31,6 @@ def _build_group_member_email_map(
                    confluence_client=confluence_client,
                    user_name=user_name,
                )
-            else:
-                logger.error(f"user result missing username field: {user}")

        if not email:
            # If we still don't have an email, skip this user
@@ -69,92 +64,6 @@ def _build_group_member_email_map(
    return group_member_emails


-def _build_group_member_email_map_from_onyx_users(
-    confluence_client: OnyxConfluence,
-) -> dict[str, set[str]]:
-    """Hacky, but it's the only way to do this as long as the
-    Confluence APIs are broken.
-
-    This is fixed in Confluence Data Center 10.1.0, so first choice
-    is to tell users to upgrade to 10.1.0.
-    https://jira.atlassian.com/browse/CONFSERVER-95999
-    """
-    with get_session_with_current_tenant() as db_session:
-        # don't include external since they are handled by the "through confluence"
-        # user fetching mechanism
-        user_emails = [
-            user.email for user in get_all_users(db_session, include_external=False)
-        ]
-
-    def _infer_username_from_email(email: str) -> str:
-        return email.split("@")[0]
-
-    group_member_emails: dict[str, set[str]] = {}
-    for email in user_emails:
-        logger.info(f"Processing groups for user with email: {email}")
-        try:
-            user_name = _infer_username_from_email(email)
-            response = confluence_client.get_user_details_by_username(user_name)
-            user_key = response.get("userKey")
-            if not user_key:
-                logger.error(f"User key not found for user with email {email}")
-                continue
-
-            all_users_groups: set[str] = set()
-            for group in confluence_client.paginated_groups_by_user_retrieval(user_key):
-                # group name uniqueness is enforced by Confluence, so we can use it as a group ID
-                group_id = group["name"]
-                group_member_emails.setdefault(group_id, set()).add(email)
-                all_users_groups.add(group_id)
-
-            if not all_users_groups:
-                msg = f"No groups found for user with email: {email}"
-                logger.error(msg)
-            else:
-                logger.info(
-                    f"Found groups {all_users_groups} for user with email {email}"
-                )
-        except Exception:
-            logger.exception(f"Error getting user details for user with email {email}")
-
-    return group_member_emails
-
-
-def _build_final_group_to_member_email_map(
-    confluence_client: OnyxConfluence,
-    cc_pair_id: int,
-    # if set, will infer confluence usernames from onyx users in addition to using the
-    # confluence users API. This is a hacky workaround for the fact that the Confluence
-    # users API is broken before Confluence Data Center 10.1.0.
-    use_onyx_users: bool = CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC,
-) -> dict[str, set[str]]:
-    group_to_member_email_map = _build_group_member_email_map(
-        confluence_client=confluence_client,
-        cc_pair_id=cc_pair_id,
-    )
-    group_to_member_email_map_from_onyx_users = (
-        (
-            _build_group_member_email_map_from_onyx_users(
-                confluence_client=confluence_client,
-            )
-        )
-        if use_onyx_users
-        else {}
-    )
-
-    all_group_ids = set(group_to_member_email_map.keys()) | set(
-        group_to_member_email_map_from_onyx_users.keys()
-    )
-    final_group_to_member_email_map = {}
-    for group_id in all_group_ids:
-        group_member_emails = group_to_member_email_map.get(
-            group_id, set()
-        ) | group_to_member_email_map_from_onyx_users.get(group_id, set())
-        final_group_to_member_email_map[group_id] = group_member_emails
-
-    return final_group_to_member_email_map
-
-
 def confluence_group_sync(
    tenant_id: str,
    cc_pair: ConnectorCredentialPair,
@@ -178,12 +87,13 @@ def confluence_group_sync(
    confluence_client._probe_connection(**probe_kwargs)
    confluence_client._initialize_connection(**final_kwargs)

-    group_to_member_email_map = _build_final_group_to_member_email_map(
-        confluence_client, cc_pair.id
+    group_member_email_map = _build_group_member_email_map(
+        confluence_client=confluence_client,
+        cc_pair_id=cc_pair.id,
    )

    all_found_emails = set()
-    for group_id, group_member_emails in group_to_member_email_map.items():
+    for group_id, group_member_emails in group_member_email_map.items():
        yield (
            ExternalUserGroup(
                id=group_id,
--- a/backend/ee/onyx/external_permissions/jira/group_sync.py
+++ b/backend/ee/onyx/external_permissions/jira/group_sync.py
@@ -1,136 +0,0 @@
-from collections.abc import Generator
-
-from jira import JIRA
-
-from ee.onyx.db.external_perm import ExternalUserGroup
-from onyx.connectors.jira.utils import build_jira_client
-from onyx.db.models import ConnectorCredentialPair
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def _get_jira_group_members_email(
-    jira_client: JIRA,
-    group_name: str,
-) -> list[str]:
-    """Get all member emails for a Jira group.
-
-    Filters out app accounts (bots, integrations) and only returns real user emails.
-    """
-    emails: list[str] = []
-
-    try:
-        # group_members returns an OrderedDict of account_id -> member_info
-        members = jira_client.group_members(group=group_name)
-
-        if not members:
-            logger.warning(f"No members found for group {group_name}")
-            return emails
-
-        for account_id, member_info in members.items():
-            # member_info is a dict with keys like 'fullname', 'email', 'active'
-            email = member_info.get("email")
-
-            # Skip "hidden" emails - these are typically app accounts
-            if email and email != "hidden":
-                emails.append(email)
-            else:
-                # For cloud, we might need to fetch user details separately
-                try:
-                    user = jira_client.user(id=account_id)
-
-                    # Skip app accounts (bots, integrations, etc.)
-                    if hasattr(user, "accountType") and user.accountType == "app":
-                        logger.info(
-                            f"Skipping app account {account_id} for group {group_name}"
-                        )
-                        continue
-
-                    if hasattr(user, "emailAddress") and user.emailAddress:
-                        emails.append(user.emailAddress)
-                    else:
-                        logger.warning(f"User {account_id} has no email address")
-                except Exception as e:
-                    logger.warning(
-                        f"Could not fetch email for user {account_id} in group {group_name}: {e}"
-                    )
-
-    except Exception as e:
-        logger.error(f"Error fetching members for group {group_name}: {e}")
-
-    return emails
-
-
-def _build_group_member_email_map(
-    jira_client: JIRA,
-) -> dict[str, set[str]]:
-    """Build a map of group names to member emails."""
-    group_member_emails: dict[str, set[str]] = {}
-
-    try:
-        # Get all groups from Jira - returns a list of group name strings
-        group_names = jira_client.groups()
-
-        if not group_names:
-            logger.warning("No groups found in Jira")
-            return group_member_emails
-
-        logger.info(f"Found {len(group_names)} groups in Jira")
-
-        for group_name in group_names:
-            if not group_name:
-                continue
-
-            member_emails = _get_jira_group_members_email(
-                jira_client=jira_client,
-                group_name=group_name,
-            )
-
-            if member_emails:
-                group_member_emails[group_name] = set(member_emails)
-                logger.debug(
-                    f"Found {len(member_emails)} members for group {group_name}"
-                )
-            else:
-                logger.debug(f"No members found for group {group_name}")
-
-    except Exception as e:
-        logger.error(f"Error building group member email map: {e}")
-
-    return group_member_emails
-
-
-def jira_group_sync(
-    tenant_id: str,
-    cc_pair: ConnectorCredentialPair,
-) -> Generator[ExternalUserGroup, None, None]:
-    """
-    Sync Jira groups and their members.
-
-    This function fetches all groups from Jira and yields ExternalUserGroup
-    objects containing the group ID and member emails.
-    """
-    jira_base_url = cc_pair.connector.connector_specific_config.get("jira_base_url", "")
-    scoped_token = cc_pair.connector.connector_specific_config.get(
-        "scoped_token", False
-    )
-
-    if not jira_base_url:
-        raise ValueError("No jira_base_url found in connector config")
-
-    jira_client = build_jira_client(
-        credentials=cc_pair.credential.credential_json,
-        jira_base=jira_base_url,
-        scoped_token=scoped_token,
-    )
-
-    group_member_email_map = _build_group_member_email_map(jira_client=jira_client)
-    if not group_member_email_map:
-        raise ValueError(f"No groups with members found for cc_pair_id={cc_pair.id}")
-
-    for group_id, group_member_emails in group_member_email_map.items():
-        yield ExternalUserGroup(
-            id=group_id,
-            user_emails=list(group_member_emails),
-        )
--- a/backend/ee/onyx/external_permissions/jira/page_access.py
+++ b/backend/ee/onyx/external_permissions/jira/page_access.py
@@ -16,10 +16,6 @@ HolderMap = dict[str, list[Holder]]
 logger = setup_logger()


-def _get_role_id(holder: Holder) -> str | None:
-    return holder.get("value") or holder.get("parameter")
-
-
 def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
    """
    A "Holder" in JIRA is a person / entity who "holds" the corresponding permission.
@@ -114,137 +110,80 @@ def _get_user_emails(user_holders: list[Holder]) -> list[str]:
    return emails


-def _get_user_emails_and_groups_from_project_roles(
+def _get_user_emails_from_project_roles(
    jira_client: JIRA,
    jira_project: str,
    project_role_holders: list[Holder],
-) -> tuple[list[str], list[str]]:
-    """
-    Get user emails and group names from project roles.
-    Returns a tuple of (emails, group_names).
-    """
-    # Get role IDs - Cloud uses "value", Data Center uses "parameter"
-    role_ids = []
-    for holder in project_role_holders:
-        role_id = _get_role_id(holder)
-        if role_id:
-            role_ids.append(role_id)
-        else:
-            logger.warning(f"No value or parameter in projectRole holder: {holder}")
-
+) -> list[str]:
+    # NOTE (@raunakab) a `parallel_yield` may be helpful here...?
    roles = [
-        jira_client.project_role(project=jira_project, id=role_id)
-        for role_id in role_ids
+        jira_client.project_role(project=jira_project, id=project_role_holder["value"])
+        for project_role_holder in project_role_holders
+        if "value" in project_role_holder
    ]

    emails = []
-    groups = []

    for role in roles:
        if not hasattr(role, "actors"):
-            logger.warning(f"Project role {role} has no actors attribute")
            continue

        for actor in role.actors:
-            # Handle group actors
-            if hasattr(actor, "actorGroup"):
-                group_name = getattr(actor.actorGroup, "name", None) or getattr(
-                    actor.actorGroup, "displayName", None
-                )
-                if group_name:
-                    groups.append(group_name)
+            if not hasattr(actor, "actorUser") or not hasattr(
+                actor.actorUser, "accountId"
+            ):
                continue

-            # Handle user actors
-            if hasattr(actor, "actorUser"):
-                account_id = getattr(actor.actorUser, "accountId", None)
-                if not account_id:
-                    logger.error(f"No accountId in actorUser: {actor.actorUser}")
-                    continue
-
-                user = jira_client.user(id=account_id)
-                if not hasattr(user, "accountType") or user.accountType != "atlassian":
-                    logger.info(
-                        f"Skipping user {account_id} because it is not an atlassian user"
-                    )
-                    continue
-
-                if not hasattr(user, "emailAddress"):
-                    msg = f"User's email address was not able to be retrieved;  {actor.actorUser.accountId=}"
-                    if hasattr(user, "displayName"):
-                        msg += f" {actor.displayName=}"
-                    logger.warning(msg)
-                    continue
-
-                emails.append(user.emailAddress)
+            user = jira_client.user(id=actor.actorUser.accountId)
+            if not hasattr(user, "accountType") or user.accountType != "atlassian":
                continue

-            logger.debug(f"Skipping actor type: {actor}")
+            if not hasattr(user, "emailAddress"):
+                msg = f"User's email address was not able to be retrieved;  {actor.actorUser.accountId=}"
+                if hasattr(actor, "displayName"):  # guard the attribute read below
+                    msg += f" {actor.displayName=}"
+                logger.warning(msg)  # `warn` is a deprecated alias of `warning`
+                continue

-    return emails, groups
+            emails.append(user.emailAddress)
+
+    return emails


 def _build_external_access_from_holder_map(
    jira_client: JIRA, jira_project: str, holder_map: HolderMap
 ) -> ExternalAccess:
    """
-    Build ExternalAccess from the holder map.
-
-    Holder types handled:
-        - "anyone": Public project, anyone can access
-        - "applicationRole": All users with a Jira license can access (treated as public)
-        - "user": Specific users with access
-        - "projectRole": Project roles containing users and/or groups
-        - "group": Groups directly assigned in the permission scheme
+    Build the ExternalAccess for a JIRA project from its permission holder map.
+    An "anyone" holder marks the project public. Otherwise access is the union of the
+    emails from "user" holders and from "projectRole" holders (no group ids are set).
    """
-    # Public access - anyone can view
+
    if "anyone" in holder_map:
        return ExternalAccess(
            external_user_emails=set(), external_user_group_ids=set(), is_public=True
        )

-    # applicationRole means all users with a Jira license can access - treat as public
-    if "applicationRole" in holder_map:
-        return ExternalAccess(
-            external_user_emails=set(), external_user_group_ids=set(), is_public=True
-        )
-
-    # Get emails from explicit user holders
    user_emails = (
        _get_user_emails(user_holders=holder_map["user"])
        if "user" in holder_map
        else []
    )
-
-    # Get emails and groups from project roles
-    project_role_user_emails: list[str] = []
-    project_role_groups: list[str] = []
-    if "projectRole" in holder_map:
-        project_role_user_emails, project_role_groups = (
-            _get_user_emails_and_groups_from_project_roles(
-                jira_client=jira_client,
-                jira_project=jira_project,
-                project_role_holders=holder_map["projectRole"],
-            )
+    project_role_user_emails = (
+        _get_user_emails_from_project_roles(
+            jira_client=jira_client,
+            jira_project=jira_project,
+            project_role_holders=holder_map["projectRole"],
        )
-
-    # Get groups directly assigned in permission scheme (common in Data Center)
-    # Format: {'type': 'group', 'parameter': 'group-name', 'expand': 'group'}
-    direct_groups: list[str] = []
-    if "group" in holder_map:
-        for group_holder in holder_map["group"]:
-            group_name = _get_role_id(group_holder)
-            if group_name:
-                direct_groups.append(group_name)
-            else:
-                logger.error(f"No parameter/value in group holder: {group_holder}")
+        if "projectRole" in holder_map
+        else []
+    )

    external_user_emails = set(user_emails + project_role_user_emails)
-    external_user_group_ids = set(project_role_groups + direct_groups)

    return ExternalAccess(
        external_user_emails=external_user_emails,
-        external_user_group_ids=external_user_group_ids,
+        external_user_group_ids=set(),
        is_public=False,
    )

@@ -258,11 +197,9 @@ def get_project_permissions(
    )

    if not hasattr(project_permissions, "permissions"):
-        logger.error(f"Project {jira_project} has no permissions attribute")
        return None

    if not isinstance(project_permissions.permissions, list):
-        logger.error(f"Project {jira_project} permissions is not a list")
        return None

    holder_map = _build_holder_map(permissions=project_permissions.permissions)
--- a/backend/ee/onyx/external_permissions/sharepoint/permission_utils.py
+++ b/backend/ee/onyx/external_permissions/sharepoint/permission_utils.py
@@ -15,7 +15,6 @@ from ee.onyx.db.external_perm import ExternalUserGroup
 from onyx.access.models import ExternalAccess
 from onyx.access.utils import build_ext_group_name_for_onyx
 from onyx.configs.constants import DocumentSource
-from onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP_REVERSE
 from onyx.connectors.sharepoint.connector import sleep_and_retry
 from onyx.utils.logger import setup_logger

@@ -512,8 +511,8 @@ def get_external_access_from_sharepoint(
                f"Failed to get SharePoint list item ID for item {drive_item.id}"
            )

-        if drive_name in SHARED_DOCUMENTS_MAP_REVERSE:
-            drive_name = SHARED_DOCUMENTS_MAP_REVERSE[drive_name]
+        if drive_name == "Shared Documents":
+            drive_name = "Documents"

        item = client_context.web.lists.get_by_title(drive_name).items.get_by_id(
            item_id
--- a/backend/ee/onyx/external_permissions/sync_params.py
+++ b/backend/ee/onyx/external_permissions/sync_params.py
@@ -11,7 +11,6 @@ from ee.onyx.configs.app_configs import GITHUB_PERMISSION_DOC_SYNC_FREQUENCY
 from ee.onyx.configs.app_configs import GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY
 from ee.onyx.configs.app_configs import GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY
 from ee.onyx.configs.app_configs import JIRA_PERMISSION_DOC_SYNC_FREQUENCY
-from ee.onyx.configs.app_configs import JIRA_PERMISSION_GROUP_SYNC_FREQUENCY
 from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY
 from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY
 from ee.onyx.configs.app_configs import SLACK_PERMISSION_DOC_SYNC_FREQUENCY
@@ -24,7 +23,6 @@ from ee.onyx.external_permissions.gmail.doc_sync import gmail_doc_sync
 from ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync
 from ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync
 from ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync
-from ee.onyx.external_permissions.jira.group_sync import jira_group_sync
 from ee.onyx.external_permissions.perm_sync_types import CensoringFuncType
 from ee.onyx.external_permissions.perm_sync_types import DocSyncFuncType
 from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
@@ -112,11 +110,6 @@ _SOURCE_TO_SYNC_CONFIG: dict[DocumentSource, SyncConfig] = {
            doc_sync_func=jira_doc_sync,
            initial_index_should_sync=True,
        ),
-        group_sync_config=GroupSyncConfig(
-            group_sync_frequency=JIRA_PERMISSION_GROUP_SYNC_FREQUENCY,
-            group_sync_func=jira_group_sync,
-            group_sync_is_cc_pair_agnostic=True,
-        ),
    ),
    # Groups are not needed for Slack.
    # All channel access is done at the individual user level.
--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -14,7 +14,6 @@ from ee.onyx.server.enterprise_settings.api import (
    basic_router as enterprise_settings_router,
 )
 from ee.onyx.server.evals.api import router as evals_router
-from ee.onyx.server.license.api import router as license_router
 from ee.onyx.server.manage.standard_answer import router as standard_answer_router
 from ee.onyx.server.middleware.tenant_tracking import (
    add_api_server_tenant_id_middleware,
@@ -24,7 +23,7 @@ from ee.onyx.server.query_and_chat.chat_backend import (
    router as chat_router,
 )
 from ee.onyx.server.query_and_chat.query_backend import (
-    basic_router as ee_query_router,
+    basic_router as query_router,
 )
 from ee.onyx.server.query_history.api import router as query_history_router
 from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
@@ -49,9 +48,6 @@ from onyx.main import include_auth_router_with_prefix
 from onyx.main import include_router_with_global_prefix_prepended
 from onyx.main import lifespan as lifespan_base
 from onyx.main import use_route_function_names_as_operation_ids
-from onyx.server.query_and_chat.query_backend import (
-    basic_router as query_router,
-)
 from onyx.utils.logger import setup_logger
 from onyx.utils.variable_functionality import global_version
 from shared_configs.configs import MULTI_TENANT
@@ -123,7 +119,6 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, query_history_router)
    # EE only backend APIs
    include_router_with_global_prefix_prepended(application, query_router)
-    include_router_with_global_prefix_prepended(application, ee_query_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, standard_answer_router)
    include_router_with_global_prefix_prepended(application, ee_oauth_router)
@@ -140,8 +135,6 @@ def get_application() -> FastAPI:
    )
    include_router_with_global_prefix_prepended(application, enterprise_settings_router)
    include_router_with_global_prefix_prepended(application, usage_export_router)
-    # License management
-    include_router_with_global_prefix_prepended(application, license_router)

    if MULTI_TENANT:
        # Tenant management
--- a/backend/ee/onyx/server/license/api.py
+++ b/backend/ee/onyx/server/license/api.py
@@ -1,246 +0,0 @@
-"""License API endpoints."""
-
-import requests
-from fastapi import APIRouter
-from fastapi import Depends
-from fastapi import File
-from fastapi import HTTPException
-from fastapi import UploadFile
-from sqlalchemy.orm import Session
-
-from ee.onyx.auth.users import current_admin_user
-from ee.onyx.db.license import delete_license as db_delete_license
-from ee.onyx.db.license import get_license_metadata
-from ee.onyx.db.license import invalidate_license_cache
-from ee.onyx.db.license import refresh_license_cache
-from ee.onyx.db.license import update_license_cache
-from ee.onyx.db.license import upsert_license
-from ee.onyx.server.license.models import LicenseResponse
-from ee.onyx.server.license.models import LicenseSource
-from ee.onyx.server.license.models import LicenseStatusResponse
-from ee.onyx.server.license.models import LicenseUploadResponse
-from ee.onyx.server.license.models import SeatUsageResponse
-from ee.onyx.server.tenants.access import generate_data_plane_token
-from ee.onyx.utils.license import verify_license_signature
-from onyx.auth.users import User
-from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
-from onyx.db.engine.sql_engine import get_session
-from onyx.utils.logger import setup_logger
-from shared_configs.contextvars import get_current_tenant_id
-
-logger = setup_logger()
-
-router = APIRouter(prefix="/license")
-
-
-@router.get("")
-async def get_license_status(
-    _: User = Depends(current_admin_user),
-    db_session: Session = Depends(get_session),
-) -> LicenseStatusResponse:
-    """Get current license status and seat usage."""
-    metadata = get_license_metadata(db_session)
-
-    if not metadata:
-        return LicenseStatusResponse(has_license=False)
-
-    return LicenseStatusResponse(
-        has_license=True,
-        seats=metadata.seats,
-        used_seats=metadata.used_seats,
-        plan_type=metadata.plan_type,
-        issued_at=metadata.issued_at,
-        expires_at=metadata.expires_at,
-        grace_period_end=metadata.grace_period_end,
-        status=metadata.status,
-        source=metadata.source,
-    )
-
-
-@router.get("/seats")
-async def get_seat_usage(
-    _: User = Depends(current_admin_user),
-    db_session: Session = Depends(get_session),
-) -> SeatUsageResponse:
-    """Get detailed seat usage information."""
-    metadata = get_license_metadata(db_session)
-
-    if not metadata:
-        return SeatUsageResponse(
-            total_seats=0,
-            used_seats=0,
-            available_seats=0,
-        )
-
-    return SeatUsageResponse(
-        total_seats=metadata.seats,
-        used_seats=metadata.used_seats,
-        available_seats=max(0, metadata.seats - metadata.used_seats),
-    )
-
-
-@router.post("/fetch")
-async def fetch_license(
-    _: User = Depends(current_admin_user),
-    db_session: Session = Depends(get_session),
-) -> LicenseResponse:
-    """
-    Fetch license from control plane.
-    Used after Stripe checkout completion to retrieve the new license.
-    """
-    tenant_id = get_current_tenant_id()
-
-    try:
-        token = generate_data_plane_token()
-    except ValueError as e:
-        logger.error(f"Failed to generate data plane token: {e}")
-        raise HTTPException(
-            status_code=500, detail="Authentication configuration error"
-        )
-
-    try:
-        headers = {
-            "Authorization": f"Bearer {token}",
-            "Content-Type": "application/json",
-        }
-        url = f"{CONTROL_PLANE_API_BASE_URL}/license/{tenant_id}"
-        response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-
-        data = response.json()
-        if not isinstance(data, dict) or "license" not in data:
-            raise HTTPException(
-                status_code=502, detail="Invalid response from control plane"
-            )
-
-        license_data = data["license"]
-        if not license_data:
-            raise HTTPException(status_code=404, detail="No license found")
-
-        # Verify signature before persisting
-        payload = verify_license_signature(license_data)
-
-        # Verify the fetched license is for this tenant
-        if payload.tenant_id != tenant_id:
-            logger.error(
-                f"License tenant mismatch: expected {tenant_id}, got {payload.tenant_id}"
-            )
-            raise HTTPException(
-                status_code=400,
-                detail="License tenant ID mismatch - control plane returned wrong license",
-            )
-
-        # Persist to DB and update cache atomically
-        upsert_license(db_session, license_data)
-        try:
-            update_license_cache(payload, source=LicenseSource.AUTO_FETCH)
-        except Exception as cache_error:
-            # Log but don't fail - DB is source of truth, cache will refresh on next read
-            logger.warning(f"Failed to update license cache: {cache_error}")
-
-        return LicenseResponse(success=True, license=payload)
-
-    except requests.HTTPError as e:
-        status_code = e.response.status_code if e.response is not None else 502
-        logger.error(f"Control plane returned error: {status_code}")
-        raise HTTPException(
-            status_code=status_code,
-            detail="Failed to fetch license from control plane",
-        )
-    except ValueError as e:
-        logger.error(f"License verification failed: {type(e).__name__}")
-        raise HTTPException(status_code=400, detail=str(e))
-    except requests.RequestException:
-        logger.exception("Failed to fetch license from control plane")
-        raise HTTPException(
-            status_code=502, detail="Failed to connect to control plane"
-        )
-
-
-@router.post("/upload")
-async def upload_license(
-    license_file: UploadFile = File(...),
-    _: User = Depends(current_admin_user),
-    db_session: Session = Depends(get_session),
-) -> LicenseUploadResponse:
-    """
-    Upload a license file manually.
-    Used for air-gapped deployments where control plane is not accessible.
-    """
-    try:
-        content = await license_file.read()
-        license_data = content.decode("utf-8").strip()
-    except UnicodeDecodeError:
-        raise HTTPException(status_code=400, detail="Invalid license file format")
-
-    try:
-        payload = verify_license_signature(license_data)
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-
-    tenant_id = get_current_tenant_id()
-    if payload.tenant_id != tenant_id:
-        raise HTTPException(
-            status_code=400,
-            detail=f"License tenant ID mismatch. Expected {tenant_id}, got {payload.tenant_id}",
-        )
-
-    # Persist to DB and update cache
-    upsert_license(db_session, license_data)
-    try:
-        update_license_cache(payload, source=LicenseSource.MANUAL_UPLOAD)
-    except Exception as cache_error:
-        # Log but don't fail - DB is source of truth, cache will refresh on next read
-        logger.warning(f"Failed to update license cache: {cache_error}")
-
-    return LicenseUploadResponse(
-        success=True,
-        message=f"License uploaded successfully. {payload.seats} seats, expires {payload.expires_at.date()}",
-    )
-
-
-@router.post("/refresh")
-async def refresh_license_cache_endpoint(
-    _: User = Depends(current_admin_user),
-    db_session: Session = Depends(get_session),
-) -> LicenseStatusResponse:
-    """
-    Force refresh the license cache from the database.
-    Useful after manual database changes or to verify license validity.
-    """
-    metadata = refresh_license_cache(db_session)
-
-    if not metadata:
-        return LicenseStatusResponse(has_license=False)
-
-    return LicenseStatusResponse(
-        has_license=True,
-        seats=metadata.seats,
-        used_seats=metadata.used_seats,
-        plan_type=metadata.plan_type,
-        issued_at=metadata.issued_at,
-        expires_at=metadata.expires_at,
-        grace_period_end=metadata.grace_period_end,
-        status=metadata.status,
-        source=metadata.source,
-    )
-
-
-@router.delete("")
-async def delete_license(
-    _: User = Depends(current_admin_user),
-    db_session: Session = Depends(get_session),
-) -> dict[str, bool]:
-    """
-    Delete the current license.
-    Admin only - removes license and invalidates cache.
-    """
-    # Invalidate cache first - if DB delete fails, stale cache is worse than no cache
-    try:
-        invalidate_license_cache()
-    except Exception as cache_error:
-        logger.warning(f"Failed to invalidate license cache: {cache_error}")
-
-    deleted = db_delete_license(db_session)
-
-    return {"deleted": deleted}
--- a/backend/ee/onyx/server/license/models.py
+++ b/backend/ee/onyx/server/license/models.py
@@ -1,92 +0,0 @@
-from datetime import datetime
-from enum import Enum
-
-from pydantic import BaseModel
-
-from onyx.server.settings.models import ApplicationStatus
-
-
-class PlanType(str, Enum):
-    MONTHLY = "monthly"
-    ANNUAL = "annual"
-
-
-class LicenseSource(str, Enum):
-    AUTO_FETCH = "auto_fetch"
-    MANUAL_UPLOAD = "manual_upload"
-
-
-class LicensePayload(BaseModel):
-    """The payload portion of a signed license."""
-
-    version: str
-    tenant_id: str
-    organization_name: str | None = None
-    issued_at: datetime
-    expires_at: datetime
-    seats: int
-    plan_type: PlanType
-    billing_cycle: str | None = None
-    grace_period_days: int = 30
-    stripe_subscription_id: str | None = None
-    stripe_customer_id: str | None = None
-
-
-class LicenseData(BaseModel):
-    """Full signed license structure."""
-
-    payload: LicensePayload
-    signature: str
-
-
-class LicenseMetadata(BaseModel):
-    """Cached license metadata stored in Redis."""
-
-    tenant_id: str
-    organization_name: str | None = None
-    seats: int
-    used_seats: int
-    plan_type: PlanType
-    issued_at: datetime
-    expires_at: datetime
-    grace_period_end: datetime | None = None
-    status: ApplicationStatus
-    source: LicenseSource | None = None
-    stripe_subscription_id: str | None = None
-
-
-class LicenseStatusResponse(BaseModel):
-    """Response for license status API."""
-
-    has_license: bool
-    seats: int = 0
-    used_seats: int = 0
-    plan_type: PlanType | None = None
-    issued_at: datetime | None = None
-    expires_at: datetime | None = None
-    grace_period_end: datetime | None = None
-    status: ApplicationStatus | None = None
-    source: LicenseSource | None = None
-
-
-class LicenseResponse(BaseModel):
-    """Response after license fetch/upload."""
-
-    success: bool
-    message: str | None = None
-    license: LicensePayload | None = None
-
-
-class LicenseUploadResponse(BaseModel):
-    """Response after license upload."""
-
-    success: bool
-    message: str | None = None
-
-
-class SeatUsageResponse(BaseModel):
-    """Response for seat usage API."""
-
-    total_seats: int
-    used_seats: int
-    available_seats: int
--- a/backend/ee/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/chat_backend.py
@@ -8,10 +8,12 @@ from ee.onyx.server.query_and_chat.models import (
    BasicCreateChatMessageWithHistoryRequest,
 )
 from onyx.auth.users import current_user
-from onyx.chat.chat_utils import create_chat_history_chain
+from onyx.chat.chat_utils import combine_message_thread
+from onyx.chat.chat_utils import create_chat_chain
 from onyx.chat.models import ChatBasicResponse
 from onyx.chat.process_message import gather_stream
 from onyx.chat.process_message import stream_chat_message_objects
+from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
 from onyx.configs.constants import MessageType
 from onyx.context.search.models import OptionalSearchSetting
 from onyx.context.search.models import RetrievalDetails
@@ -20,8 +22,9 @@ from onyx.db.chat import create_new_chat_message
 from onyx.db.chat import get_or_create_root_message
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import User
-from onyx.llm.factory import get_llm_for_persona
+from onyx.llm.factory import get_llms_for_persona
 from onyx.natural_language_processing.utils import get_tokenizer
+from onyx.secondary_llm_flows.query_expansion import thread_based_query_rephrase
 from onyx.server.query_and_chat.models import CreateChatMessageRequest
 from onyx.utils.logger import setup_logger

@@ -66,9 +69,9 @@ def handle_simplified_chat_message(
        chat_session_id = chat_message_req.chat_session_id

    try:
-        parent_message = create_chat_history_chain(
+        parent_message, _ = create_chat_chain(
            chat_session_id=chat_session_id, db_session=db_session
-        )[-1]
+        )
    except Exception:
        parent_message = get_or_create_root_message(
            chat_session_id=chat_session_id, db_session=db_session
@@ -100,6 +103,7 @@ def handle_simplified_chat_message(
        chunks_below=0,
        full_doc=chat_message_req.full_doc,
        structured_response_format=chat_message_req.structured_response_format,
+        use_agentic_search=chat_message_req.use_agentic_search,
    )

    packets = stream_chat_message_objects(
@@ -157,13 +161,15 @@ def handle_send_message_simple_with_history(
        persona_id=req.persona_id,
    )

-    llm = get_llm_for_persona(persona=chat_session.persona, user=user)
+    llm, _ = get_llms_for_persona(persona=chat_session.persona, user=user)

    llm_tokenizer = get_tokenizer(
        model_name=llm.config.model_name,
        provider_type=llm.config.model_provider,
    )

+    max_history_tokens = int(llm.config.max_input_tokens * CHAT_TARGET_CHUNK_PERCENTAGE)
+
    # Every chat Session begins with an empty root message
    root_message = get_or_create_root_message(
        chat_session_id=chat_session.id, db_session=db_session
@@ -182,6 +188,17 @@ def handle_send_message_simple_with_history(
        )
    db_session.commit()

+    history_str = combine_message_thread(
+        messages=msg_history,
+        max_tokens=max_history_tokens,
+        llm_tokenizer=llm_tokenizer,
+    )
+
+    rephrased_query = req.query_override or thread_based_query_rephrase(
+        user_query=query,
+        history_str=history_str,
+    )
+
    if req.retrieval_options is None and req.search_doc_ids is None:
        retrieval_options: RetrievalDetails | None = RetrievalDetails(
            run_search=OptionalSearchSetting.ALWAYS,
@@ -199,11 +216,12 @@ def handle_send_message_simple_with_history(
        retrieval_options=retrieval_options,
        # Simple API does not support reranking, hide complexity from user
        rerank_settings=None,
-        query_override=None,
+        query_override=rephrased_query,
        chunks_above=0,
        chunks_below=0,
        full_doc=req.full_doc,
        structured_response_format=req.structured_response_format,
+        use_agentic_search=req.use_agentic_search,
    )

    packets = stream_chat_message_objects(
--- a/backend/ee/onyx/server/query_and_chat/models.py
+++ b/backend/ee/onyx/server/query_and_chat/models.py
@@ -6,14 +6,18 @@ from pydantic import BaseModel
 from pydantic import Field
 from pydantic import model_validator

+from onyx.chat.models import PersonaOverrideConfig
+from onyx.chat.models import QADocsResponse
 from onyx.chat.models import ThreadMessage
 from onyx.configs.constants import DocumentSource
-from onyx.context.search.models import BaseFilters
-from onyx.context.search.models import BasicChunkRequest
+from onyx.context.search.enums import LLMEvaluationType
+from onyx.context.search.enums import SearchType
 from onyx.context.search.models import ChunkContext
-from onyx.context.search.models import InferenceChunk
+from onyx.context.search.models import RerankingDetails
 from onyx.context.search.models import RetrievalDetails
 from onyx.server.manage.models import StandardAnswer
+from onyx.server.query_and_chat.streaming_models import CitationInfo
+from onyx.server.query_and_chat.streaming_models import SubQuestionIdentifier


 class StandardAnswerRequest(BaseModel):
@@ -25,12 +29,14 @@ class StandardAnswerResponse(BaseModel):
    standard_answers: list[StandardAnswer] = Field(default_factory=list)


-class DocumentSearchRequest(BasicChunkRequest):
-    user_selected_filters: BaseFilters | None = None
-
-
-class DocumentSearchResponse(BaseModel):
-    top_documents: list[InferenceChunk]
+class DocumentSearchRequest(ChunkContext):
+    message: str
+    search_type: SearchType
+    retrieval_options: RetrievalDetails
+    recency_bias_multiplier: float = 1.0
+    evaluation_type: LLMEvaluationType
+    # None to use system defaults for reranking
+    rerank_settings: RerankingDetails | None = None


 class BasicCreateChatMessageRequest(ChunkContext):
@@ -54,6 +60,9 @@ class BasicCreateChatMessageRequest(ChunkContext):
    # https://platform.openai.com/docs/guides/structured-outputs/introduction
    structured_response_format: dict | None = None

+    # If True, uses agentic search instead of basic search
+    use_agentic_search: bool = False
+
    @model_validator(mode="after")
    def validate_chat_session_or_persona(self) -> "BasicCreateChatMessageRequest":
        if self.chat_session_id is None and self.persona_id is None:
@@ -73,6 +82,8 @@ class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
    # only works if using an OpenAI model. See the following for more details:
    # https://platform.openai.com/docs/guides/structured-outputs/introduction
    structured_response_format: dict | None = None
+    # If True, uses agentic search instead of basic search
+    use_agentic_search: bool = False


 class SimpleDoc(BaseModel):
@@ -85,17 +96,17 @@ class SimpleDoc(BaseModel):
    metadata: dict | None


-class AgentSubQuestion(BaseModel):
+class AgentSubQuestion(SubQuestionIdentifier):
    sub_question: str
    document_ids: list[str]


-class AgentAnswer(BaseModel):
+class AgentAnswer(SubQuestionIdentifier):
    answer: str
    answer_type: Literal["agent_sub_answer", "agent_level_answer"]


-class AgentSubQuery(BaseModel):
+class AgentSubQuery(SubQuestionIdentifier):
    sub_query: str
    query_id: int

@@ -141,3 +152,45 @@ class AgentSubQuery(BaseModel):
            sorted(level_question_dict.items(), key=lambda x: (x is None, x))
        )
        return sorted_dict
+
+
+class OneShotQARequest(ChunkContext):
+    # Supports simpler APIs that don't deal with chat histories or message edits
+    # Easier APIs to work with for developers
+    persona_override_config: PersonaOverrideConfig | None = None
+    persona_id: int | None = None
+
+    messages: list[ThreadMessage]
+    retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails)
+    rerank_settings: RerankingDetails | None = None
+
+    # allows the caller to specify the exact search query they want to use
+    # can be used if the search query should differ from the message sent to the LLM
+    # will also disable Thread-based Rewording if specified
+    query_override: str | None = None
+
+    # If True, skips generating an AI response to the search query
+    skip_gen_ai_answer_generation: bool = False
+
+    # If True, uses agentic search instead of basic search
+    use_agentic_search: bool = False
+
+    @model_validator(mode="after")
+    def check_persona_fields(self) -> "OneShotQARequest":
+        if self.persona_override_config is None and self.persona_id is None:
+            raise ValueError("Exactly one of persona_config or persona_id must be set")
+        elif self.persona_override_config is not None and (self.persona_id is not None):
+            raise ValueError(
+                "If persona_override_config is set, persona_id cannot be set"
+            )
+        return self
+
+
+class OneShotQAResponse(BaseModel):
+    # This is built piece by piece, any of these can be None as the flow could break
+    answer: str | None = None
+    rephrase: str | None = None
+    citations: list[CitationInfo] | None = None
+    docs: QADocsResponse | None = None
+    error_msg: str | None = None
+    chat_message_id: int | None = None
--- a/backend/ee/onyx/server/query_and_chat/query_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/query_backend.py
@@ -1,23 +1,316 @@
+import json
+from collections.abc import Generator
+
 from fastapi import APIRouter
 from fastapi import Depends
 from fastapi import HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
 from sqlalchemy.orm import Session

 from ee.onyx.onyxbot.slack.handlers.handle_standard_answers import (
    oneoff_standard_answers,
 )
+from ee.onyx.server.query_and_chat.models import DocumentSearchRequest
+from ee.onyx.server.query_and_chat.models import OneShotQARequest
+from ee.onyx.server.query_and_chat.models import OneShotQAResponse
 from ee.onyx.server.query_and_chat.models import StandardAnswerRequest
 from ee.onyx.server.query_and_chat.models import StandardAnswerResponse
 from onyx.auth.users import current_user
+from onyx.chat.chat_utils import combine_message_thread
+from onyx.chat.chat_utils import prepare_chat_message_request
+from onyx.chat.models import AnswerStream
+from onyx.chat.models import PersonaOverrideConfig
+from onyx.chat.models import QADocsResponse
+from onyx.chat.process_message import gather_stream
+from onyx.chat.process_message import stream_chat_message_objects
+from onyx.configs.chat_configs import NUM_RETURNED_HITS
+from onyx.configs.onyxbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
+from onyx.context.search.models import SavedSearchDocWithContent
+from onyx.context.search.models import SearchRequest
+from onyx.context.search.pipeline import SearchPipeline
+from onyx.context.search.utils import dedupe_documents
+from onyx.context.search.utils import drop_llm_indices
+from onyx.context.search.utils import relevant_sections_to_indices
 from onyx.db.engine.sql_engine import get_session
+from onyx.db.models import Persona
 from onyx.db.models import User
+from onyx.db.persona import get_persona_by_id
+from onyx.llm.factory import get_default_llms
+from onyx.llm.factory import get_llms_for_persona
+from onyx.llm.factory import get_main_llm_from_tuple
+from onyx.natural_language_processing.utils import get_tokenizer
+from onyx.server.query_and_chat.streaming_models import CitationInfo
+from onyx.server.utils import get_json_line
 from onyx.utils.logger import setup_logger

-logger = setup_logger()

+logger = setup_logger()
 basic_router = APIRouter(prefix="/query")


+class DocumentSearchPagination(BaseModel):
+    offset: int
+    limit: int
+    returned_count: int
+    has_more: bool
+    next_offset: int | None = None
+
+
+class DocumentSearchResponse(BaseModel):
+    top_documents: list[SavedSearchDocWithContent]
+    llm_indices: list[int]
+    pagination: DocumentSearchPagination
+
+
+def _normalize_pagination(limit: int | None, offset: int | None) -> tuple[int, int]:
+    if limit is None:
+        resolved_limit = NUM_RETURNED_HITS
+    else:
+        resolved_limit = limit
+
+    if resolved_limit <= 0:
+        raise HTTPException(
+            status_code=400, detail="retrieval_options.limit must be positive"
+        )
+
+    if offset is None:
+        resolved_offset = 0
+    else:
+        resolved_offset = offset
+
+    if resolved_offset < 0:
+        raise HTTPException(
+            status_code=400, detail="retrieval_options.offset cannot be negative"
+        )
+
+    return resolved_limit, resolved_offset
+
+
+@basic_router.post("/document-search")
+def handle_search_request(
+    search_request: DocumentSearchRequest,
+    user: User | None = Depends(current_user),
+    db_session: Session = Depends(get_session),
+) -> DocumentSearchResponse:
+    """Simple search endpoint, does not create a new message or records in the DB"""
+    query = search_request.message
+    logger.notice(f"Received document search query: {query}")
+
+    llm, fast_llm = get_default_llms()
+    pagination_limit, pagination_offset = _normalize_pagination(
+        limit=search_request.retrieval_options.limit,
+        offset=search_request.retrieval_options.offset,
+    )
+
+    search_pipeline = SearchPipeline(
+        search_request=SearchRequest(
+            query=query,
+            search_type=search_request.search_type,
+            human_selected_filters=search_request.retrieval_options.filters,
+            enable_auto_detect_filters=search_request.retrieval_options.enable_auto_detect_filters,
+            persona=None,  # For simplicity, default settings should be good for this search
+            offset=pagination_offset,
+            limit=pagination_limit + 1,
+            rerank_settings=search_request.rerank_settings,
+            evaluation_type=search_request.evaluation_type,
+            chunks_above=search_request.chunks_above,
+            chunks_below=search_request.chunks_below,
+            full_doc=search_request.full_doc,
+        ),
+        user=user,
+        llm=llm,
+        fast_llm=fast_llm,
+        skip_query_analysis=False,
+        db_session=db_session,
+        bypass_acl=False,
+    )
+    top_sections = search_pipeline.reranked_sections
+    relevance_sections = search_pipeline.section_relevance
+    top_docs = [
+        SavedSearchDocWithContent(
+            document_id=section.center_chunk.document_id,
+            chunk_ind=section.center_chunk.chunk_id,
+            content=section.center_chunk.content,
+            semantic_identifier=section.center_chunk.semantic_identifier or "Unknown",
+            link=(
+                section.center_chunk.source_links.get(0)
+                if section.center_chunk.source_links
+                else None
+            ),
+            blurb=section.center_chunk.blurb,
+            source_type=section.center_chunk.source_type,
+            boost=section.center_chunk.boost,
+            hidden=section.center_chunk.hidden,
+            metadata=section.center_chunk.metadata,
+            score=section.center_chunk.score or 0.0,
+            match_highlights=section.center_chunk.match_highlights,
+            updated_at=section.center_chunk.updated_at,
+            primary_owners=section.center_chunk.primary_owners,
+            secondary_owners=section.center_chunk.secondary_owners,
+            is_internet=False,
+            db_doc_id=0,
+        )
+        for section in top_sections
+    ]
+
+    # Track whether the underlying retrieval produced more items than requested
+    has_more_results = len(top_docs) > pagination_limit
+
+    # Deduping happens at the last step to avoid harming quality by dropping content early on
+    deduped_docs = top_docs
+    dropped_inds = None
+
+    if search_request.retrieval_options.dedupe_docs:
+        deduped_docs, dropped_inds = dedupe_documents(top_docs)
+
+    llm_indices = relevant_sections_to_indices(
+        relevance_sections=relevance_sections, items=deduped_docs
+    )
+
+    if dropped_inds:
+        llm_indices = drop_llm_indices(
+            llm_indices=llm_indices,
+            search_docs=deduped_docs,
+            dropped_indices=dropped_inds,
+        )
+
+    paginated_docs = deduped_docs[:pagination_limit]
+    llm_indices = [index for index in llm_indices if index < len(paginated_docs)]
+    has_more = has_more_results
+    pagination = DocumentSearchPagination(
+        offset=pagination_offset,
+        limit=pagination_limit,
+        returned_count=len(paginated_docs),
+        has_more=has_more,
+        next_offset=(pagination_offset + pagination_limit) if has_more else None,
+    )
+
+    return DocumentSearchResponse(
+        top_documents=paginated_docs,
+        llm_indices=llm_indices,
+        pagination=pagination,
+    )
+
+
+def get_answer_stream(
+    query_request: OneShotQARequest,
+    user: User | None = Depends(current_user),
+    db_session: Session = Depends(get_session),
+) -> AnswerStream:
+    """Turn a one-shot QA request into a chat request and start streaming it.
+
+    Raises KeyError when neither a persona override config nor a persona ID is
+    supplied. The message thread is merged via combine_message_thread with a
+    budget of MAX_THREAD_CONTEXT_PERCENTAGE of the LLM's max input tokens.
+    """
+    query = query_request.messages[0].message
+    logger.notice(f"Received query for Answer API: {query}")
+
+    override_config = query_request.persona_override_config
+    persona_id = query_request.persona_id
+    if override_config is None and persona_id is None:
+        raise KeyError("Must provide persona ID or Persona Config")
+
+    # An explicit override config takes precedence over a stored persona
+    persona_info: Persona | PersonaOverrideConfig | None = None
+    if override_config is not None:
+        persona_info = override_config
+    elif persona_id is not None:
+        persona_info = get_persona_by_id(
+            persona_id=persona_id,
+            user=user,
+            db_session=db_session,
+            is_for_edit=False,
+        )
+
+    llm = get_main_llm_from_tuple(get_llms_for_persona(persona=persona_info, user=user))
+    llm_config = llm.config
+
+    # Merge the thread into one message within the history token budget
+    combined_message = combine_message_thread(
+        messages=query_request.messages,
+        llm_tokenizer=get_tokenizer(
+            model_name=llm_config.model_name,
+            provider_type=llm_config.model_provider,
+        ),
+        max_tokens=int(llm_config.max_input_tokens * MAX_THREAD_CONTEXT_PERCENTAGE),
+    )
+
+    # Also creates a new chat session
+    request = prepare_chat_message_request(
+        message_text=combined_message,
+        user=user,
+        persona_id=persona_id,
+        persona_override_config=override_config,
+        message_ts_to_respond_to=None,
+        retrieval_details=query_request.retrieval_options,
+        rerank_settings=query_request.rerank_settings,
+        db_session=db_session,
+        use_agentic_search=query_request.use_agentic_search,
+        skip_gen_ai_answer_generation=query_request.skip_gen_ai_answer_generation,
+    )
+
+    return stream_chat_message_objects(
+        new_msg_req=request,
+        user=user,
+        db_session=db_session,
+    )
+
+
+@basic_router.post("/answer-with-citation")
+def get_answer_with_citation(
+    request: OneShotQARequest,
+    db_session: Session = Depends(get_session),
+    user: User | None = Depends(current_user),
+) -> OneShotQAResponse:
+    # Any failure (including a gathered error_msg) is logged and returned as a 500
+    try:
+        answer = gather_stream(get_answer_stream(request, user, db_session))
+        if answer.error_msg:
+            raise RuntimeError(answer.error_msg)
+        citations = [
+            CitationInfo(citation_num=num, document_id=document_id)
+            for num, document_id in answer.cited_documents.items()
+        ]
+        docs = QADocsResponse(
+            top_documents=answer.top_documents,
+            predicted_flow=None,
+            predicted_search=None,
+            applied_source_filters=None,
+            applied_time_cutoff=None,
+            recency_bias_multiplier=0.0,
+        )
+        return OneShotQAResponse(
+            answer=answer.answer,
+            chat_message_id=answer.message_id,
+            error_msg=answer.error_msg,
+            citations=citations,
+            docs=docs,
+        )
+    except Exception as e:
+        logger.error(f"Error in get_answer_with_citation: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail="An internal server error occurred")
+
+
+@basic_router.post("/stream-answer-with-citation")
+def stream_answer_with_citation(
+    request: OneShotQARequest,
+    db_session: Session = Depends(get_session),
+    user: User | None = Depends(current_user),
+) -> StreamingResponse:
+    # Every chunk, data or error, is serialized by get_json_line for uniform framing
+    def stream_generator() -> Generator[str, None, None]:
+        try:
+            for packet in get_answer_stream(request, user, db_session):
+                yield get_json_line(packet.model_dump())
+        except Exception as e:
+            logger.exception("Error in answer streaming")
+            yield get_json_line({"error": str(e)})  # same framing as data packets
+
+    return StreamingResponse(stream_generator(), media_type="application/json")
+
+
@basic_router.get("/standard-answer")
 def get_standard_answer(
    request: StandardAnswerRequest,
--- a/backend/ee/onyx/server/query_history/api.py
+++ b/backend/ee/onyx/server/query_history/api.py
@@ -24,7 +24,7 @@ from onyx.auth.users import current_admin_user
 from onyx.auth.users import get_display_email
 from onyx.background.celery.versioned_apps.client import app as client_app
 from onyx.background.task_utils import construct_query_history_report_name
-from onyx.chat.chat_utils import create_chat_history_chain
+from onyx.chat.chat_utils import create_chat_chain
 from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
 from onyx.configs.constants import FileOrigin
 from onyx.configs.constants import FileType
@@ -123,9 +123,10 @@ def snapshot_from_chat_session(
 ) -> ChatSessionSnapshot | None:
    try:
        # Older chats may not have the right structure
-        messages = create_chat_history_chain(
+        last_message, messages = create_chat_chain(
            chat_session_id=chat_session.id, db_session=db_session
        )
+        messages.append(last_message)
    except RuntimeError:
        return None

--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -37,14 +37,17 @@ from onyx.db.models import AvailableTenant
 from onyx.db.models import IndexModelStatus
 from onyx.db.models import SearchSettings
 from onyx.db.models import UserTenantMapping
-from onyx.llm.constants import LlmProviderNames
+from onyx.llm.llm_provider_options import ANTHROPIC_PROVIDER_NAME
+from onyx.llm.llm_provider_options import ANTHROPIC_VISIBLE_MODEL_NAMES
 from onyx.llm.llm_provider_options import get_anthropic_model_names
-from onyx.llm.llm_provider_options import get_openai_model_names
+from onyx.llm.llm_provider_options import OPEN_AI_MODEL_NAMES
+from onyx.llm.llm_provider_options import OPEN_AI_VISIBLE_MODEL_NAMES
+from onyx.llm.llm_provider_options import OPENAI_PROVIDER_NAME
 from onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest
 from onyx.server.manage.llm.models import LLMProviderUpsertRequest
 from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
 from onyx.setup import setup_onyx
-from onyx.utils.telemetry import mt_cloud_telemetry
+from onyx.utils.telemetry import create_milestone_and_report
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
 from shared_configs.configs import TENANT_ID_PREFIX
@@ -265,13 +268,14 @@ def configure_default_api_keys(db_session: Session) -> None:
    if ANTHROPIC_DEFAULT_API_KEY:
        anthropic_provider = LLMProviderUpsertRequest(
            name="Anthropic",
-            provider=LlmProviderNames.ANTHROPIC,
+            provider=ANTHROPIC_PROVIDER_NAME,
            api_key=ANTHROPIC_DEFAULT_API_KEY,
            default_model_name="claude-3-7-sonnet-20250219",
+            fast_default_model_name="claude-3-5-sonnet-20241022",
            model_configurations=[
                ModelConfigurationUpsertRequest(
                    name=name,
-                    is_visible=False,
+                    is_visible=name in ANTHROPIC_VISIBLE_MODEL_NAMES,
                    max_input_tokens=None,
                )
                for name in get_anthropic_model_names()
@@ -291,16 +295,17 @@ def configure_default_api_keys(db_session: Session) -> None:
    if OPENAI_DEFAULT_API_KEY:
        openai_provider = LLMProviderUpsertRequest(
            name="OpenAI",
-            provider=LlmProviderNames.OPENAI,
+            provider=OPENAI_PROVIDER_NAME,
            api_key=OPENAI_DEFAULT_API_KEY,
            default_model_name="gpt-4o",
+            fast_default_model_name="gpt-4o-mini",
            model_configurations=[
                ModelConfigurationUpsertRequest(
                    name=model_name,
-                    is_visible=False,
+                    is_visible=model_name in OPEN_AI_VISIBLE_MODEL_NAMES,
                    max_input_tokens=None,
                )
-                for model_name in get_openai_model_names()
+                for model_name in OPEN_AI_MODEL_NAMES
            ],
            api_key_changed=True,
        )
@@ -559,11 +564,17 @@ async def assign_tenant_to_user(
    try:
        add_users_to_tenant([email], tenant_id)

-        mt_cloud_telemetry(
-            tenant_id=tenant_id,
-            distinct_id=email,
-            event=MilestoneRecordType.TENANT_CREATED,
-        )
+        # Create milestone record in the same transaction context as the tenant assignment
+        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+            create_milestone_and_report(
+                user=None,
+                distinct_id=tenant_id,
+                event_type=MilestoneRecordType.TENANT_CREATED,
+                properties={
+                    "email": email,
+                },
+                db_session=db_session,
+            )
    except Exception:
        logger.exception(f"Failed to assign tenant {tenant_id} to user {email}")
        raise Exception("Failed to assign tenant to user")
--- a/backend/ee/onyx/server/tenants/user_mapping.py
+++ b/backend/ee/onyx/server/tenants/user_mapping.py
@@ -249,17 +249,6 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
            )
            raise

-    # Remove from invited users list since they've accepted
-    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
-    try:
-        invited_users = get_invited_users()
-        if email in invited_users:
-            invited_users.remove(email)
-            write_invited_users(invited_users)
-            logger.info(f"Removed {email} from invited users list after acceptance")
-    finally:
-        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
-

 def deny_user_invite(email: str, tenant_id: str) -> None:
    """
--- a/backend/ee/onyx/server/user_group/api.py
+++ b/backend/ee/onyx/server/user_group/api.py
@@ -4,14 +4,12 @@ from fastapi import HTTPException
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session

-from ee.onyx.db.user_group import add_users_to_user_group
 from ee.onyx.db.user_group import fetch_user_groups
 from ee.onyx.db.user_group import fetch_user_groups_for_user
 from ee.onyx.db.user_group import insert_user_group
 from ee.onyx.db.user_group import prepare_user_group_for_deletion
 from ee.onyx.db.user_group import update_user_curator_relationship
 from ee.onyx.db.user_group import update_user_group
-from ee.onyx.server.user_group.models import AddUsersToUserGroupRequest
 from ee.onyx.server.user_group.models import SetCuratorRequest
 from ee.onyx.server.user_group.models import UserGroup
 from ee.onyx.server.user_group.models import UserGroupCreate
@@ -81,26 +79,6 @@ def patch_user_group(
        raise HTTPException(status_code=404, detail=str(e))


-@router.post("/admin/user-group/{user_group_id}/add-users")
-def add_users(
-    user_group_id: int,
-    add_users_request: AddUsersToUserGroupRequest,
-    user: User | None = Depends(current_curator_or_admin_user),
-    db_session: Session = Depends(get_session),
-) -> UserGroup:
-    try:
-        return UserGroup.from_model(
-            add_users_to_user_group(
-                db_session=db_session,
-                user=user,
-                user_group_id=user_group_id,
-                user_ids=add_users_request.user_ids,
-            )
-        )
-    except ValueError as e:
-        raise HTTPException(status_code=404, detail=str(e))
-
-
@router.post("/admin/user-group/{user_group_id}/set-curator")
 def set_user_curator(
    user_group_id: int,
--- a/backend/ee/onyx/server/user_group/models.py
+++ b/backend/ee/onyx/server/user_group/models.py
@@ -87,10 +87,6 @@ class UserGroupUpdate(BaseModel):
    cc_pair_ids: list[int]


-class AddUsersToUserGroupRequest(BaseModel):
-    user_ids: list[UUID]
-
-
 class SetCuratorRequest(BaseModel):
    user_id: UUID
    is_curator: bool
--- a/backend/ee/onyx/utils/license.py
+++ b/backend/ee/onyx/utils/license.py
@@ -1,126 +0,0 @@
-"""RSA-4096 license signature verification utilities."""
-
-import base64
-import json
-import os
-from datetime import datetime
-from datetime import timezone
-
-from cryptography.exceptions import InvalidSignature
-from cryptography.hazmat.primitives import hashes
-from cryptography.hazmat.primitives import serialization
-from cryptography.hazmat.primitives.asymmetric import padding
-from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey
-
-from ee.onyx.server.license.models import LicenseData
-from ee.onyx.server.license.models import LicensePayload
-from onyx.server.settings.models import ApplicationStatus
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-# RSA-4096 Public Key for license verification
-# Load from environment variable - key is generated on the control plane
-# In production, inject via Kubernetes secrets or secrets manager
-LICENSE_PUBLIC_KEY_PEM = os.environ.get("LICENSE_PUBLIC_KEY_PEM", "")
-
-
-def _get_public_key() -> RSAPublicKey:
-    """Load the public key from environment variable."""
-    if not LICENSE_PUBLIC_KEY_PEM:
-        raise ValueError(
-            "LICENSE_PUBLIC_KEY_PEM environment variable not set. "
-            "License verification requires the control plane public key."
-        )
-    key = serialization.load_pem_public_key(LICENSE_PUBLIC_KEY_PEM.encode())
-    if not isinstance(key, RSAPublicKey):
-        raise ValueError("Expected RSA public key")
-    return key
-
-
-def verify_license_signature(license_data: str) -> LicensePayload:
-    """
-    Verify RSA-4096 signature and return payload if valid.
-
-    Args:
-        license_data: Base64-encoded JSON containing payload and signature
-
-    Returns:
-        LicensePayload if signature is valid
-
-    Raises:
-        ValueError: If license data is invalid or signature verification fails
-    """
-    try:
-        # Decode the license data
-        decoded = json.loads(base64.b64decode(license_data))
-        license_obj = LicenseData(**decoded)
-
-        payload_json = json.dumps(
-            license_obj.payload.model_dump(mode="json"), sort_keys=True
-        )
-        signature_bytes = base64.b64decode(license_obj.signature)
-
-        # Verify signature using PSS padding (modern standard)
-        public_key = _get_public_key()
-        public_key.verify(
-            signature_bytes,
-            payload_json.encode(),
-            padding.PSS(
-                mgf=padding.MGF1(hashes.SHA256()),
-                salt_length=padding.PSS.MAX_LENGTH,
-            ),
-            hashes.SHA256(),
-        )
-
-        return license_obj.payload
-
-    except InvalidSignature:
-        logger.error("License signature verification failed")
-        raise ValueError("Invalid license signature")
-    except json.JSONDecodeError:
-        logger.error("Failed to decode license JSON")
-        raise ValueError("Invalid license format: not valid JSON")
-    except (ValueError, KeyError, TypeError) as e:
-        logger.error(f"License data validation error: {type(e).__name__}")
-        raise ValueError(f"Invalid license format: {type(e).__name__}")
-    except Exception:
-        logger.exception("Unexpected error during license verification")
-        raise ValueError("License verification failed: unexpected error")
-
-
-def get_license_status(
-    payload: LicensePayload,
-    grace_period_end: datetime | None = None,
-) -> ApplicationStatus:
-    """
-    Determine current license status based on expiry.
-
-    Args:
-        payload: The verified license payload
-        grace_period_end: Optional grace period end datetime
-
-    Returns:
-        ApplicationStatus indicating current license state
-    """
-    now = datetime.now(timezone.utc)
-
-    # Check if grace period has expired
-    if grace_period_end and now > grace_period_end:
-        return ApplicationStatus.GATED_ACCESS
-
-    # Check if license has expired
-    if now > payload.expires_at:
-        if grace_period_end and now <= grace_period_end:
-            return ApplicationStatus.GRACE_PERIOD
-        return ApplicationStatus.GATED_ACCESS
-
-    # License is valid
-    return ApplicationStatus.ACTIVE
-
-
-def is_license_valid(payload: LicensePayload) -> bool:
-    """Check if a license is currently valid (not expired)."""
-    now = datetime.now(timezone.utc)
-    return now <= payload.expires_at
--- a/backend/ee/onyx/utils/posthog_client.py
+++ b/backend/ee/onyx/utils/posthog_client.py
@@ -1,10 +1,7 @@
-import json
 from typing import Any
-from urllib.parse import unquote

 from posthog import Posthog

-from ee.onyx.configs.app_configs import MARKETING_POSTHOG_API_KEY
 from ee.onyx.configs.app_configs import POSTHOG_API_KEY
 from ee.onyx.configs.app_configs import POSTHOG_HOST
 from onyx.utils.logger import setup_logger
@@ -23,80 +20,3 @@ posthog = Posthog(
    debug=True,
    on_error=posthog_on_error,
 )
-
-# For cross referencing between cloud and www Onyx sites
-# NOTE: These clients are separate because they are separate posthog projects.
-# We should eventually unify them into a single posthog project,
-# which would no longer require this workaround
-marketing_posthog = None
-if MARKETING_POSTHOG_API_KEY:
-    marketing_posthog = Posthog(
-        project_api_key=MARKETING_POSTHOG_API_KEY,
-        host=POSTHOG_HOST,
-        debug=True,
-        on_error=posthog_on_error,
-    )
-
-
-def capture_and_sync_with_alternate_posthog(
-    alternate_distinct_id: str, event: str, properties: dict[str, Any]
-) -> None:
-    """
-    Identify in both PostHog projects and capture the event in marketing.
-    - Marketing keeps the marketing distinct_id (for feature flags).
-    - Cloud identify uses the cloud distinct_id
-    """
-    if not marketing_posthog:
-        return
-
-    props = properties.copy()
-
-    try:
-        marketing_posthog.identify(distinct_id=alternate_distinct_id, properties=props)
-        marketing_posthog.capture(alternate_distinct_id, event, props)
-        marketing_posthog.flush()
-    except Exception as e:
-        logger.error(f"Error capturing marketing posthog event: {e}")
-
-    try:
-        if cloud_user_id := props.get("onyx_cloud_user_id"):
-            cloud_props = props.copy()
-            cloud_props.pop("onyx_cloud_user_id", None)
-
-            posthog.identify(
-                distinct_id=cloud_user_id,
-                properties=cloud_props,
-            )
-    except Exception as e:
-        logger.error(f"Error identifying cloud posthog user: {e}")
-
-
-def get_marketing_posthog_cookie_name() -> str | None:
-    if not MARKETING_POSTHOG_API_KEY:
-        return None
-    return f"onyx_custom_ph_{MARKETING_POSTHOG_API_KEY}_posthog"
-
-
-def parse_marketing_cookie(cookie_value: str) -> dict[str, Any] | None:
-    """
-    Parse the URL-encoded JSON marketing cookie.
-
-    Expected format (URL-encoded):
-    {"distinct_id":"...", "featureFlags":{"landing_page_variant":"..."}, ...}
-
-    Returns:
-        Dict with 'distinct_id' explicitly required and all other cookie values
-        passed through as-is, or None if parsing fails or distinct_id is missing.
-    """
-    try:
-        decoded_cookie = unquote(cookie_value)
-        cookie_data = json.loads(decoded_cookie)
-
-        distinct_id = cookie_data.get("distinct_id")
-        if not distinct_id:
-            return None
-
-        return cookie_data
-    except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
-        logger.warning(f"Failed to parse cookie: {e}")
-        return None
--- a/backend/model_server/constants.py
+++ b/backend/model_server/constants.py
@@ -1,4 +1,5 @@
 MODEL_WARM_UP_STRING = "hi " * 512
+INFORMATION_CONTENT_MODEL_WARM_UP_STRING = "hi " * 16


 class GPUStatus:
--- a/backend/model_server/custom_models.py
+++ b/backend/model_server/custom_models.py
@@ -0,0 +1,562 @@
+from typing import cast
+from typing import Optional
+from typing import TYPE_CHECKING
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from fastapi import APIRouter
+from huggingface_hub import snapshot_download  # type: ignore
+
+from model_server.constants import INFORMATION_CONTENT_MODEL_WARM_UP_STRING
+from model_server.constants import MODEL_WARM_UP_STRING
+from model_server.onyx_torch_model import ConnectorClassifier
+from model_server.onyx_torch_model import HybridClassifier
+from model_server.utils import simple_log_function_time
+from onyx.utils.logger import setup_logger
+from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO
+from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG
+from shared_configs.configs import (
+    INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH,
+)
+from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
+from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
+from shared_configs.configs import (
+    INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE,
+)
+from shared_configs.configs import INDEXING_ONLY
+from shared_configs.configs import INFORMATION_CONTENT_MODEL_TAG
+from shared_configs.configs import INFORMATION_CONTENT_MODEL_VERSION
+from shared_configs.configs import INTENT_MODEL_TAG
+from shared_configs.configs import INTENT_MODEL_VERSION
+from shared_configs.model_server_models import ConnectorClassificationRequest
+from shared_configs.model_server_models import ConnectorClassificationResponse
+from shared_configs.model_server_models import ContentClassificationPrediction
+from shared_configs.model_server_models import IntentRequest
+from shared_configs.model_server_models import IntentResponse
+
+if TYPE_CHECKING:
+    from setfit import SetFitModel  # type: ignore
+    from transformers import PreTrainedTokenizer, BatchEncoding  # type: ignore
+
+
+logger = setup_logger()
+
+router = APIRouter(prefix="/custom")
+
+_CONNECTOR_CLASSIFIER_TOKENIZER: Optional["PreTrainedTokenizer"] = None
+_CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None
+
+_INTENT_TOKENIZER: Optional["PreTrainedTokenizer"] = None
+_INTENT_MODEL: HybridClassifier | None = None
+
+_INFORMATION_CONTENT_MODEL: Optional["SetFitModel"] = None
+
+_INFORMATION_CONTENT_MODEL_PROMPT_PREFIX: str = ""  # specific to the model version!
+
+
+def get_connector_classifier_tokenizer() -> "PreTrainedTokenizer":
+    """Return the process-wide connector-classifier tokenizer, loading it once."""
+    global _CONNECTOR_CLASSIFIER_TOKENIZER
+    from transformers import AutoTokenizer, PreTrainedTokenizer
+
+    if _CONNECTOR_CLASSIFIER_TOKENIZER is not None:
+        return _CONNECTOR_CLASSIFIER_TOKENIZER
+
+    # No tokenizer details are uploaded to the HF hub: it is the stock distilbert one
+    stock_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+    _CONNECTOR_CLASSIFIER_TOKENIZER = cast(PreTrainedTokenizer, stock_tokenizer)
+    return _CONNECTOR_CLASSIFIER_TOKENIZER
+
+
+def get_local_connector_classifier(
+    model_name_or_path: str = CONNECTOR_CLASSIFIER_MODEL_REPO,
+    tag: str = CONNECTOR_CLASSIFIER_MODEL_TAG,
+) -> ConnectorClassifier:
+    """Return the cached ConnectorClassifier, loading it on the first call."""
+    global _CONNECTOR_CLASSIFIER_MODEL
+    if _CONNECTOR_CLASSIFIER_MODEL is not None:
+        return _CONNECTOR_CLASSIFIER_MODEL
+
+    try:
+        # First attempt: resolve the snapshot with local_files_only=True
+        cached_dir = snapshot_download(
+            repo_id=model_name_or_path, revision=tag, local_files_only=True
+        )
+        _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(cached_dir)
+    except Exception as e:
+        logger.warning(f"Failed to load model directly: {e}")
+        try:
+            # Second attempt: let snapshot_download fetch the snapshot
+            logger.info(f"Downloading model snapshot for {model_name_or_path}")
+            snap_dir = snapshot_download(repo_id=model_name_or_path, revision=tag)
+            _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(snap_dir)
+        except Exception as e:
+            logger.error(
+                f"Failed to load model even after attempted snapshot download: {e}"
+            )
+            raise
+
+    return _CONNECTOR_CLASSIFIER_MODEL
+
+
+def get_intent_model_tokenizer() -> "PreTrainedTokenizer":
+    """Return the cached intent-model tokenizer, creating it on the first call."""
+    from transformers import AutoTokenizer, PreTrainedTokenizer
+
+    global _INTENT_TOKENIZER
+    if _INTENT_TOKENIZER is not None:
+        return _INTENT_TOKENIZER
+
+    # The HF hub upload has no tokenizer details; the unmodified distilbert one is used
+    base_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+    _INTENT_TOKENIZER = cast(PreTrainedTokenizer, base_tokenizer)
+    return _INTENT_TOKENIZER
+
+
+def get_local_intent_model(
+    model_name_or_path: str = INTENT_MODEL_VERSION,
+    tag: str | None = INTENT_MODEL_TAG,
+) -> HybridClassifier:
+    """Return the cached intent model; load from local cache, else download it."""
+    global _INTENT_MODEL
+    if _INTENT_MODEL is not None:
+        return _INTENT_MODEL
+    try:
+        logger.notice(f"Loading model from local cache: {model_name_or_path}")
+        local_path = snapshot_download(
+            repo_id=model_name_or_path, revision=tag, local_files_only=True
+        )
+        _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
+        logger.notice(f"Loaded model from local cache: {local_path}")
+    except Exception as e:
+        logger.warning(f"Failed to load model directly: {e}")
+        try:
+            logger.notice(f"Downloading model snapshot for {model_name_or_path}")
+            fetched_path = snapshot_download(
+                repo_id=model_name_or_path, revision=tag, local_files_only=False
+            )
+            _INTENT_MODEL = HybridClassifier.from_pretrained(fetched_path)
+        except Exception as e:
+            logger.error(
+                f"Failed to load model even after attempted snapshot download: {e}"
+            )
+            raise
+    return _INTENT_MODEL
+
+
+def get_local_information_content_model(
+    model_name_or_path: str = INFORMATION_CONTENT_MODEL_VERSION,
+    tag: str | None = INFORMATION_CONTENT_MODEL_TAG,
+) -> "SetFitModel":
+    """Return the cached SetFit model; load from local cache, else download it."""
+    from setfit import SetFitModel
+
+    global _INFORMATION_CONTENT_MODEL
+    if _INFORMATION_CONTENT_MODEL is not None:
+        return _INFORMATION_CONTENT_MODEL
+
+    try:
+        logger.notice(
+            f"Loading content information model from local cache: {model_name_or_path}"
+        )
+        local_path = snapshot_download(
+            repo_id=model_name_or_path, revision=tag, local_files_only=True
+        )
+        _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
+        logger.notice(
+            f"Loaded content information model from local cache: {local_path}"
+        )
+    except Exception as e:
+        logger.warning(f"Failed to load content information model directly: {e}")
+        try:
+            logger.notice(
+                f"Downloading content information model snapshot for {model_name_or_path}"
+            )
+            fetched_path = snapshot_download(
+                repo_id=model_name_or_path, revision=tag, local_files_only=False
+            )
+            _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(fetched_path)
+        except Exception as e:
+            logger.error(
+                f"Failed to load content information model even after attempted snapshot download: {e}"
+            )
+            raise
+    return _INFORMATION_CONTENT_MODEL
+
+
+def tokenize_connector_classification_query(
+    connectors: list[str],
+    query: str,
+    tokenizer: "PreTrainedTokenizer",
+    connector_token_end_id: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """
+    Build the single-prompt input for the forward pass of ConnectorClassifier models
+
+    Prompt layout: CLS, then each connector name followed by the connector end
+    token, then the user query, then SEP. The attention mask is all 1s.
+
+    Args:
+        connectors: connector names, encoded in the given order
+        query: the user query, placed after all connectors
+        tokenizer: supplies cls_token_id / sep_token_id and encodes each text
+            (called with add_special_tokens=False)
+        connector_token_end_id: token id appended after every connector name
+
+    Returns:
+        (input_ids, attention_mask), each with an extra leading dimension of 1
+    """
+
+    def _single_id(token_id: int) -> torch.Tensor:
+        # Wrap one token id as a 1-element long tensor
+        return torch.tensor([token_id], dtype=torch.long)
+
+    def _text_ids(text: str) -> torch.Tensor:
+        encoded = tokenizer(
+            text,
+            add_special_tokens=False,
+            return_tensors="pt",
+        )
+        return encoded["input_ids"].squeeze(dim=0)
+
+    # Gather every segment first, then concatenate once
+    segments = [_single_id(tokenizer.cls_token_id)]
+    for connector_name in connectors:
+        segments.append(_text_ids(connector_name))
+        segments.append(_single_id(connector_token_end_id))
+    segments.append(_text_ids(query))
+    segments.append(_single_id(tokenizer.sep_token_id))
+
+    input_ids = torch.cat(segments, dim=-1)
+    mask = torch.ones(input_ids.numel(), dtype=torch.long)
+
+    return input_ids.unsqueeze(0), mask.unsqueeze(0)
+
+
+def warm_up_connector_classifier_model() -> None:
+    """Run one throwaway forward pass through the connector classifier."""
+    logger.info(
+        f"Warming up connector_classifier model {CONNECTOR_CLASSIFIER_MODEL_TAG}"
+    )
+    tokenizer = get_connector_classifier_tokenizer()
+    model = get_local_connector_classifier()
+
+    # Fixed sample prompt; the output of the pass is discarded
+    sample_ids, sample_mask = tokenize_connector_classification_query(
+        connectors=["GitHub"],
+        query="onyx classifier query google doc",
+        tokenizer=tokenizer,
+        connector_token_end_id=model.connector_end_token_id,
+    )
+    device = model.device
+    model(sample_ids.to(device), sample_mask.to(device))
+
+
+def warm_up_intent_model() -> None:
+    """Push the tokenized warm-up string through the intent model once."""
+    logger.notice(f"Warming up Intent Model: {INTENT_MODEL_VERSION}")
+    warm_up_tokens = get_intent_model_tokenizer()(
+        MODEL_WARM_UP_STRING, return_tensors="pt", truncation=True, padding=True
+    )
+
+    model = get_local_intent_model()
+    target_device = model.device
+    # The result is discarded; only the forward pass itself matters here
+    query_ids = warm_up_tokens["input_ids"].to(target_device)
+    query_mask = warm_up_tokens["attention_mask"].to(target_device)
+    model(query_ids=query_ids, query_mask=query_mask)
+
+
+def warm_up_information_content_model() -> None:
+    """Call the information content model once on the short warm-up string."""
+    logger.notice("Warming up Content Model")  # TODO: add version if needed
+
+    get_local_information_content_model()(INFORMATION_CONTENT_MODEL_WARM_UP_STRING)
+
+
+@simple_log_function_time()
+def run_inference(tokens: "BatchEncoding") -> tuple[list[float], list[float]]:
+    intent_model = get_local_intent_model()
+    device = intent_model.device
+
+    outputs = intent_model(
+        query_ids=tokens["input_ids"].to(device),
+        query_mask=tokens["attention_mask"].to(device),
+    )
+
+    token_logits = outputs["token_logits"]
+    intent_logits = outputs["intent_logits"]
+
+    # Move tensors to CPU before applying softmax and converting to numpy
+    intent_probabilities = F.softmax(intent_logits.cpu(), dim=-1).numpy()[0]
+    token_probabilities = F.softmax(token_logits.cpu(), dim=-1).numpy()[0]
+
+    # Extract the probabilities for the positive class (index 1) for each token
+    token_positive_probs = token_probabilities[:, 1].tolist()
+
+    return intent_probabilities.tolist(), token_positive_probs
+
+
+@simple_log_function_time()
+def run_content_classification_inference(
+    text_inputs: list[str],
+) -> list[ContentClassificationPrediction]:
+    """
+    Assign a score to the segments in question. The model stored in get_local_information_content_model()
+    creates the 'model score' based on its training, and the scores are then converted to a 0.0-1.0 scale.
+    In the code outside of the model/inference model servers that score will be converted into the actual
+    boost factor.
+
+    Inputs are processed in batches of _BATCH_SIZE. Empty strings are assigned class 0 / probability 0.0
+    without calling the model; inputs with more whitespace-separated words than
+    INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH keep the default class 1 / probability 1.0.
+    Every other input is prefixed with _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX and sent to the model.
+
+    The positive-class probabilities are converted to logits, divided by
+    INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE, mapped back to probabilities and finally
+    rescaled by _prob_to_score into the range spanned by INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
+    and INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX.
+
+    Returns one ContentClassificationPrediction per input, in input order.
+    """
+
+    def _prob_to_score(prob: float) -> float:
+        """
+        Conversion of base score to 0.0 - 1.0 score. Note that the min/max values depend on the model!
+        """
+        _MIN_BASE_SCORE = 0.25
+        _MAX_BASE_SCORE = 0.75
+        # Piecewise-linear ramp: 0.0 below the min, 1.0 at or above the max
+        if prob < _MIN_BASE_SCORE:
+            raw_score = 0.0
+        elif prob < _MAX_BASE_SCORE:
+            raw_score = (prob - _MIN_BASE_SCORE) / (_MAX_BASE_SCORE - _MIN_BASE_SCORE)
+        else:
+            raw_score = 1.0
+        # Rescale the 0.0-1.0 ramp into the configured [MIN, MAX] interval
+        return (
+            INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
+            + (
+                INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
+                - INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
+            )
+            * raw_score
+        )
+
+    _BATCH_SIZE = 32
+    content_model = get_local_information_content_model()
+
+    # Process inputs in batches
+    all_output_classes: list[int] = []
+    all_base_output_probabilities: list[float] = []
+
+    for i in range(0, len(text_inputs), _BATCH_SIZE):
+        batch = text_inputs[i : i + _BATCH_SIZE]
+        # Texts actually sent to the model, and their positions within `batch`
+        batch_with_prefix = []
+        batch_indices = []
+
+        # Pre-allocate results for this batch
+        # (defaults are class 1 / probability 1.0; they remain in place for over-long inputs)
+        batch_output_classes: list[np.ndarray] = [np.array(1)] * len(batch)
+        batch_probabilities: list[np.ndarray] = [np.array(1.0)] * len(batch)
+
+        # Pre-process batch to handle long input exceptions
+        for j, text in enumerate(batch):
+            if len(text) == 0:
+                # if no input, treat as non-informative from the model's perspective
+                batch_output_classes[j] = np.array(0)
+                batch_probabilities[j] = np.array(0.0)
+                logger.warning("Input for Content Information Model is empty")
+
+            elif (
+                len(text.split())
+                <= INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH
+            ):
+                # if input is short, use the model
+                batch_with_prefix.append(
+                    _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX + text
+                )
+                batch_indices.append(j)
+            else:
+                # if longer than cutoff, treat as informative (stay with default), but issue warning
+                logger.warning("Input for Content Information Model too long")
+
+        if batch_with_prefix:  # Only run model if we have valid inputs
+            # Get predictions for the batch
+            # NOTE(review): the model is invoked twice on the same texts (once for
+            # classes, once for probabilities) - confirm whether both calls are needed
+            model_output_classes = content_model(batch_with_prefix)
+            model_output_probabilities = content_model.predict_proba(batch_with_prefix)
+
+            # Place results in the correct positions
+            for idx, batch_idx in enumerate(batch_indices):
+                batch_output_classes[batch_idx] = model_output_classes[idx].numpy()
+                batch_probabilities[batch_idx] = model_output_probabilities[idx][
+                    1
+                ].numpy()  # x[1] is prob of the positive class
+
+        all_output_classes.extend([int(x) for x in batch_output_classes])
+        all_base_output_probabilities.extend([float(x) for x in batch_probabilities])
+
+    # Probability -> logit; exact 0.0 / 1.0 are pinned to -100 / 100 to avoid log(0) and division by zero
+    logits = [
+        np.log(p / (1 - p)) if p != 0.0 and p != 1.0 else (100 if p == 1.0 else -100)
+        for p in all_base_output_probabilities
+    ]
+    # Temperature scaling of the logits
+    scaled_logits = [
+        logit / INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE
+        for logit in logits
+    ]
+    # Sigmoid back to probabilities
+    # NOTE(review): np.exp may overflow for very large scaled logits (e.g. a very
+    # small temperature) - confirm the configured range
+    output_probabilities_with_temp = [
+        np.exp(scaled_logit) / (1 + np.exp(scaled_logit))
+        for scaled_logit in scaled_logits
+    ]
+
+    prediction_scores = [
+        _prob_to_score(p_temp) for p_temp in output_probabilities_with_temp
+    ]
+
+    content_classification_predictions = [
+        ContentClassificationPrediction(
+            predicted_label=predicted_label, content_boost_factor=output_score
+        )
+        for predicted_label, output_score in zip(all_output_classes, prediction_scores)
+    ]
+
+    return content_classification_predictions
+
+
+def map_keywords(
+    input_ids: torch.Tensor, tokenizer: "PreTrainedTokenizer", is_keyword: list[bool]
+) -> list[str]:
+    """Turn per-token keyword predictions into a list of keyword strings.
+
+    Args:
+        input_ids: token ids of a single query.
+        tokenizer: tokenizer used to convert the ids back to token strings.
+        is_keyword: one flag per token id; True marks a predicted keyword token.
+
+    Returns:
+        The extracted keywords, with "##"-prefixed pieces appended to the
+        keyword currently being built.
+
+    Raises:
+        ValueError: if the number of tokens and flags differ, or if the
+            tokenizer's unknown token appears among the tokens.
+    """
+    tokens = tokenizer.convert_ids_to_tokens(input_ids)  # type: ignore
+
+    if not len(tokens) == len(is_keyword):
+        raise ValueError("Length of tokens and keyword predictions must match")
+
+    # Drop a leading CLS token and a trailing SEP token, together with their flags
+    if input_ids[0] == tokenizer.cls_token_id:
+        tokens = tokens[1:]
+        is_keyword = is_keyword[1:]
+
+    if input_ids[-1] == tokenizer.sep_token_id:
+        tokens = tokens[:-1]
+        is_keyword = is_keyword[:-1]
+
+    unk_token = tokenizer.unk_token
+    if unk_token in tokens:
+        raise ValueError("Unknown token detected in the input")
+
+    keywords = []
+    current_keyword = ""
+
+    for ind, token in enumerate(tokens):
+        if is_keyword[ind]:
+            if token.startswith("##"):
+                # Continuation piece: append it without the "##" marker
+                current_keyword += token[2:]
+            else:
+                # A new word starts: flush the keyword built so far
+                if current_keyword:
+                    keywords.append(current_keyword)
+                current_keyword = token
+        else:
+            # If mispredicted a later token of a keyword, add it to the current keyword
+            # to complete it
+            # NOTE(review): the check below inspects current_keyword, not token, so the
+            # non-keyword token itself is never appended - confirm this is intended
+            if current_keyword:
+                if len(current_keyword) > 2 and current_keyword.startswith("##"):
+                    current_keyword = current_keyword[2:]
+
+                else:
+                    keywords.append(current_keyword)
+                    current_keyword = ""
+
+    # Flush a keyword that runs to the end of the token list
+    if current_keyword:
+        keywords.append(current_keyword)
+
+    return keywords
+
+
+def clean_keywords(keywords: list[str]) -> list[str]:
+    """Normalize keywords into a flat list of whitespace-free words.
+
+    For each keyword: a trailing "'s" is removed, "/" is replaced by a space,
+    single and double quotes are removed, and the result is split on
+    whitespace. A keyword may therefore yield zero, one or several words.
+    """
+    cleaned_words = []
+    for word in keywords:
+        # Drop a trailing possessive "'s"
+        word = word[:-2] if word.endswith("'s") else word
+        # Slash-joined terms become separate words after the split below
+        word = word.replace("/", " ")
+        word = word.replace("'", "").replace('"', "")
+        cleaned_words.extend([w for w in word.strip().split() if w and not w.isspace()])
+    return cleaned_words
+
+
+def run_connector_classification(req: ConnectorClassificationRequest) -> list[str]:
+    tokenizer = get_connector_classifier_tokenizer()
+    model = get_local_connector_classifier()
+
+    connector_names = req.available_connectors
+
+    input_ids, attention_mask = tokenize_connector_classification_query(
+        connector_names,
+        req.query,
+        tokenizer,
+        model.connector_end_token_id,
+    )
+    input_ids = input_ids.to(model.device)
+    attention_mask = attention_mask.to(model.device)
+
+    global_confidence, classifier_confidence = model(input_ids, attention_mask)
+
+    if global_confidence.item() < 0.5:
+        return []
+
+    passed_connectors = []
+
+    for i, connector_name in enumerate(connector_names):
+        if classifier_confidence.view(-1)[i].item() > 0.5:
+            passed_connectors.append(connector_name)
+
+    return passed_connectors
+
+
+def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
+    tokenizer = get_intent_model_tokenizer()
+    model_input = tokenizer(
+        intent_req.query, return_tensors="pt", truncation=False, padding=False
+    )
+
+    if len(model_input.input_ids[0]) > 512:
+        # If the user text is too long, assume it is semantic and keep all words
+        return True, intent_req.query.split()
+
+    intent_probs, token_probs = run_inference(model_input)
+
+    is_keyword_sequence = intent_probs[0] >= intent_req.keyword_percent_threshold
+
+    keyword_preds = [
+        token_prob >= intent_req.keyword_percent_threshold for token_prob in token_probs
+    ]
+
+    try:
+        keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
+    except Exception as e:
+        logger.warning(
+            f"Failed to extract keywords for query: {intent_req.query} due to {e}"
+        )
+        # Fallback to keeping all words
+        keywords = intent_req.query.split()
+
+    cleaned_keywords = clean_keywords(keywords)
+
+    return is_keyword_sequence, cleaned_keywords
+
+
+@router.post("/connector-classification")
+async def process_connector_classification_request(
+    classification_request: ConnectorClassificationRequest,
+) -> ConnectorClassificationResponse:
+    if INDEXING_ONLY:
+        raise RuntimeError(
+            "Indexing model server should not call connector classification endpoint"
+        )
+
+    if len(classification_request.available_connectors) == 0:
+        return ConnectorClassificationResponse(connectors=[])
+
+    connectors = run_connector_classification(classification_request)
+    return ConnectorClassificationResponse(connectors=connectors)
+
+
+@router.post("/query-analysis")
+async def process_analysis_request(
+    intent_request: IntentRequest,
+) -> IntentResponse:
+    if INDEXING_ONLY:
+        raise RuntimeError("Indexing model server should not call intent endpoint")
+
+    is_keyword, keywords = run_analysis(intent_request)
+    return IntentResponse(is_keyword=is_keyword, keywords=keywords)
+
+
+@router.post("/content-classification")
+async def process_content_classification_request(
+    content_classification_requests: list[str],
+) -> list[ContentClassificationPrediction]:
+    return run_content_classification_inference(content_classification_requests)
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -1,6 +1,7 @@
 import asyncio
 import time
 from typing import Any
+from typing import Optional
 from typing import TYPE_CHECKING

 from fastapi import APIRouter
@@ -9,13 +10,16 @@ from fastapi import Request

 from model_server.utils import simple_log_function_time
 from onyx.utils.logger import setup_logger
+from shared_configs.configs import INDEXING_ONLY
 from shared_configs.enums import EmbedTextType
 from shared_configs.model_server_models import Embedding
 from shared_configs.model_server_models import EmbedRequest
 from shared_configs.model_server_models import EmbedResponse
+from shared_configs.model_server_models import RerankRequest
+from shared_configs.model_server_models import RerankResponse

 if TYPE_CHECKING:
-    from sentence_transformers import SentenceTransformer
+    from sentence_transformers import CrossEncoder, SentenceTransformer

 logger = setup_logger()

@@ -23,6 +27,11 @@ router = APIRouter(prefix="/encoder")


 _GLOBAL_MODELS_DICT: dict[str, "SentenceTransformer"] = {}
+_RERANK_MODEL: Optional["CrossEncoder"] = None
+
+# If we are not only indexing, we don't want to retry for very long
+_RETRY_DELAY = 10 if INDEXING_ONLY else 0.1
+_RETRY_TRIES = 10 if INDEXING_ONLY else 2


 def get_embedding_model(
@@ -33,7 +42,7 @@ def get_embedding_model(
    Loads or returns a cached SentenceTransformer, sets max_seq_length, pins device,
    pre-warms rotary caches once, and wraps encode() with a lock to avoid cache races.
    """
-    from sentence_transformers import SentenceTransformer
+    from sentence_transformers import SentenceTransformer  # type: ignore

    def _prewarm_rope(st_model: "SentenceTransformer", target_len: int) -> None:
        """
@@ -78,6 +87,19 @@ def get_embedding_model(
    return _GLOBAL_MODELS_DICT[model_name]


+def get_local_reranking_model(
+    model_name: str,
+) -> "CrossEncoder":
+    global _RERANK_MODEL
+    from sentence_transformers import CrossEncoder  # type: ignore
+
+    if _RERANK_MODEL is None:
+        logger.notice(f"Loading {model_name}")
+        model = CrossEncoder(model_name)
+        _RERANK_MODEL = model
+    return _RERANK_MODEL
+
+
 ENCODING_RETRIES = 3
 ENCODING_RETRY_DELAY = 0.1

@@ -94,7 +116,7 @@ def _concurrent_embedding(
            # the model to fail to encode texts. It's pretty rare and we want to allow
            # concurrent embedding, hence we retry (the specific error is
            # "RuntimeError: Already borrowed" and occurs in the transformers library)
-            logger.warning(f"Error encoding texts, retrying: {e}")
+            logger.error(f"Error encoding texts, retrying: {e}")
            time.sleep(ENCODING_RETRY_DELAY)
    return model.encode(texts, normalize_embeddings=normalize_embeddings)

@@ -167,6 +189,16 @@ async def embed_text(
    return embeddings


+@simple_log_function_time()
+async def local_rerank(query: str, docs: list[str], model_name: str) -> list[float]:
+    cross_encoder = get_local_reranking_model(model_name)
+    # Run CPU-bound reranking in a thread pool
+    return await asyncio.get_event_loop().run_in_executor(
+        None,
+        lambda: cross_encoder.predict([(query, doc) for doc in docs]).tolist(),  # type: ignore
+    )
+
+
@router.post("/bi-encoder-embed")
 async def route_bi_encoder_embed(
    request: Request,
@@ -222,3 +254,39 @@ async def process_embed_request(
        raise HTTPException(
            status_code=500, detail=f"Error during embedding process: {e}"
        )
+
+
+@router.post("/cross-encoder-scores")
+async def process_rerank_request(rerank_request: RerankRequest) -> RerankResponse:
+    """Cross encoders can be purely black box from the app perspective"""
+    # Only local models should use this endpoint - API providers should make direct API calls
+    if rerank_request.provider_type is not None:
+        raise ValueError(
+            f"Model server reranking endpoint should only be used for local models. "
+            f"API provider '{rerank_request.provider_type}' should make direct API calls instead."
+        )
+
+    if INDEXING_ONLY:
+        raise RuntimeError("Indexing model server should not call intent endpoint")
+
+    if not rerank_request.documents or not rerank_request.query:
+        raise HTTPException(
+            status_code=400, detail="Missing documents or query for reranking"
+        )
+    if not all(rerank_request.documents):
+        raise ValueError("Empty documents cannot be reranked.")
+
+    try:
+        # At this point, provider_type is None, so handle local reranking
+        sim_scores = await local_rerank(
+            query=rerank_request.query,
+            docs=rerank_request.documents,
+            model_name=rerank_request.model_name,
+        )
+        return RerankResponse(scores=sim_scores)
+
+    except Exception as e:
+        logger.exception(f"Error during reranking process:\n{str(e)}")
+        raise HTTPException(
+            status_code=500, detail="Failed to run Cross-Encoder reranking"
+        )
--- a/backend/model_server/legacy/README.md
+++ b/backend/model_server/legacy/README.md
@@ -1,5 +0,0 @@
-This directory contains code that was useful and may become useful again in the future.
-
-We stopped using rerankers because the state of the art rerankers are not significantly better than the biencoders and much worse than LLMs which are also capable of acting on a small set of documents for filtering, reranking, etc.
-
-We stopped using the internal query classifier as that's now offloaded to the LLM which does query expansion so we know ahead of time if it's a keyword or semantic query.
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Raunak Bhagat	9e9c3ec0b9	Remove unused imports	2025-11-18 13:51:10 -08:00
Raunak Bhagat	1457ca2a20	Make share button instantaneous	2025-11-18 13:50:37 -08:00
Raunak Bhagat	edc390edc6	Implement AppPage wrapper for all other pages inside of /chat	2025-11-18 13:34:38 -08:00
Raunak Bhagat	022624cb5a	Maintain consistent heights	2025-11-18 13:20:09 -08:00
Raunak Bhagat	f301257130	Make chatSession info and settings info be passed in as server-side data	2025-11-18 13:07:52 -08:00
Raunak Bhagat	9eecc71cda	Fix flashing	2025-11-18 11:43:49 -08:00