Compare commits

...

122 Commits

Author SHA1 Message Date
Raunak Bhagat
9e9c3ec0b9 Remove unused imports 2025-11-18 13:51:10 -08:00
Raunak Bhagat
1457ca2a20 Make share button instantaneous 2025-11-18 13:50:37 -08:00
Raunak Bhagat
edc390edc6 Implement AppPage wrapper for all other pages inside of /chat 2025-11-18 13:34:38 -08:00
Raunak Bhagat
022624cb5a Maintain consistent heights 2025-11-18 13:20:09 -08:00
Raunak Bhagat
f301257130 Make chatSession info and settings info be passed in as server-side data 2025-11-18 13:07:52 -08:00
Raunak Bhagat
9eecc71cda Fix flashing 2025-11-18 11:43:49 -08:00
Justin Tahara
6677e12e55 chore(vespa): Update version (#6299) 2025-11-18 09:50:38 -08:00
SubashMohan
7175b93a4c enhancement(onboarding) : Replacing Select input with combobox (#6048) 2025-11-18 17:40:57 +05:30
SubashMohan
fbbcd9646d fix(onboarding): Header animated icon (#6098) 2025-11-18 12:24:42 +05:30
SubashMohan
7afc9d417c feat(modal): Implement a new modal component (#6289) 2025-11-17 23:37:35 +00:00
Wenxi
a905f2d3fb chore: pydantic v2 model configs (#6302) 2025-11-17 23:24:41 +00:00
Jamison Lahman
3d1994a515 chore(deployments): run trivy scanners separate from build and push (#6301) 2025-11-17 23:16:16 +00:00
dependabot[bot]
7f507c7be0 chore(deps): Bump actions/setup-python from 4.9.1 to 6.0.0 (#6296)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-17 20:38:08 +00:00
Jamison Lahman
c0e418d63e chore(deployment): notifications on build failures (#6298) 2025-11-17 20:20:21 +00:00
dependabot[bot]
db49e14f12 chore(deps): Bump docker/login-action from 1.14.1 to 3.6.0 (#6295)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-17 20:19:48 +00:00
dependabot[bot]
e87d6403e8 chore(deps): Bump helm/kind-action from 1.12.0 to 1.13.0 (#6294)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 19:48:13 +00:00
Richard Guan
2b6e02a775 chore(internal): search prune sections (#6247) 2025-11-17 18:40:42 +00:00
Justin Tahara
26e1f349b9 fix(index attempts): Preserve some attempts (#6266) 2025-11-17 18:06:26 +00:00
Jamison Lahman
ba83d7e6c3 chore(docker): generate OpenAPI schema/client with docker (#6286) 2025-11-17 17:20:07 +00:00
dependabot[bot]
f869e44497 chore(deps-dev): Bump js-yaml from 3.14.1 to 3.14.2 in /web (#6293)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 17:17:37 +00:00
Jamison Lahman
b367a60680 chore(gha): replace background docker pulls with docker-compose (#6287) 2025-11-17 17:11:56 +00:00
Jamison Lahman
98a7e8b7e2 chore(docker): avoid ONYX_VERSION invalidating the docker cache (#6288) 2025-11-17 17:10:54 +00:00
Nikolas Garza
f93752a2b3 fix: disable aggressive caching for Next.js static assets in dev (#6280)
Co-authored-by: Nikolas Garza <nikolas@unknowna6c9beeb7428.attlocal.net>
2025-11-17 09:15:51 -08:00
Evan Lohn
0d20140cad fix: mcp fixes (#6080) 2025-11-17 08:58:49 -08:00
Wenxi
bdd6dc036e fix(ui): new action form spacing fixes (#6285) 2025-11-17 05:14:05 +00:00
dependabot[bot]
27fe196df3 chore(deps): Bump nanoid from 3.3.7 to 3.3.8 in /examples/widget (#3405)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 03:36:35 +00:00
Wenxi
18dad51bf8 fix(tests): pause connector while indexing timeout (#6282) 2025-11-16 22:51:49 +00:00
Wenxi
b6d60fb6a9 fix(permsync): don't fail on empty group ids (#6281) 2025-11-16 22:02:03 +00:00
Wenxi
86e7975c42 chore: foss sync readme (#6256) 2025-11-16 13:18:13 -08:00
Jamison Lahman
bb1fb2250e chore(scripts): only run check_lazy_imports on changed files (#6275) 2025-11-16 18:31:53 +00:00
Nikolas Garza
8fdc3411ed feat(slack federated search scoping - 4/4): Add frontend connector config support (#6181)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-16 10:29:44 -08:00
Chris Weaver
d5038e8e68 fix: assistant reordering (#6278) 2025-11-16 09:07:56 -08:00
Jamison Lahman
bc035a78e4 chore(deployment): increase model-server builder to 40GB disk (#6277) 2025-11-16 05:17:11 +00:00
Jamison Lahman
9e1043b2fa chore(mypy): color output in CI (#6274) 2025-11-16 05:12:50 +00:00
SubashMohan
107e83bf2a refactor(chat): Apply Ownership Checks Only to Current Message User Files (#6240) 2025-11-16 05:06:35 +00:00
SubashMohan
f5aade9f69 fix(userfiles): remove fixed width in AssistantEditor and ProjectContextPanel (#6239) 2025-11-15 11:50:37 +00:00
dependabot[bot]
9b9ca43671 chore(deps): bump next from 14.2.27 to 14.2.32 in /examples/widget (#5395)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-15 07:07:40 +00:00
Nikolas Garza
0c61cc3f65 feat(slack federated search scoping - 3/4): Add connector-level config support (#6178)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 04:42:23 +00:00
Nikolas Garza
553853c7f4 feat(slack federated search scoping - 2/4): Add query construction and filtering (#6175)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 04:11:28 +00:00
Jamison Lahman
15a05663ca chore(docker): install node deps before copying source (#6261) 2025-11-15 03:55:11 +00:00
Jamison Lahman
940773b9c5 chore(deployments): fix cross-platform related issues (#6272) 2025-11-15 03:24:26 +00:00
Nikolas Garza
a95ae6e88b feat(slack federated search scoping - 1/4): Add entity filtering config (#6174)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 02:47:52 +00:00
Raunak Bhagat
369f923929 refactor: Implement a proper Switch component (#6270) 2025-11-15 02:28:58 +00:00
Raunak Bhagat
3eefbfb646 fix: Fix header for white-labelling (#6271) 2025-11-14 18:27:29 -08:00
Justin Tahara
3919a2d0a2 fix(gdrive): Missing Id Field (#6262) 2025-11-14 17:59:34 -08:00
Justin Tahara
4553e811b0 feat(github): Showcasing our Github Repo Ranking (#6267) 2025-11-14 16:54:34 -08:00
Justin Tahara
7f7389692e fix(reformat): Teams Test (#6268) 2025-11-14 16:53:19 -08:00
Richard Guan
30147c03cf chore(fix): agent sdk replacement message formatting (#6180) 2025-11-14 14:51:37 -08:00
Wenxi
dc48ccc117 fix(teams): mypy (#6259) 2025-11-14 14:42:30 -08:00
Alex Kim
ee366c50c4 fix(teams): handle OData parsing errors with special characters (#6115)
Co-authored-by: Jessica Singh <86633231+jessicasingh7@users.noreply.github.com>
2025-11-14 14:38:58 -08:00
sktbcpraha
caf92a6cce fix: Assistant instruction ignored (#6243) 2025-11-14 14:30:14 -08:00
Jamison Lahman
259bc9d64b chore(deployments): fix actions/checkout typo (#6255) 2025-11-14 21:48:12 +00:00
dependabot[bot]
60664f7e5b chore(deps-dev): bump js-yaml from 4.1.0 to 4.1.1 in /examples/widget (#6248)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-14 21:09:16 +00:00
Wenxi
07f55c6ae2 fix: readme (#6254) 2025-11-14 13:31:29 -08:00
Wenxi
256ece05a6 chore: readme (#6253) 2025-11-14 13:26:53 -08:00
Jamison Lahman
530d6d8284 chore(deployments): simplify pipeline + cross-platform builds (#6250) 2025-11-14 13:16:20 -08:00
Chris Weaver
6299bc30b6 fix: playwright test (#6244)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2025-11-14 12:26:50 -08:00
Jamison Lahman
0607ea9741 chore(deployments): add ability to trigger dry-run build (#6246) 2025-11-14 11:22:22 -08:00
Chris Weaver
3ba4bdfe78 fix: gpt-5 output formatting (#6245) 2025-11-14 10:55:17 -08:00
Chris Weaver
a9165ad329 feat: allow switchover with active connectors only (#6226) 2025-11-14 16:52:07 +00:00
Raunak Bhagat
24aea2d7ce refactor: Edit button types (#6235) 2025-11-14 16:21:08 +00:00
SubashMohan
aa30008419 feat(component): new switch component (#6212) 2025-11-14 08:46:53 +00:00
Raunak Bhagat
3605676f61 fix: Fix inputs overflowing in Settings page (#6238) 2025-11-14 06:24:25 +00:00
Raunak Bhagat
1faa9e7812 refactor: Updated Modals API (#6227)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-13 21:48:28 -08:00
Jamison Lahman
d85b702cac chore(deployments): remove unnecessary install of build-essentials (#6234) 2025-11-14 04:33:28 +00:00
Jamison Lahman
a724f53e5b chore(deployments): prefer ecr over s3 as docker cache backend (#6232) 2025-11-13 19:39:55 -08:00
Chris Weaver
68fcc5cb8a fix: signup button (#6228) 2025-11-13 19:02:05 -08:00
Justin Tahara
3eb1ca01a2 fix(docprocessing): OOM cleanup (#6223) 2025-11-13 18:24:59 -08:00
Chris Weaver
c2c3d1a722 feat: allow disabling the default assistant (#6222) 2025-11-13 17:42:47 -08:00
Wenxi
f79a8533fb fix: show agent descriptions (#6219) 2025-11-13 14:17:43 -08:00
Jamison Lahman
c1dce9fabd chore(runs-on): define custom AMI specs (#6216) 2025-11-13 22:01:07 +00:00
Jamison Lahman
244bf82c7a chore(gha): prefer venv over installing python packages to the system (#6213) 2025-11-13 17:39:54 +00:00
Jamison Lahman
188ea3faff chore(gha): prefer Github-hosted for simple steps (#6208) 2025-11-13 02:37:48 +00:00
Justin Tahara
c04f624891 fix(slack): Fixing the link coloring (#6203) 2025-11-13 02:32:50 +00:00
Jamison Lahman
43ae02a870 chore(gha): remove custom cloudwatch metrics (#6202) 2025-11-13 00:12:13 +00:00
Jamison Lahman
14123926a7 chore(gha): final runs-on migration nits (#6170) 2025-11-12 23:00:25 +00:00
Justin Tahara
d14d1b833f fix(slack): Show Channels when Editing Fed Slack (#6200) 2025-11-12 22:30:49 +00:00
Nikolas Garza
ff06f10af6 fix: type checking for multiToolTestHelpers (#6199)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 14:36:04 -08:00
Justin Tahara
5d26c1bafc fix(slackbot): Switch between document set and assistant (#6198) 2025-11-12 22:21:27 +00:00
dependabot[bot]
dbf06c6a1b chore(deps): bump aquasecurity/trivy-action from 0.29.0 to 0.33.1 (#6194)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 19:43:15 +00:00
dependabot[bot]
d31e83900f chore(deps): bump docker/setup-buildx-action from 1.7.0 to 3.11.1 (#6196)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 19:25:04 +00:00
dependabot[bot]
1ac92e6bd0 chore(deps-dev): bump types-urllib3 from 1.26.25.11 to 1.26.25.14 in /backend (#6193)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 19:21:32 +00:00
dependabot[bot]
5e159c35f3 chore(deps): bump pilosus/action-pip-license-checker from 2.0.0 to 3.1.0 (#6191)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 19:00:23 +00:00
Raunak Bhagat
550271abd9 feat: Share chat button in top right corner (#6186)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 11:08:23 -08:00
Nikolas Garza
db3d0bfb34 feat: improve usability of react testing framework + multi-tool renderer tests (#5973)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 10:48:49 -08:00
Nikolas Garza
860bdd3c0f chore: run playwright projects as separate jobs (#6190)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 18:28:19 +00:00
Jamison Lahman
3bc63b30ce chore(deps): dependabot for python (#6188) 2025-11-12 18:18:27 +00:00
dependabot[bot]
78a23eeec0 chore(deps): bump pypdf from 6.0.0 to 6.1.3 in /backend/requirements (#5866)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 18:15:05 +00:00
Raunak Bhagat
096e4029ba build: Revert to using webpack instead of turbopack (#6185)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 18:10:17 +00:00
SubashMohan
e8da5722df feat(upload): increase token limit to 100k and configurable skip (#6187) 2025-11-12 09:53:37 -08:00
Jamison Lahman
a1a261f68e chore(lint): introduce actionlint, github actions linter (#6184) 2025-11-12 03:39:17 +00:00
Jamison Lahman
ac57b10240 chore(gha): ensure run-id is unique, fix hanging jobs (#6183) 2025-11-12 01:25:59 +00:00
Richard Guan
ce35e01ce3 chore(hotfix): for configuration (#6182) 2025-11-12 00:59:28 +00:00
Richard Guan
808f82de0b chore(agent sdk): make alternative to openai agent sdk (#6153) 2025-11-11 16:25:19 -08:00
Jamison Lahman
9518bd14bb chore(gha): explicit spot pricing (#6177) 2025-11-11 23:52:54 +00:00
Justin Tahara
54eb655634 fix(gdrive): Checkbox fix (#6171) 2025-11-11 22:39:36 +00:00
Wenxi
a773c398af fix: safari input bar quadrupling new lines (#6173) 2025-11-11 13:31:23 -08:00
Jamison Lahman
53131e7669 chore(gha): run whitespace fixers on actions (#6172) 2025-11-11 13:06:59 -08:00
Richard Guan
d5cb56b0e9 chore(llm): interface decoupled from langchain (#6128) 2025-11-11 19:48:25 +00:00
Wenxi
de6226e192 fix: img input support check false vs. none (#6169) 2025-11-11 11:21:58 -08:00
Jamison Lahman
a1d502804a chore(gha): migrate pr-integration-tests off blacksmith (#6164) 2025-11-11 19:06:56 +00:00
Jamison Lahman
76fc01968b chore(gha): de-dupe python setup for external dep tests (#6159)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-11 09:38:01 -08:00
Jamison Lahman
f9de82c135 chore(runs-on): more instance families and use price-capacity-optimized (#6165) 2025-11-11 09:37:50 -08:00
Justin Tahara
db4b074938 fix(pegasus): Cleanup (#6163) 2025-11-11 09:26:58 -08:00
Justin Tahara
bc5a574cf1 fix(embedding): Fix Deletion of Same Name (#6149) 2025-11-10 19:37:21 -08:00
Jamison Lahman
c14414c9be feat(pre-commit): run check-yaml on .github/ (#6160) 2025-11-11 02:21:50 +00:00
Justin Tahara
770bfcf360 fix(gpt-5): Catch all (#6162) 2025-11-10 18:35:06 -08:00
Chris Weaver
67c1099f98 fix: improve /llm/provider performance (#6158) 2025-11-10 17:01:56 -08:00
Jamison Lahman
67eb54734f chore(gha): migrate playwright tests to runs-on (#6154) 2025-11-10 15:51:14 -08:00
Justin Tahara
f819fdf09b feat(auth): Allow JIT even with Invite List (#6157) 2025-11-10 14:36:59 -08:00
Justin Tahara
b39a4a075a fix(cohere): Add Billing Handler (#6156) 2025-11-10 14:31:01 -08:00
Justin Tahara
8a244aff0d feat(api): Paginated Document Search (#6155) 2025-11-10 14:10:36 -08:00
Jamison Lahman
6a74e54eda feat(gha): python tests use uv w/ caching (#6152) 2025-11-10 12:10:21 -08:00
Jamison Lahman
e87818c961 feat(gha): enable npm caching in CI (#6151) 2025-11-10 11:34:06 -08:00
dependabot[bot]
fbec393faa chore(deps): bump actions/download-artifact from 4.3.0 to 6.0.0 (#6147)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:27:45 -08:00
dependabot[bot]
da167e93ab chore(deps): bump actions/upload-artifact from 4.6.2 to 5.0.0 (#6146)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:27:36 -08:00
dependabot[bot]
91c0b273bf chore(deps): bump actions/setup-node from 4.4.0 to 6.0.0 (#6148)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:14:01 -08:00
Jamison Lahman
72d1cfa36a chore(gha): docker-login follow up (#6150) 2025-11-10 10:57:34 -08:00
Jamison Lahman
1f45ebc818 fix(gha): docker login for all external image fetching (#6139) 2025-11-10 10:34:02 -08:00
Chris Weaver
c1428d03f5 fix: infinite render on embedding model page (#6144) 2025-11-10 09:39:32 -08:00
Chris Weaver
904bcdb0fe chore: change log-level of keyword extraction failure (#6135) 2025-11-08 14:52:38 -08:00
Nikolas Garza
9caf136f0e feat: llm access controls (#5819) 2025-11-08 10:36:14 -08:00
407 changed files with 19761 additions and 6674 deletions

.github/actionlint.yml (new file)

@@ -0,0 +1,42 @@
self-hosted-runner:
# Labels of self-hosted runner in array of strings.
labels:
- extras=ecr-cache
- extras=s3-cache
- hdd=256
- runs-on
- runner=1cpu-linux-arm64
- runner=1cpu-linux-x64
- runner=2cpu-linux-arm64
- runner=2cpu-linux-x64
- runner=4cpu-linux-arm64
- runner=4cpu-linux-x64
- runner=8cpu-linux-arm64
- runner=8cpu-linux-x64
- runner=16cpu-linux-arm64
- runner=16cpu-linux-x64
- ubuntu-slim # Currently in public preview
- volume=40gb
# Configuration variables in array of strings defined in your repository or
# organization. `null` means disabling configuration variables check.
# Empty array means no configuration variable is allowed.
config-variables: null
# Configuration for file paths. The keys are glob patterns to match to file
# paths relative to the repository root. The values are the configurations for
# the file paths. Note that the path separator is always '/'.
# The following configurations are available.
#
# "ignore" is an array of regular expression patterns. Matched error messages
# are ignored. This is similar to the "-ignore" command line option.
paths:
# Glob pattern relative to the repository root for matching files. The path separator is always '/'.
# This example configures any YAML file under the '.github/workflows/' directory.
.github/workflows/**/*.{yml,yaml}:
# TODO: These are real and should be fixed eventually.
ignore:
- 'shellcheck reported issue in this script: SC2038:.+'
- 'shellcheck reported issue in this script: SC2046:.+'
- 'shellcheck reported issue in this script: SC2086:.+'
- 'shellcheck reported issue in this script: SC2193:.+'
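A minimal sketch of a CI step that would exercise this config, assuming actionlint is installed via Go; actionlint auto-discovers .github/actionlint.yml when run from the repository root, so the ignore patterns above apply without extra flags (the step name and install method are illustrative, not taken from this change):

      - name: Lint GitHub Actions workflows
        shell: bash
        run: |
          # Install the actionlint CLI (version pinning omitted for brevity).
          go install github.com/rhysd/actionlint/cmd/actionlint@latest
          # Picks up .github/actionlint.yml automatically from the repo root.
          "$(go env GOPATH)/bin/actionlint" -color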


@@ -1,25 +1,15 @@
name: "Prepare Build (OpenAPI generation)"
description: "Sets up Python with uv, installs deps, generates OpenAPI schema and Python client, uploads artifact"
inputs:
docker-username:
required: true
docker-password:
required: true
runs:
using: "composite"
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup uv
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Python dependencies with uv
shell: bash
run: |
uv pip install --system \
-r backend/requirements/default.txt \
-r backend/requirements/dev.txt
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Generate OpenAPI schema
shell: bash
@@ -29,6 +19,15 @@ runs:
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ inputs['docker-username'] }}
password: ${{ inputs['docker-password'] }}
- name: Generate OpenAPI Python client
shell: bash
run: |
@@ -41,10 +40,3 @@ runs:
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
- name: Upload OpenAPI artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
with:
name: openapi-artifacts
path: backend/generated/


@@ -0,0 +1,17 @@
name: "Setup Playwright"
description: "Sets up Playwright and system deps (assumes Python and Playwright are installed)"
runs:
using: "composite"
steps:
- name: Cache playwright cache
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
restore-keys: |
${{ runner.os }}-playwright-
- name: Install playwright
shell: bash
run: |
playwright install chromium --with-deps


@@ -0,0 +1,38 @@
name: "Setup Python and Install Dependencies"
description: "Sets up Python with uv and installs deps"
runs:
using: "composite"
steps:
- name: Setup uv
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Cache uv cache directory
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
python-version: "3.11"
- name: Create virtual environment
shell: bash
run: |
uv venv ${{ runner.temp }}/venv
echo "VENV_PATH=${{ runner.temp }}/venv" >> $GITHUB_ENV
echo "${{ runner.temp }}/venv/bin" >> $GITHUB_PATH
- name: Install Python dependencies with uv
shell: bash
run: |
uv pip install \
-r backend/requirements/default.txt \
-r backend/requirements/dev.txt \
-r backend/requirements/model_server.txt
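Taken together with the Setup Playwright action above, a minimal sketch of how a job might chain these composite actions; the job name, runner label, test command, and the setup-playwright path are assumptions for illustration (only ./.github/actions/setup-python-and-install-dependencies is referenced elsewhere in this diff):

  playwright-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Creates the uv-managed venv and installs the backend requirements (action above).
      - uses: ./.github/actions/setup-python-and-install-dependencies
      # Installs Chromium with system deps; assumes the playwright Python package was
      # pulled in by the requirements step. Path is assumed, not shown in this diff.
      - uses: ./.github/actions/setup-playwright
      - name: Run Playwright tests
        run: pytest backend/tests/playwright   # hypothetical test location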

.github/actions/slack-notify/action.yml (new file)

@@ -0,0 +1,101 @@
name: "Slack Notify on Failure"
description: "Sends a Slack notification when a workflow fails"
inputs:
webhook-url:
description: "Slack webhook URL (can also use SLACK_WEBHOOK_URL env var)"
required: false
failed-jobs:
description: "List of failed job names (newline-separated)"
required: false
title:
description: "Title for the notification"
required: false
default: "🚨 Workflow Failed"
ref-name:
description: "Git ref name (tag/branch)"
required: false
runs:
using: "composite"
steps:
- name: Send Slack notification
shell: bash
env:
SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
run: |
if [ -z "$SLACK_WEBHOOK_URL" ]; then
echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
exit 0
fi
# Get inputs with defaults
FAILED_JOBS="${{ inputs.failed-jobs }}"
TITLE="${{ inputs.title }}"
REF_NAME="${{ inputs.ref-name }}"
REPO="${{ github.repository }}"
WORKFLOW="${{ github.workflow }}"
RUN_NUMBER="${{ github.run_number }}"
RUN_ID="${{ github.run_id }}"
SERVER_URL="${{ github.server_url }}"
WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"
# Use ref_name from input or fall back to github.ref_name
if [ -z "$REF_NAME" ]; then
REF_NAME="${{ github.ref_name }}"
fi
# Escape JSON special characters
escape_json() {
local input="$1"
# Escape backslashes first (but preserve \n sequences)
# Protect \n sequences temporarily
input=$(printf '%s' "$input" | sed 's/\\n/\x01NL\x01/g')
# Escape remaining backslashes
input=$(printf '%s' "$input" | sed 's/\\/\\\\/g')
# Restore \n sequences (single backslash, will be correct in JSON)
input=$(printf '%s' "$input" | sed 's/\x01NL\x01/\\n/g')
# Escape quotes
printf '%s' "$input" | sed 's/"/\\"/g'
}
REF_NAME_ESC=$(escape_json "$REF_NAME")
FAILED_JOBS_ESC=$(escape_json "$FAILED_JOBS")
WORKFLOW_URL_ESC=$(escape_json "$WORKFLOW_URL")
TITLE_ESC=$(escape_json "$TITLE")
# Build JSON payload piece by piece
# Note: FAILED_JOBS_ESC already contains \n sequences that should remain as \n in JSON
PAYLOAD="{"
PAYLOAD="${PAYLOAD}\"text\":\"${TITLE_ESC}\","
PAYLOAD="${PAYLOAD}\"blocks\":[{"
PAYLOAD="${PAYLOAD}\"type\":\"header\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"${TITLE_ESC}\"}"
PAYLOAD="${PAYLOAD}},{"
PAYLOAD="${PAYLOAD}\"type\":\"section\","
PAYLOAD="${PAYLOAD}\"fields\":["
if [ -n "$REF_NAME" ]; then
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Ref:*\\n${REF_NAME_ESC}\"},"
fi
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Run ID:*\\n#${RUN_NUMBER}\"}"
PAYLOAD="${PAYLOAD}]"
PAYLOAD="${PAYLOAD}}"
if [ -n "$FAILED_JOBS" ]; then
PAYLOAD="${PAYLOAD},{"
PAYLOAD="${PAYLOAD}\"type\":\"section\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"mrkdwn\",\"text\":\"*Failed Jobs:*\\n${FAILED_JOBS_ESC}\"}"
PAYLOAD="${PAYLOAD}}"
fi
PAYLOAD="${PAYLOAD},{"
PAYLOAD="${PAYLOAD}\"type\":\"actions\","
PAYLOAD="${PAYLOAD}\"elements\":[{"
PAYLOAD="${PAYLOAD}\"type\":\"button\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"View Workflow Run\"},"
PAYLOAD="${PAYLOAD}\"url\":\"${WORKFLOW_URL_ESC}\""
PAYLOAD="${PAYLOAD}}]"
PAYLOAD="${PAYLOAD}}"
PAYLOAD="${PAYLOAD}]"
PAYLOAD="${PAYLOAD}}"
curl -X POST -H 'Content-type: application/json' \
--data "$PAYLOAD" \
"$SLACK_WEBHOOK_URL"


@@ -9,3 +9,12 @@ updates:
- "jmelahman"
labels:
- "dependabot:actions"
- package-ecosystem: "pip"
directory: "/backend"
schedule:
interval: "weekly"
open-pull-requests-limit: 3
assignees:
- "jmelahman"
labels:
- "dependabot:python"

.github/runs-on.yml (new file)

@@ -0,0 +1 @@
_extend: .github-private


@@ -1,4 +1,7 @@
name: Check Lazy Imports
concurrency:
group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -16,9 +19,9 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # ratchet:actions/setup-python@v4
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
- name: Check lazy imports
run: python3 backend/scripts/check_lazy_imports.py
run: python3 backend/scripts/check_lazy_imports.py

.github/workflows/deployment.yml (new file)

@@ -0,0 +1,512 @@
name: Build and Push Docker Images on Tag
on:
push:
tags:
- "*"
workflow_dispatch:
env:
IS_DRY_RUN: ${{ github.event_name == 'workflow_dispatch' }}
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
jobs:
# Determine which components to build based on the tag
determine-builds:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
outputs:
build-web: ${{ steps.check.outputs.build-web }}
build-web-cloud: ${{ steps.check.outputs.build-web-cloud }}
build-backend: ${{ steps.check.outputs.build-backend }}
build-model-server: ${{ steps.check.outputs.build-model-server }}
is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}
is-stable: ${{ steps.check.outputs.is-stable }}
is-beta: ${{ steps.check.outputs.is-beta }}
is-stable-standalone: ${{ steps.check.outputs.is-stable-standalone }}
is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
steps:
- name: Check which components to build and version info
id: check
run: |
TAG="${{ github.ref_name }}"
# Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
IS_CLOUD=false
BUILD_WEB=false
BUILD_WEB_CLOUD=false
BUILD_BACKEND=true
BUILD_MODEL_SERVER=true
IS_STABLE=false
IS_BETA=false
IS_STABLE_STANDALONE=false
IS_BETA_STANDALONE=false
if [[ "$TAG" == *cloud* ]]; then
IS_CLOUD=true
BUILD_WEB_CLOUD=true
else
BUILD_WEB=true
fi
# Version checks (for web - any stable version)
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
IS_STABLE=true
fi
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
IS_BETA=true
fi
# Version checks (for backend/model-server - stable version excluding cloud tags)
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "$TAG" != *cloud* ]]; then
IS_STABLE_STANDALONE=true
fi
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "$TAG" != *cloud* ]]; then
IS_BETA_STANDALONE=true
fi
{
echo "build-web=$BUILD_WEB"
echo "build-web-cloud=$BUILD_WEB_CLOUD"
echo "build-backend=$BUILD_BACKEND"
echo "build-model-server=$BUILD_MODEL_SERVER"
echo "is-cloud-tag=$IS_CLOUD"
echo "is-stable=$IS_STABLE"
echo "is-beta=$IS_BETA"
echo "is-stable-standalone=$IS_STABLE_STANDALONE"
echo "is-beta-standalone=$IS_BETA_STANDALONE"
echo "sanitized-tag=$SANITIZED_TAG"
} >> "$GITHUB_OUTPUT"
build-web:
needs: determine-builds
if: needs.determine-builds.outputs.build-web == 'true'
runs-on:
- runs-on
- runner=4cpu-linux-x64
- run-id=${{ github.run_id }}-web-build
- extras=ecr-cache
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
DEPLOYMENT: standalone
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable == 'true' && 'latest' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-${{ env.DEPLOYMENT }}-cache
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-${{ env.DEPLOYMENT }}-cache,mode=max
build-web-cloud:
needs: determine-builds
if: needs.determine-builds.outputs.build-web-cloud == 'true'
runs-on:
- runs-on
- runner=4cpu-linux-x64
- run-id=${{ github.run_id }}-web-cloud-build
- extras=ecr-cache
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
DEPLOYMENT: cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('web-cloud-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_CLOUD_ENABLED=true
NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-${{ env.DEPLOYMENT }}-cache
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-${{ env.DEPLOYMENT }}-cache,mode=max
build-backend:
needs: determine-builds
if: needs.determine-builds.outputs.build-backend == 'true'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-backend-build
- extras=ecr-cache
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-${{ env.DEPLOYMENT }}-cache
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-${{ env.DEPLOYMENT }}-cache,mode=max
build-model-server:
needs: determine-builds
if: needs.determine-builds.outputs.build-model-server == 'true'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-model-server-build
- volume=40gb
- extras=ecr-cache
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-${{ env.DEPLOYMENT }}-cache
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-${{ env.DEPLOYMENT }}-cache,mode=max
trivy-scan-web:
needs: [determine-builds, build-web]
if: needs.build-web.result == 'success'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-trivy-scan-web
- extras=ecr-cache
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
SCAN_IMAGE="${{ env.RUNS_ON_ECR_CACHE }}:web-${{ needs.determine-builds.outputs.sanitized-tag }}"
else
SCAN_IMAGE="docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}"
fi
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
${SCAN_IMAGE}
trivy-scan-web-cloud:
needs: [determine-builds, build-web-cloud]
if: needs.build-web-cloud.result == 'success'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-trivy-scan-web-cloud
- extras=ecr-cache
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
SCAN_IMAGE="${{ env.RUNS_ON_ECR_CACHE }}:web-cloud-${{ needs.determine-builds.outputs.sanitized-tag }}"
else
SCAN_IMAGE="docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}"
fi
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
${SCAN_IMAGE}
trivy-scan-backend:
needs: [determine-builds, build-backend]
if: needs.build-backend.result == 'success'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-trivy-scan-backend
- extras=ecr-cache
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
SCAN_IMAGE="${{ env.RUNS_ON_ECR_CACHE }}:backend-${{ needs.determine-builds.outputs.sanitized-tag }}"
else
SCAN_IMAGE="docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}"
fi
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
--ignorefile /tmp/.trivyignore \
${SCAN_IMAGE}
trivy-scan-model-server:
needs: [determine-builds, build-model-server]
if: needs.build-model-server.result == 'success'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-trivy-scan-model-server
- extras=ecr-cache
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
SCAN_IMAGE="${{ env.RUNS_ON_ECR_CACHE }}:model-server-${{ needs.determine-builds.outputs.sanitized-tag }}"
else
SCAN_IMAGE="docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}"
fi
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
${SCAN_IMAGE}
notify-slack-on-failure:
needs: [build-web, build-web-cloud, build-backend, build-model-server]
if: always() && (needs.build-web.result == 'failure' || needs.build-web-cloud.result == 'failure' || needs.build-backend.result == 'failure' || needs.build-model-server.result == 'failure') && github.event_name != 'workflow_dispatch'
runs-on: ubuntu-slim
steps:
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Determine failed jobs
id: failed-jobs
shell: bash
run: |
FAILED_JOBS=""
if [ "${{ needs.build-web.result }}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-web\\n"
fi
if [ "${{ needs.build-web-cloud.result }}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-web-cloud\\n"
fi
if [ "${{ needs.build-backend.result }}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-backend\\n"
fi
if [ "${{ needs.build-model-server.result }}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-model-server\\n"
fi
# Remove trailing \n and set output
FAILED_JOBS=$(printf '%s' "$FAILED_JOBS" | sed 's/\\n$//')
echo "jobs=$FAILED_JOBS" >> "$GITHUB_OUTPUT"
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}
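For orientation, a hedged note on the manual-dispatch path, inferred from the expressions in this workflow; the gh command is only an illustrative way to trigger it and is not part of this change:

  # A workflow_dispatch run is effectively a dry run: every "images:" expression falls back
  # to env.RUNS_ON_ECR_CACHE with component-prefixed tags (web-<sanitized-tag>,
  # backend-<sanitized-tag>, ...), the latest/beta/edge tags are guarded off, the Trivy jobs
  # scan those ECR images instead of docker.io, and the Slack failure notification is skipped.
  #
  #   gh workflow run deployment.yml --ref my-test-branch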


@@ -1,198 +0,0 @@
name: Build and Push Backend Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
jobs:
build-and-push:
# TODO: investigate a matrix build like the web container
# See https://runs-on.com/runners/linux/
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Install build-essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Backend Image Docker Build and Push
id: build
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: ${{ matrix.platform }}
push: true
build-args: |
ONYX_VERSION=${{ github.ref_name }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
with:
name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build-and-push
steps:
# Needed for trivyignore
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Download digests
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
with:
path: /tmp/digests
pattern: backend-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
# Security: Using pinned digest (0.65.0@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436)
# Security: No Docker socket mount needed for remote registry scanning
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
--ignorefile /tmp/.trivyignore \
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}


@@ -1,158 +0,0 @@
name: Build and Push Cloud Web Image on Tag
# Identical to the web container build, but with correct image tag and build args
on:
push:
tags:
- "*cloud*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
DEPLOYMENT: cloud
jobs:
build:
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: ${{ matrix.platform }}
push: true
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_CLOUD_ENABLED=true
NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
with:
name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build
steps:
- name: Download digests
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
with:
path: /tmp/digests
pattern: cloudweb-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
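For reference, the merge job's "Create manifest list and push" step above expands to roughly the following. This is a sketch only: it assumes REGISTRY_IMAGE resolves to onyxdotapp/onyx-web-server-cloud (the env block is outside this hunk), that the tag being built is v1.2.3, and that the two per-platform digest files were downloaded into /tmp/digests.

# DOCKER_METADATA_OUTPUT_JSON is exported by docker/metadata-action; the value here is illustrative.
DOCKER_METADATA_OUTPUT_JSON='{"tags":["onyxdotapp/onyx-web-server-cloud:v1.2.3"]}'
cd /tmp/digests    # holds one empty file per platform build, named by the digest hex
docker buildx imagetools create \
  $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
  $(printf 'onyxdotapp/onyx-web-server-cloud@sha256:%s ' *)
# which resolves to roughly:
#   docker buildx imagetools create -t onyxdotapp/onyx-web-server-cloud:v1.2.3 \
#     onyxdotapp/onyx-web-server-cloud@sha256:<amd64 digest> \
#     onyxdotapp/onyx-web-server-cloud@sha256:<arm64 digest>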

View File

@@ -1,207 +0,0 @@
name: Build and Push Model Server Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
jobs:
# Bypassing this check for now: skipping the build glitches
# releases and downstream builds that depend on every image being tagged in Docker
# 1) Preliminary job to check if the changed files are relevant
# check_model_server_changes:
# runs-on: ubuntu-latest
# outputs:
# changed: ${{ steps.check.outputs.changed }}
# steps:
# - name: Checkout code
# uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
#
# - name: Check if relevant files changed
# id: check
# run: |
# # Default to "false"
# echo "changed=false" >> $GITHUB_OUTPUT
#
# # Compare the previous commit (github.event.before) to the current one (github.sha)
# # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
# # set changed=true
# if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
# | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
# echo "changed=true" >> $GITHUB_OUTPUT
# fi
check_model_server_changes:
runs-on: ubuntu-latest
outputs:
changed: "true"
steps:
- name: Bypass check and set output
run: echo "changed=true" >> $GITHUB_OUTPUT
build-amd64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
env:
PLATFORM_PAIR: linux-amd64
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: System Info
run: |
df -h
free -h
docker system prune -af --volumes
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push AMD64
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64
push: true
tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64
build-args: |
ONYX_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache: true
build-arm64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-arm64"]
env:
PLATFORM_PAIR: linux-arm64
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: System Info
run: |
df -h
free -h
docker system prune -af --volumes
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push ARM64
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
push: true
tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
build-args: |
ONYX_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
merge-and-scan:
needs: [build-amd64, build-arm64, check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on: ubuntu-latest
steps:
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create and Push Multi-arch Manifest
run: |
docker buildx create --use
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
if [[ "${{ steps.check_version.outputs.is_stable }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
if [[ "${{ env.EDGE_TAG }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:edge \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
if [[ "${{ steps.check_version.outputs.is_beta }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:beta \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
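The is_stable / is_beta regexes above can be sanity-checked locally; a small sketch with invented tag names, showing which extra tags the merge step would push:

for ref in v1.2.3 v1.2.3-beta.4 v1.2.3-cloud.7 nightly-latest-20250101; do
  is_stable=false; is_beta=false
  if [[ "$ref" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "$ref" != *cloud* ]]; then is_stable=true; fi
  if [[ "$ref" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "$ref" != *cloud* ]]; then is_beta=true; fi
  echo "$ref -> stable=$is_stable beta=$is_beta"
done
# v1.2.3                  -> stable=true  beta=false   (also pushed as :latest)
# v1.2.3-beta.4           -> stable=false beta=true    (also pushed as :beta)
# v1.2.3-cloud.7          -> stable=false beta=false
# nightly-latest-20250101 -> stable=false beta=false   (pushed as :edge via EDGE_TAG)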

View File

@@ -1,204 +0,0 @@
name: Build and Push Web Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
DEPLOYMENT: standalone
jobs:
precheck:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
outputs:
should-run: ${{ steps.set-output.outputs.should-run }}
steps:
- name: Check if tag contains "cloud"
id: set-output
run: |
if [[ "${{ github.ref_name }}" == *cloud* ]]; then
echo "should-run=false" >> "$GITHUB_OUTPUT"
else
echo "should-run=true" >> "$GITHUB_OUTPUT"
fi
build:
needs: precheck
if: needs.precheck.outputs.should-run == 'true'
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: ${{ matrix.platform }}
push: true
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
with:
name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
needs:
- build
if: needs.precheck.outputs.should-run == 'true'
runs-on: ubuntu-latest
steps:
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Download digests
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
with:
path: /tmp/digests
pattern: web-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}

View File

@@ -14,13 +14,13 @@ jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower-powered instance since this just does I/O to Docker Hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@f211e3e9ded2d9377c8cadc4489a4e38014bc4c9 # ratchet:docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@dd4fa0671be5250ee6f50aedf4cb05514abda2c7 # ratchet:docker/login-action@v1
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -14,13 +14,13 @@ jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower-powered instance since this just does I/O to Docker Hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@f211e3e9ded2d9377c8cadc4489a4e38014bc4c9 # ratchet:docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@dd4fa0671be5250ee6f50aedf4cb05514abda2c7 # ratchet:docker/login-action@v1
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -7,7 +7,7 @@ permissions:
# contents: write # only for delete-branch option
issues: write
pull-requests: write
jobs:
stale:
runs-on: ubuntu-latest
@@ -20,4 +20,3 @@ jobs:
close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'
days-before-stale: 75
# days-before-close: 90 # uncomment after we test stale behavior

View File

@@ -16,18 +16,18 @@ permissions:
actions: read
contents: read
security-events: write
jobs:
scan-licenses:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'
@@ -35,7 +35,7 @@ jobs:
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- name: Get explicit and transitive dependencies
run: |
python -m pip install --upgrade pip
@@ -43,28 +43,28 @@ jobs:
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
pip freeze > requirements-all.txt
- name: Check python
id: license_check_report
uses: pilosus/action-pip-license-checker@cc7a461bfa27b44ad187b8578c881ef5138c13fd # ratchet:pilosus/action-pip-license-checker@v2
uses: pilosus/action-pip-license-checker@e909b0226ff49d3235c99c4585bc617f49fff16a # ratchet:pilosus/action-pip-license-checker@v3
with:
requirements: 'requirements-all.txt'
fail: 'Copyleft'
exclude: '(?i)^(pylint|aio[-_]*).*'
- name: Print report
if: always()
run: echo "${{ steps.license_check_report.outputs.report }}"
- name: Install npm dependencies
working-directory: ./web
run: npm ci
# be careful enabling the SARIF output and upload, as it may spam the security tab
# with a huge amount of items. Work out the issues before enabling upload.
# with a huge amount of items. Work out the issues before enabling upload.
# - name: Run Trivy vulnerability scanner in repo mode
# if: always()
# uses: aquasecurity/trivy-action@18f2510ee396bbf400402947b394f2dd8c87dbb0 # ratchet:aquasecurity/trivy-action@0.29.0
# uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
# with:
# scan-type: fs
# scan-ref: .
@@ -73,7 +73,7 @@ jobs:
# severity: HIGH,CRITICAL
# # format: sarif
# # output: trivy-results.sarif
#
#
# # - name: Upload Trivy scan results to GitHub Security tab
# # uses: github/codeql-action/upload-sarif@v3
# # with:
@@ -81,8 +81,8 @@ jobs:
scan-trivy:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -98,7 +98,7 @@ jobs:
run: docker pull onyxdotapp/onyx-backend:latest
- name: Run Trivy vulnerability scanner on backend
uses: aquasecurity/trivy-action@18f2510ee396bbf400402947b394f2dd8c87dbb0 # ratchet:aquasecurity/trivy-action@0.29.0
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -112,9 +112,9 @@ jobs:
# Web server
- name: Pull web server docker image
run: docker pull onyxdotapp/onyx-web-server:latest
- name: Run Trivy vulnerability scanner on web server
uses: aquasecurity/trivy-action@18f2510ee396bbf400402947b394f2dd8c87dbb0 # ratchet:aquasecurity/trivy-action@0.29.0
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -130,7 +130,7 @@ jobs:
run: docker pull onyxdotapp/onyx-model-server:latest
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@18f2510ee396bbf400402947b394f2dd8c87dbb0 # ratchet:aquasecurity/trivy-action@0.29.0
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -139,4 +139,4 @@ jobs:
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0
exit-code: 0
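The exclude pattern handed to the pip license checker above, (?i)^(pylint|aio[-_]*).*, skips pylint and the aio* family from the copyleft check. A quick way to see what it matches, using grep -Ei as a stand-in for the action's case-insensitive regex (package names are examples only):

printf '%s\n' pylint Pylint aiohttp aio-libs aio_pika requests | grep -Ei '^(pylint|aio[-_]*)'
# matches pylint, Pylint, aiohttp, aio-libs, aio_pika; requests still gets license-checked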

View File

@@ -1,4 +1,7 @@
name: External Dependency Unit Tests
concurrency:
group: External-Dependency-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -27,7 +30,8 @@ env:
jobs:
discover-test-dirs:
runs-on: ubuntu-latest
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
@@ -44,7 +48,11 @@ jobs:
external-dependency-unit-tests:
needs: discover-test-dirs
# Use larger runner with more resources for Vespa
runs-on: [runs-on, runner=16cpu-linux-x64, "run-id=${{ github.run_id }}", "extras=s3-cache"]
runs-on:
- runs-on
- runner=2cpu-linux-arm64
- ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=s3-cache
strategy:
fail-fast: false
matrix:
@@ -60,31 +68,20 @@ jobs:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup uv
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # ratchet:astral-sh/setup-uv@v6
with:
enable-cache: true
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Cache uv cache directory
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # ratchet:actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-${{ github.workflow }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-${{ github.workflow }}-uv-
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
python-version: "3.11"
- name: Install Dependencies
run: |
uv pip install --system \
-r backend/requirements/default.txt \
-r backend/requirements/dev.txt
playwright install chromium
playwright install-deps chromium
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Standard Dependencies
run: |

View File

@@ -1,15 +1,18 @@
name: Helm - Lint and Test Charts
concurrency:
group: Helm-Lint-and-Test-Charts-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches: [ main ]
workflow_dispatch: # Allows manual triggering
jobs:
helm-chart-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
# fetch-depth 0 is required for helm/chart-testing-action
steps:
@@ -17,12 +20,12 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
with:
version: v3.19.0
- name: Set up chart-testing
uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0
@@ -41,7 +44,7 @@ jobs:
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
@@ -51,7 +54,7 @@ jobs:
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # ratchet:helm/kind-action@v1.12.0
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'
@@ -118,7 +121,7 @@ jobs:
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Starting chart installation with monitoring ==="
# Function to monitor cluster state
monitor_cluster() {
while true; do
@@ -140,11 +143,11 @@ jobs:
sleep 60
done
}
# Start monitoring in background
monitor_cluster &
MONITOR_PID=$!
# Set up cleanup
cleanup() {
echo "=== Cleaning up monitoring process ==="
@@ -153,10 +156,10 @@ jobs:
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
# Trap cleanup on exit
trap cleanup EXIT
# Run the actual installation with detailed logging
echo "=== Starting ct install ==="
set +e
@@ -214,15 +217,15 @@ jobs:
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
echo "=== Pod descriptions for debugging ==="
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}
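The ct install step above wraps the real installation with a background cluster monitor plus an EXIT trap, so pod state keeps getting logged even if the install hangs. A condensed sketch of that pattern (the kubectl queries and ct arguments are simplified placeholders):

monitor_cluster() {
  while true; do
    echo "=== cluster snapshot $(date) ==="
    kubectl get pods --all-namespaces || true
    sleep 60
  done
}
monitor_cluster &                 # keep printing cluster state in the background
MONITOR_PID=$!
cleanup() {
  kill "$MONITOR_PID" 2>/dev/null || true
  kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
trap cleanup EXIT                 # always stop the monitor and dump recent events
ct install --target-branch main || status=$?   # placeholder for the real ct install invocation
exit "${status:-0}"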

View File

@@ -11,11 +11,6 @@ on:
- "release/**"
env:
# Private Registry Configuration
PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -35,7 +30,8 @@ env:
jobs:
discover-test-dirs:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
@@ -62,126 +58,16 @@ jobs:
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
prepare-build:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Prepare build
uses: ./.github/actions/prepare-build
build-backend-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
- name: Build and push Backend Docker image
uses: useblacksmith/build-push-action@30c71162f16ea2c27c3e21523255d209b8b538c1 # ratchet:useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
build-model-server-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
- name: Build and push Model Server Docker image
uses: useblacksmith/build-push-action@30c71162f16ea2c27c3e21523255d209b8b538c1 # ratchet:useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
push: true
outputs: type=registry
provenance: false
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-integration-image:
needs: prepare-build
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Download OpenAPI artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
with:
name: openapi-artifacts
path: backend/generated/
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
- name: Build and push integration test image with Docker Bake
env:
REGISTRY: ${{ env.PRIVATE_REGISTRY }}
TAG: test-${{ github.run_id }}
run: cd backend && docker buildx bake --no-cache --push integration
integration-tests:
needs:
[
discover-test-dirs,
build-backend-image,
build-model-server-image,
build-integration-image,
]
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
strategy:
fail-fast: false
matrix:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -192,23 +78,105 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Docker images
run: |
# Pull all images from registry in parallel
echo "Pulling Docker images in parallel..."
# Pull images from private registry
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
- name: Build and push Backend Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
# Wait for all background jobs to complete
wait
echo "All Docker images pulled successfully"
# Re-tag to remove registry prefix for docker-compose
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
build-integration-image:
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push integration test image with Docker Bake
env:
REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
run: cd backend && docker buildx bake --push integration
integration-tests:
needs:
[
discover-test-dirs,
build-backend-image,
build-model-server-image,
build-integration-image,
]
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
strategy:
fail-fast: false
matrix:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
@@ -221,7 +189,8 @@ jobs:
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
@@ -248,15 +217,15 @@ jobs:
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
@@ -265,7 +234,7 @@ jobs:
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
@@ -314,7 +283,7 @@ jobs:
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
onyxdotapp/onyx-integration:test \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/${{ matrix.test-dir.path }}
# ------------------------------------------------------------
@@ -333,18 +302,12 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose down -v
multitenant-tests:
needs:
@@ -353,35 +316,19 @@ jobs:
build-model-server-image,
build-integration-image,
]
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Docker images
run: |
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
wait
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
- name: Start Docker containers for multi-tenant tests
run: |
cd deployment/docker_compose
@@ -390,7 +337,8 @@ jobs:
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
DEV_MODE=true \
docker compose -f docker-compose.multitenant-dev.yml up \
relational_db \
@@ -453,9 +401,8 @@ jobs:
-e SKIP_RESET=true \
-e REQUIRE_EMAIL_VERIFICATION=false \
-e DISABLE_TELEMETRY=true \
-e IMAGE_TAG=test \
-e DEV_MODE=true \
onyxdotapp/onyx-integration:test \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/multitenant_tests
- name: Dump API server logs (multi-tenant)
@@ -472,7 +419,7 @@ jobs:
- name: Upload logs (multi-tenant)
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-multitenant
path: ${{ github.workspace }}/docker-compose-multitenant.log
@@ -483,8 +430,9 @@ jobs:
cd deployment/docker_compose
docker compose -f docker-compose.multitenant-dev.yml down -v
required:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
required:
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
needs: [integration-tests, multitenant-tests]
if: ${{ always() }}
steps:

View File

@@ -14,9 +14,11 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup node
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # ratchet:actions/setup-node@v4
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
@@ -28,7 +30,7 @@ jobs:
- name: Upload coverage reports
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: jest-coverage-${{ github.run_id }}
path: ./web/coverage
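A rough local equivalent of the Jest job above; the exact npm script is outside this hunk, so the test command here is an assumption:

cd web
npm ci                 # mirrors the "Install node dependencies" step
npx jest --coverage    # assumed command; coverage output lands in web/coverage, matching the uploaded artifact path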

View File

@@ -27,7 +27,7 @@ jobs:
echo "::error::❌ Your PR title does not follow the Conventional Commits format.
This check ensures that all pull requests use clear, consistent titles that help automate changelogs and improve project history.
Please update your PR title to follow the Conventional Commits style.
Please update your PR title to follow the Conventional Commits style.
Here is a link to a blog post explaining why we use the Conventional Commits style for our PR titles: https://xfuture-blog.com/working-with-conventional-commits
**Here are some examples of valid PR titles:**

View File

@@ -1,4 +1,7 @@
name: Ensure PR references Linear
concurrency:
group: Ensure-PR-references-Linear-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
pull_request:

View File

@@ -8,11 +8,6 @@ on:
types: [checks_requested]
env:
# Private Registry Configuration
PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -31,7 +26,8 @@ env:
jobs:
discover-test-dirs:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
@@ -58,127 +54,15 @@ jobs:
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
prepare-build:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Prepare build
uses: ./.github/actions/prepare-build
build-backend-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
- name: Build and push Backend Docker image
uses: useblacksmith/build-push-action@30c71162f16ea2c27c3e21523255d209b8b538c1 # ratchet:useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
build-model-server-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
- name: Build and push Model Server Docker image
uses: useblacksmith/build-push-action@30c71162f16ea2c27c3e21523255d209b8b538c1 # ratchet:useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
push: true
outputs: type=registry
provenance: false
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-integration-image:
needs: prepare-build
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Download OpenAPI artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
with:
name: openapi-artifacts
path: backend/generated/
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
- name: Build and push integration test image with Docker Bake
env:
REGISTRY: ${{ env.PRIVATE_REGISTRY }}
TAG: test-${{ github.run_id }}
run: cd backend && docker buildx bake --no-cache --push integration
integration-tests-mit:
needs:
[
discover-test-dirs,
build-backend-image,
build-model-server-image,
build-integration-image,
]
# See https://docs.blacksmith.sh/blacksmith-runners/overview
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
strategy:
fail-fast: false
matrix:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -189,23 +73,103 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Docker images
run: |
# Pull all images from registry in parallel
echo "Pulling Docker images in parallel..."
# Pull images from private registry
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
- name: Build and push Backend Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
# Wait for all background jobs to complete
wait
echo "All Docker images pulled successfully"
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# Re-tag to remove registry prefix for docker-compose
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
build-integration-image:
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push integration test image with Docker Bake
env:
REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
run: cd backend && docker buildx bake --push integration
integration-tests-mit:
needs:
[
discover-test-dirs,
build-backend-image,
build-model-server-image,
build-integration-image,
]
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
strategy:
fail-fast: false
matrix:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
@@ -217,7 +181,8 @@ jobs:
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
INTEGRATION_TESTS_MODE=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
@@ -243,15 +208,15 @@ jobs:
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
@@ -260,7 +225,7 @@ jobs:
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
@@ -310,7 +275,7 @@ jobs:
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
onyxdotapp/onyx-integration:test \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/${{ matrix.test-dir.path }}
# ------------------------------------------------------------
@@ -329,21 +294,16 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose down -v
required:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
required:
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
needs: [integration-tests-mit]
if: ${{ always() }}
steps:

View File

@@ -6,13 +6,6 @@ concurrency:
on: push
env:
# AWS ECR Configuration
AWS_REGION: ${{ secrets.AWS_REGION || 'us-west-2' }}
ECR_REGISTRY: ${{ secrets.ECR_REGISTRY }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_ECR }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_ECR }}
BUILDX_NO_DEFAULT_ATTESTATIONS: 1
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -23,131 +16,37 @@ env:
SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}
# for MCP Oauth tests
MCP_OAUTH_CLIENT_ID: ${{ secrets.MCP_OAUTH_CLIENT_ID }}
MCP_OAUTH_CLIENT_SECRET: ${{ secrets.MCP_OAUTH_CLIENT_SECRET }}
MCP_OAUTH_ISSUER: ${{ secrets.MCP_OAUTH_ISSUER }}
MCP_OAUTH_JWKS_URI: ${{ secrets.MCP_OAUTH_JWKS_URI }}
MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}
MOCK_LLM_RESPONSE: true
MCP_TEST_SERVER_PORT: 8004
MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
MCP_TEST_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
MCP_TEST_SERVER_BIND_HOST: 0.0.0.0
MCP_TEST_SERVER_PUBLIC_HOST: host.docker.internal
MCP_SERVER_HOST: 0.0.0.0
MCP_SERVER_PUBLIC_HOST: host.docker.internal
MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
jobs:
build-web-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@00943011d9042930efac3dcd3a170e4273319bc8 # ratchet:aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # ratchet:aws-actions/amazon-ecr-login@v2
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Build and push Web Docker image
uses: useblacksmith/build-push-action@30c71162f16ea2c27c3e21523255d209b8b538c1 # ratchet:useblacksmith/build-push-action@v2
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/arm64
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}
provenance: false
sbom: false
push: true
outputs: type=registry
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-backend-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@00943011d9042930efac3dcd3a170e4273319bc8 # ratchet:aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # ratchet:aws-actions/amazon-ecr-login@v2
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
- name: Build and push Backend Docker image
uses: useblacksmith/build-push-action@30c71162f16ea2c27c3e21523255d209b8b538c1 # ratchet:useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}
provenance: false
sbom: false
push: true
outputs: type=registry
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@00943011d9042930efac3dcd3a170e4273319bc8 # ratchet:aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # ratchet:aws-actions/amazon-ecr-login@v2
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@affa10db466676f3dfb3e54caeb228ee0691510f # ratchet:useblacksmith/setup-docker-builder@v1
- name: Build and push Model Server Docker image
uses: useblacksmith/build-push-action@30c71162f16ea2c27c3e21523255d209b8b538c1 # ratchet:useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}
provenance: false
sbom: false
push: true
outputs: type=registry
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@00943011d9042930efac3dcd3a170e4273319bc8 # ratchet:aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # ratchet:aws-actions/amazon-ecr-login@v2
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -155,32 +54,115 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Docker images
run: |
# Pull all images from ECR in parallel
echo "Pulling Docker images in parallel..."
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}) &
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}) &
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}) &
- name: Build and push Web Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
# Wait for all background jobs to complete
wait
echo "All Docker images pulled successfully"
build-backend-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
# Re-tag with expected names for docker-compose
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-web-server:test
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }} onyxdotapp/onyx-backend:test
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Backend Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests (${{ matrix.project }})
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}", "extras=ecr-cache"]
strategy:
fail-fast: false
matrix:
project: [admin, no-auth, exclusive]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- name: Setup node
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # ratchet:actions/setup-node@v4
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Cache playwright cache
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
restore-keys: |
${{ runner.os }}-playwright-npm-
- name: Install playwright browsers
working-directory: ./web
run: npx playwright install --with-deps
@@ -194,13 +176,24 @@ jobs:
EXA_API_KEY=${{ env.EXA_API_KEY }}
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
IMAGE_TAG=test
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
ONYX_WEB_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
EOF
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml up -d
id: start_docker
- name: Wait for service to be ready
@@ -237,18 +230,41 @@ jobs:
done
echo "Finished waiting for service."
- name: Wait for MCP OAuth mock server
run: |
echo "Waiting for MCP OAuth mock server on port ${MCP_TEST_SERVER_PORT:-8004}..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. MCP OAuth mock server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:${MCP_TEST_SERVER_PORT:-8004}/healthz" > /dev/null; then
echo "MCP OAuth mock server is ready!"
break
fi
sleep 3
done
- name: Run Playwright tests
working-directory: ./web
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
npx playwright test
npx playwright test --project ${{ matrix.project }}
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
if: always()
with:
# Includes test results and trace.zip files
name: playwright-test-results-${{ github.run_id }}
name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}
path: ./web/test-results/
retention-days: 30
@@ -262,15 +278,11 @@ jobs:
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-logs
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
run: |
cd deployment/docker_compose
docker compose down -v
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
@@ -294,7 +306,7 @@ jobs:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # ratchet:actions/setup-node@v4
# uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
# with:
# node-version: 22

View File

@@ -1,4 +1,7 @@
name: Python Checks
concurrency:
group: Python-Checks-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -12,62 +15,27 @@ jobs:
# See https://runs-on.com/runners/linux/
# Note: Mypy seems quite optimized for x64 compared to arm64.
# Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}", "extras=s3-cache"]
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup uv
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # ratchet:astral-sh/setup-uv@v6
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
enable-cache: true
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Cache uv cache directory
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
- name: Prepare build
uses: ./.github/actions/prepare-build
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
python-version: "3.11"
- name: Create virtual environment
working-directory: ./backend
run: uv venv .venv
- name: Install Python dependencies with uv
working-directory: ./backend
run: |
uv pip install \
-r requirements/default.txt \
-r requirements/dev.txt \
-r requirements/model_server.txt
- name: Generate OpenAPI schema
working-directory: ./backend
env:
PYTHONPATH: "."
run: |
uv run python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Generate OpenAPI Python client
working-directory: ./backend
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
docker-username: ${{ secrets.DOCKER_USERNAME }}
docker-password: ${{ secrets.DOCKER_TOKEN }}
- name: Cache mypy cache
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
@@ -80,13 +48,16 @@ jobs:
- name: Run MyPy
working-directory: ./backend
run: uv run mypy .
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy .
- name: Check import order with reorder-python-imports
working-directory: ./backend
run: |
find ./onyx -name "*.py" | xargs uv run reorder-python-imports --py311-plus
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Check code formatting with Black
working-directory: ./backend
run: uv run black --check .
run: black --check .

View File

@@ -1,4 +1,7 @@
name: Connector Tests
concurrency:
group: Connector-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -119,7 +122,7 @@ env:
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}", "extras=s3-cache"]
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-connectors-check", "extras=s3-cache"]
env:
PYTHONPATH: ./backend
@@ -130,31 +133,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup uv
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # ratchet:astral-sh/setup-uv@v6
with:
enable-cache: true
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Cache uv cache directory
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # ratchet:actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-${{ github.workflow }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-${{ github.workflow }}-uv-
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Dependencies
run: |
uv pip install --system \
-r backend/requirements/default.txt \
-r backend/requirements/dev.txt
playwright install chromium
playwright install-deps chromium
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
- name: Detect Connector changes
id: changes

View File

@@ -10,7 +10,7 @@ on:
description: 'Branch to run the workflow on'
required: false
default: 'main'
env:
# Bedrock
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -28,7 +28,7 @@ env:
jobs:
model-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
env:
PYTHONPATH: ./backend
@@ -53,9 +53,9 @@ jobs:
run: |
docker pull onyxdotapp/onyx-model-server:latest
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
cache: "pip"
@@ -90,15 +90,15 @@ jobs:
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
@@ -107,11 +107,11 @@ jobs:
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
@@ -127,7 +127,7 @@ jobs:
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
@@ -136,14 +136,7 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # ratchet:actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.model-server-test.yml down -v

View File

@@ -1,4 +1,7 @@
name: Python Unit Tests
concurrency:
group: Python-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -10,7 +13,8 @@ on:
jobs:
backend-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
env:
PYTHONPATH: ./backend
@@ -18,27 +22,15 @@ jobs:
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"

View File

@@ -10,13 +10,13 @@ on:
jobs:
quality-checks:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-quality-checks"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- name: Setup Terraform

View File

@@ -9,7 +9,7 @@ permissions:
jobs:
create-and-push-tag:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-create-and-push-tag"]
steps:
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes

.gitignore
View File

@@ -18,6 +18,7 @@ backend/tests/regression/search_quality/eval-*
backend/tests/regression/search_quality/search_eval_config.yaml
backend/tests/regression/search_quality/*.json
backend/onyx/evals/data/
backend/onyx/evals/one_off/*.json
*.log
# secret files

View File

@@ -1,4 +1,15 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-yaml
files: ^.github/
- repo: https://github.com/rhysd/actionlint
rev: v1.7.8
hooks:
- id: actionlint
- repo: https://github.com/psf/black
rev: 25.1.0
hooks:
@@ -54,15 +65,12 @@ repos:
language: system
pass_filenames: false
files: \.tf$
- id: check-lazy-imports
name: Check lazy imports are not directly imported
name: Check lazy imports
entry: python3 backend/scripts/check_lazy_imports.py
language: system
files: ^backend/(?!\.venv/).*\.py$
pass_filenames: false
# Note: pass_filenames is false because tsc must check the entire
# project, but the files filter ensures this only runs when relevant
# files change. Using --incremental for faster subsequent checks.
# We would like to have a mypy pre-commit hook, but because pre-commit runs
# in its own isolated environment, we would need to install

View File

@@ -1,29 +1,34 @@
<a name="readme-top"></a>
<h2 align="center">
<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
<a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
</h2>
<p align="center">Open Source AI Platform</p>
<p align="center">
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord" />
</a>
<a href="https://docs.onyx.app/" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
<a href="https://docs.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation" />
</a>
<a href="https://docs.onyx.app/" target="_blank">
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation">
<a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation" />
</a>
<a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License" />
</a>
</p>
<p align="center">
<a href="https://trendshift.io/repositories/12516" target="_blank">
<img src="https://trendshift.io/api/badge/repositories/12516" alt="onyx-dot-app/onyx | Trendshift" style="width: 250px; height: 55px;" />
</a>
</p>
**[Onyx](https://www.onyx.app/)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
**[Onyx](https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep Research, Connectors to 40+ knowledge sources, and more.
@@ -52,7 +57,7 @@ Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep
Onyx works with all LLMs (like OpenAI, Anthropic, Gemini, etc.) and self-hosted LLMs (like Ollama, vLLM, etc.)
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome)!
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)!
@@ -60,13 +65,13 @@ To learn more about the features, check out our [documentation](https://docs.ony
Onyx supports deployments in Docker, Kubernetes, Terraform, along with guides for major cloud providers.
See guides below:
- [Docker](https://docs.onyx.app/deployment/local/docker) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart) (best for most users)
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes) (best for large teams)
- [Terraform](https://docs.onyx.app/deployment/local/terraform) (best for teams already using Terraform)
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure), etc.)
- [Docker](https://docs.onyx.app/deployment/local/docker?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for most users)
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for large teams)
- [Terraform](https://docs.onyx.app/deployment/local/terraform?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for teams already using Terraform)
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), etc.)
> [!TIP]
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)**.
@@ -90,7 +95,7 @@ There are two editions of Onyx:
- Onyx Community Edition (CE) is available freely under the MIT license.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
For feature details, check out [our website](https://www.onyx.app/pricing).
For feature details, check out [our website](https://www.onyx.app/pricing?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme).

View File

@@ -7,15 +7,12 @@ have a contract or agreement with DanswerAI, you are not permitted to use the En
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV ONYX_VERSION=${ONYX_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true" \
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Install system dependencies
# cmake needed for psycopg (postgres)
@@ -128,6 +125,10 @@ COPY --chown=onyx:onyx ./assets /app/assets
ENV PYTHONPATH=/app
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@@ -6,13 +6,10 @@ AI models for Onyx. This container and all the code is MIT Licensed and free for
You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
visit https://github.com/onyx-dot-app/onyx."
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true" \
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Create non-root user for security best practices
RUN mkdir -p /app && \
@@ -23,24 +20,6 @@ RUN mkdir -p /app && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
# --- add toolchain needed for Rust/Python builds (fastuuid) ---
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH
RUN set -eux; \
apt-get update && apt-get install -y --no-install-recommends \
build-essential \
pkg-config \
curl \
ca-certificates \
# Install latest stable Rust (supports Cargo.lock v4)
&& curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable \
&& rustc --version && cargo --version \
&& apt-get remove -y --allow-remove-essential perl-base \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt && \
@@ -83,4 +62,8 @@ COPY ./model_server /app/model_server
ENV PYTHONPATH=/app
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]

View File

@@ -0,0 +1,72 @@
"""add switchover_type field and remove background_reindex_enabled
Revision ID: 2acdef638fc2
Revises: a4f23d6b71c8
Create Date: 2025-01-XX XX:XX:XX.XXXXXX
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import SwitchoverType
# revision identifiers, used by Alembic.
revision = "2acdef638fc2"
down_revision = "a4f23d6b71c8"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add switchover_type column with default value of REINDEX
op.add_column(
"search_settings",
sa.Column(
"switchover_type",
sa.Enum(SwitchoverType, native_enum=False),
nullable=False,
server_default=SwitchoverType.REINDEX.value,
),
)
# Migrate existing data: set switchover_type based on background_reindex_enabled
# REINDEX where background_reindex_enabled=True, INSTANT where False
op.execute(
"""
UPDATE search_settings
SET switchover_type = CASE
WHEN background_reindex_enabled = true THEN 'REINDEX'
ELSE 'INSTANT'
END
"""
)
# Remove the background_reindex_enabled column (replaced by switchover_type)
op.drop_column("search_settings", "background_reindex_enabled")
def downgrade() -> None:
# Re-add the background_reindex_enabled column with default value of True
op.add_column(
"search_settings",
sa.Column(
"background_reindex_enabled",
sa.Boolean(),
nullable=False,
server_default="true",
),
)
# Set background_reindex_enabled based on switchover_type
op.execute(
"""
UPDATE search_settings
SET background_reindex_enabled = CASE
WHEN switchover_type = 'INSTANT' THEN false
ELSE true
END
"""
)
# Remove the switchover_type column
op.drop_column("search_settings", "switchover_type")

View File

@@ -0,0 +1,97 @@
"""add config to federated_connector
Revision ID: 9drpiiw74ljy
Revises: 2acdef638fc2
Create Date: 2025-11-03 12:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "9drpiiw74ljy"
down_revision = "2acdef638fc2"
branch_labels = None
depends_on = None
def upgrade() -> None:
connection = op.get_bind()
# Check if column already exists in current schema
result = connection.execute(
sa.text(
"""
SELECT column_name
FROM information_schema.columns
WHERE table_schema = current_schema()
AND table_name = 'federated_connector'
AND column_name = 'config'
"""
)
)
column_exists = result.fetchone() is not None
# Add config column with default empty object (only if it doesn't exist)
if not column_exists:
op.add_column(
"federated_connector",
sa.Column(
"config", postgresql.JSONB(), nullable=False, server_default="{}"
),
)
# Data migration: Single bulk update for all Slack connectors
connection.execute(
sa.text(
"""
WITH connector_configs AS (
SELECT
fc.id as connector_id,
CASE
WHEN fcds.entities->'channels' IS NOT NULL
AND jsonb_typeof(fcds.entities->'channels') = 'array'
AND jsonb_array_length(fcds.entities->'channels') > 0
THEN
jsonb_build_object(
'channels', fcds.entities->'channels',
'search_all_channels', false
) ||
CASE
WHEN fcds.entities->'include_dm' IS NOT NULL
THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
ELSE '{}'::jsonb
END
ELSE
jsonb_build_object('search_all_channels', true) ||
CASE
WHEN fcds.entities->'include_dm' IS NOT NULL
THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
ELSE '{}'::jsonb
END
END as config
FROM federated_connector fc
LEFT JOIN LATERAL (
SELECT entities
FROM federated_connector__document_set
WHERE federated_connector_id = fc.id
AND entities IS NOT NULL
ORDER BY id
LIMIT 1
) fcds ON true
WHERE fc.source = 'FEDERATED_SLACK'
AND fcds.entities IS NOT NULL
)
UPDATE federated_connector fc
SET config = cc.config
FROM connector_configs cc
WHERE fc.id = cc.connector_id
"""
)
)
def downgrade() -> None:
op.drop_column("federated_connector", "config")

View File

@@ -0,0 +1,61 @@
"""add llm provider persona restrictions
Revision ID: a4f23d6b71c8
Revises: 5e1c073d48a3
Create Date: 2025-10-21 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a4f23d6b71c8"
down_revision = "5e1c073d48a3"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"llm_provider__persona",
sa.Column("llm_provider_id", sa.Integer(), nullable=False),
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["llm_provider_id"], ["llm_provider.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["persona_id"], ["persona.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("llm_provider_id", "persona_id"),
)
op.create_index(
"ix_llm_provider__persona_llm_provider_id",
"llm_provider__persona",
["llm_provider_id"],
)
op.create_index(
"ix_llm_provider__persona_persona_id",
"llm_provider__persona",
["persona_id"],
)
op.create_index(
"ix_llm_provider__persona_composite",
"llm_provider__persona",
["persona_id", "llm_provider_id"],
)
def downgrade() -> None:
op.drop_index(
"ix_llm_provider__persona_composite",
table_name="llm_provider__persona",
)
op.drop_index(
"ix_llm_provider__persona_persona_id",
table_name="llm_provider__persona",
)
op.drop_index(
"ix_llm_provider__persona_llm_provider_id",
table_name="llm_provider__persona",
)
op.drop_table("llm_provider__persona")

View File

@@ -1,5 +1,5 @@
variable "REGISTRY" {
default = "onyxdotapp"
variable "REPOSITORY" {
default = "onyxdotapp/onyx-integration"
}
variable "TAG" {
@@ -20,5 +20,8 @@ target "integration" {
base = "target:backend"
}
tags = ["${REGISTRY}/integration-test-onyx-integration:${TAG}"]
cache-from = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache"]
cache-to = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache,mode=max"]
tags = ["${REPOSITORY}:${TAG}"]
}

View File

@@ -161,7 +161,7 @@ def handle_send_message_simple_with_history(
persona_id=req.persona_id,
)
llm, _ = get_llms_for_persona(persona=chat_session.persona)
llm, _ = get_llms_for_persona(persona=chat_session.persona, user=user)
llm_tokenizer = get_tokenizer(
model_name=llm.config.model_name,

View File

@@ -24,6 +24,7 @@ from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.models import QADocsResponse
from onyx.chat.process_message import gather_stream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.onyxbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
from onyx.context.search.models import SavedSearchDocWithContent
from onyx.context.search.models import SearchRequest
@@ -48,9 +49,42 @@ logger = setup_logger()
basic_router = APIRouter(prefix="/query")
class DocumentSearchPagination(BaseModel):
offset: int
limit: int
returned_count: int
has_more: bool
next_offset: int | None = None
class DocumentSearchResponse(BaseModel):
top_documents: list[SavedSearchDocWithContent]
llm_indices: list[int]
pagination: DocumentSearchPagination
def _normalize_pagination(limit: int | None, offset: int | None) -> tuple[int, int]:
if limit is None:
resolved_limit = NUM_RETURNED_HITS
else:
resolved_limit = limit
if resolved_limit <= 0:
raise HTTPException(
status_code=400, detail="retrieval_options.limit must be positive"
)
if offset is None:
resolved_offset = 0
else:
resolved_offset = offset
if resolved_offset < 0:
raise HTTPException(
status_code=400, detail="retrieval_options.offset cannot be negative"
)
return resolved_limit, resolved_offset
@basic_router.post("/document-search")
@@ -64,6 +98,10 @@ def handle_search_request(
logger.notice(f"Received document search query: {query}")
llm, fast_llm = get_default_llms()
pagination_limit, pagination_offset = _normalize_pagination(
limit=search_request.retrieval_options.limit,
offset=search_request.retrieval_options.offset,
)
search_pipeline = SearchPipeline(
search_request=SearchRequest(
@@ -72,8 +110,8 @@ def handle_search_request(
human_selected_filters=search_request.retrieval_options.filters,
enable_auto_detect_filters=search_request.retrieval_options.enable_auto_detect_filters,
persona=None, # For simplicity, default settings should be good for this search
offset=search_request.retrieval_options.offset,
limit=search_request.retrieval_options.limit,
offset=pagination_offset,
limit=pagination_limit + 1,
rerank_settings=search_request.rerank_settings,
evaluation_type=search_request.evaluation_type,
chunks_above=search_request.chunks_above,
@@ -116,6 +154,9 @@ def handle_search_request(
for section in top_sections
]
# Track whether the underlying retrieval produced more items than requested
has_more_results = len(top_docs) > pagination_limit
# Deduping happens at the last step to avoid harming quality by dropping content early on
deduped_docs = top_docs
dropped_inds = None
@@ -134,7 +175,22 @@ def handle_search_request(
dropped_indices=dropped_inds,
)
return DocumentSearchResponse(top_documents=deduped_docs, llm_indices=llm_indices)
paginated_docs = deduped_docs[:pagination_limit]
llm_indices = [index for index in llm_indices if index < len(paginated_docs)]
has_more = has_more_results
pagination = DocumentSearchPagination(
offset=pagination_offset,
limit=pagination_limit,
returned_count=len(paginated_docs),
has_more=has_more,
next_offset=(pagination_offset + pagination_limit) if has_more else None,
)
return DocumentSearchResponse(
top_documents=paginated_docs,
llm_indices=llm_indices,
pagination=pagination,
)
def get_answer_stream(
@@ -162,7 +218,7 @@ def get_answer_stream(
is_for_edit=False,
)
llm = get_main_llm_from_tuple(get_llms_for_persona(persona_info))
llm = get_main_llm_from_tuple(get_llms_for_persona(persona=persona_info, user=user))
llm_tokenizer = get_tokenizer(
model_name=llm.config.model_name,

View File
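Aside (not part of the diff above): the document-search change overfetches by one (`limit + 1`) so it can detect whether more results exist, then trims to `limit` and reports `has_more`/`next_offset` in a `pagination` envelope. Below is a minimal client-side sketch of paging through results, assuming the endpoint is served at `/query/document-search` and that the request body carries `retrieval_options.limit`/`offset`; the base URL, auth header, and the `message` field name are assumptions, not confirmed by the diff.

import requests

BASE_URL = "http://localhost:8080/api"  # assumed host and API prefix
HEADERS = {"Authorization": "Bearer <token>"}  # assumed auth scheme

offset, limit = 0, 25
while True:
    resp = requests.post(
        f"{BASE_URL}/query/document-search",
        headers=HEADERS,
        json={
            "message": "quarterly revenue report",  # query field name is an assumption
            "retrieval_options": {"limit": limit, "offset": offset},
        },
    )
    resp.raise_for_status()
    body = resp.json()
    for doc in body["top_documents"]:
        pass  # consume the page of documents
    pagination = body["pagination"]
    if not pagination["has_more"]:
        break
    # server reports where the next page starts (offset + limit)
    offset = pagination["next_offset"]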

@@ -517,7 +517,7 @@ def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
try:
keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
except Exception as e:
logger.error(
logger.warning(
f"Failed to extract keywords for query: {intent_req.query} due to {e}"
)
# Fallback to keeping all words

View File

@@ -0,0 +1,47 @@
from typing import Any
from typing import Literal
from typing import TypeAlias
from pydantic import BaseModel
from onyx.llm.model_response import ModelResponseStream
class ToolCallStreamItem(BaseModel):
call_id: str | None = None
id: str | None = None
name: str | None = None
arguments: str | None = None
type: Literal["function_call"] = "function_call"
index: int | None = None
class ToolCallOutputStreamItem(BaseModel):
call_id: str | None = None
output: Any
type: Literal["function_call_output"] = "function_call_output"
RunItemStreamEventDetails: TypeAlias = ToolCallStreamItem | ToolCallOutputStreamItem
class RunItemStreamEvent(BaseModel):
type: Literal[
"message_start",
"message_done",
"reasoning_start",
"reasoning_done",
"tool_call",
"tool_call_output",
]
details: RunItemStreamEventDetails | None = None
StreamEvent: TypeAlias = ModelResponseStream | RunItemStreamEvent

View File

@@ -0,0 +1,215 @@
import json
from collections.abc import Iterator
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any
from onyx.agents.agent_framework.models import RunItemStreamEvent
from onyx.agents.agent_framework.models import StreamEvent
from onyx.agents.agent_framework.models import ToolCallOutputStreamItem
from onyx.agents.agent_framework.models import ToolCallStreamItem
from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.message_types import ChatCompletionMessage
from onyx.llm.message_types import ToolCall
from onyx.llm.model_response import ModelResponseStream
from onyx.tools.tool import RunContextWrapper
from onyx.tools.tool import Tool
@dataclass
class QueryResult:
stream: Iterator[StreamEvent]
new_messages_stateful: list[ChatCompletionMessage]
def _serialize_tool_output(output: Any) -> str:
if isinstance(output, str):
return output
try:
return json.dumps(output)
except TypeError:
return str(output)
def _update_tool_call_with_delta(
tool_calls_in_progress: dict[int, dict[str, Any]],
tool_call_delta: Any,
) -> None:
index = tool_call_delta.index
if index not in tool_calls_in_progress:
tool_calls_in_progress[index] = {
"id": None,
"name": None,
"arguments": "",
}
if tool_call_delta.id:
tool_calls_in_progress[index]["id"] = tool_call_delta.id
if tool_call_delta.function:
if tool_call_delta.function.name:
tool_calls_in_progress[index]["name"] = tool_call_delta.function.name
if tool_call_delta.function.arguments:
tool_calls_in_progress[index][
"arguments"
] += tool_call_delta.function.arguments
def query(
llm_with_default_settings: LLM,
messages: LanguageModelInput,
tools: Sequence[Tool],
context: Any,
tool_choice: ToolChoiceOptions | None = None,
) -> QueryResult:
tool_definitions = [tool.tool_definition() for tool in tools]
tools_by_name = {tool.name: tool for tool in tools}
new_messages_stateful: list[ChatCompletionMessage] = []
def stream_generator() -> Iterator[StreamEvent]:
reasoning_started = False
message_started = False
tool_calls_in_progress: dict[int, dict[str, Any]] = {}
content_parts: list[str] = []
reasoning_parts: list[str] = []
for chunk in llm_with_default_settings.stream(
prompt=messages,
tools=tool_definitions,
tool_choice=tool_choice,
):
assert isinstance(chunk, ModelResponseStream)
delta = chunk.choice.delta
finish_reason = chunk.choice.finish_reason
if delta.reasoning_content:
reasoning_parts.append(delta.reasoning_content)
if not reasoning_started:
yield RunItemStreamEvent(type="reasoning_start")
reasoning_started = True
if delta.content:
content_parts.append(delta.content)
if reasoning_started:
yield RunItemStreamEvent(type="reasoning_done")
reasoning_started = False
if not message_started:
yield RunItemStreamEvent(type="message_start")
message_started = True
if delta.tool_calls:
if reasoning_started and not message_started:
yield RunItemStreamEvent(type="reasoning_done")
reasoning_started = False
if message_started:
yield RunItemStreamEvent(type="message_done")
message_started = False
for tool_call_delta in delta.tool_calls:
_update_tool_call_with_delta(
tool_calls_in_progress, tool_call_delta
)
yield chunk
if not finish_reason:
continue
if message_started:
yield RunItemStreamEvent(type="message_done")
message_started = False
if finish_reason == "tool_calls" and tool_calls_in_progress:
sorted_tool_calls = sorted(tool_calls_in_progress.items())
# Build tool calls for the message and execute tools
assistant_tool_calls: list[ToolCall] = []
tool_outputs: dict[str, str] = {}
for _, tool_call_data in sorted_tool_calls:
call_id = tool_call_data["id"]
name = tool_call_data["name"]
arguments_str = tool_call_data["arguments"]
if call_id is None or name is None:
continue
assistant_tool_calls.append(
{
"id": call_id,
"type": "function",
"function": {
"name": name,
"arguments": arguments_str,
},
}
)
yield RunItemStreamEvent(
type="tool_call",
details=ToolCallStreamItem(
call_id=call_id,
name=name,
arguments=arguments_str,
),
)
if name in tools_by_name:
tool = tools_by_name[name]
arguments = json.loads(arguments_str)
run_context = RunContextWrapper(context=context)
# TODO: Instead of executing sequentially, execute in parallel
# In practice, it's not a must right now since we don't use parallel
# tool calls, so kicking the can down the road for now.
output = tool.run_v2(run_context, **arguments)
tool_outputs[call_id] = _serialize_tool_output(output)
yield RunItemStreamEvent(
type="tool_call_output",
details=ToolCallOutputStreamItem(
call_id=call_id,
output=output,
),
)
new_messages_stateful.append(
{
"role": "assistant",
"content": None,
"tool_calls": assistant_tool_calls,
}
)
for _, tool_call_data in sorted_tool_calls:
call_id = tool_call_data["id"]
if call_id in tool_outputs:
new_messages_stateful.append(
{
"role": "tool",
"content": tool_outputs[call_id],
"tool_call_id": call_id,
}
)
elif finish_reason == "stop" and content_parts:
new_messages_stateful.append(
{
"role": "assistant",
"content": "".join(content_parts),
}
)
return QueryResult(
stream=stream_generator(),
new_messages_stateful=new_messages_stateful,
)

View File
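For orientation only (illustrative, not part of the diff): a hypothetical caller of the new `query()` helper, showing how `QueryResult.stream` interleaves raw `ModelResponseStream` chunks with `RunItemStreamEvent` lifecycle markers while `new_messages_stateful` accumulates the assistant/tool messages for the next turn. The concrete `llm`, `search_tool`, and `ctx` objects are placeholders.

from onyx.agents.agent_framework.models import RunItemStreamEvent

# llm: an LLM instance, search_tool: a Tool, ctx: arbitrary run context (all placeholders)
result = query(
    llm_with_default_settings=llm,
    messages=[{"role": "user", "content": "Find the latest onboarding doc."}],
    tools=[search_tool],
    context=ctx,
)

for event in result.stream:
    if isinstance(event, RunItemStreamEvent):
        # lifecycle markers: message_start/done, reasoning_start/done, tool_call(_output)
        print(event.type, event.details)
    else:
        # raw ModelResponseStream chunk carrying token deltas
        print(event.choice.delta.content or "", end="")

# only populated once the stream generator has been fully consumed
next_turn_messages = result.new_messages_stateful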

@@ -121,7 +121,7 @@ def search_objects(
try:
llm_response = run_with_timeout(
30,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=30,
max_tokens=300,

View File

@@ -155,7 +155,7 @@ def research_object_source(
try:
llm_response = run_with_timeout(
30,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=30,
max_tokens=300,

View File

@@ -76,7 +76,7 @@ def consolidate_object_research(
try:
llm_response = run_with_timeout(
30,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=30,
max_tokens=300,

View File

@@ -1,6 +1,7 @@
from enum import Enum
from pydantic import BaseModel
from pydantic import ConfigDict
from onyx.agents.agent_search.dr.enums import DRPath
from onyx.agents.agent_search.dr.sub_agents.image_generation.models import (
@@ -74,8 +75,7 @@ class OrchestratorTool(BaseModel):
cost: float
tool_object: Tool | None = None # None for CLOSER
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class IterationInstructions(BaseModel):

View File
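Side note on the pattern in the hunk above (illustrative, not part of the diff): the pydantic v1 inner `class Config` is replaced by a `model_config = ConfigDict(...)` attribute, which is how pydantic v2 permits non-pydantic field types. A minimal standalone sketch, with hypothetical class names:

from pydantic import BaseModel, ConfigDict


class Engine:
    """A plain class, not a pydantic model."""


class GraphToolingSketch(BaseModel):
    # pydantic v2 equivalent of the v1 `class Config: arbitrary_types_allowed = True`
    model_config = ConfigDict(arbitrary_types_allowed=True)

    engine: Engine  # accepted only because arbitrary types are allowed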

@@ -680,7 +680,7 @@ def clarifier(
@traced(name="clarifier stream and process", type="llm")
def stream_and_process() -> BasicSearchProcessedStreamResults:
stream = graph_config.tooling.primary_llm.stream(
stream = graph_config.tooling.primary_llm.stream_langchain(
prompt=create_question_prompt(
cast(str, system_prompt_to_use),
cast(str, user_prompt_to_use),

View File

@@ -66,7 +66,7 @@ def custom_tool_act(
base_question=base_question,
tool_description=custom_tool_info.description,
)
tool_calling_msg = graph_config.tooling.primary_llm.invoke(
tool_calling_msg = graph_config.tooling.primary_llm.invoke_langchain(
tool_use_prompt,
tools=[custom_tool.tool_definition()],
tool_choice="required",
@@ -125,7 +125,7 @@ def custom_tool_act(
query=branch_query, base_question=base_question, tool_response=tool_str
)
answer_string = str(
graph_config.tooling.primary_llm.invoke(
graph_config.tooling.primary_llm.invoke_langchain(
tool_summary_prompt, timeout_override=TF_DR_TIMEOUT_SHORT
).content
).strip()

View File

@@ -65,7 +65,7 @@ def generic_internal_tool_act(
base_question=base_question,
tool_description=generic_internal_tool_info.description,
)
tool_calling_msg = graph_config.tooling.primary_llm.invoke(
tool_calling_msg = graph_config.tooling.primary_llm.invoke_langchain(
tool_use_prompt,
tools=[generic_internal_tool.tool_definition()],
tool_choice="required",
@@ -113,7 +113,7 @@ def generic_internal_tool_act(
query=branch_query, base_question=base_question, tool_response=tool_str
)
answer_string = str(
graph_config.tooling.primary_llm.invoke(
graph_config.tooling.primary_llm.invoke_langchain(
tool_summary_prompt, timeout_override=TF_DR_TIMEOUT_SHORT
).content
).strip()

View File

@@ -106,7 +106,7 @@ def extract_ert(
try:
llm_response = run_with_timeout(
KG_ENTITY_EXTRACTION_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=15,
max_tokens=300,
@@ -176,7 +176,7 @@ def extract_ert(
try:
llm_response = run_with_timeout(
KG_RELATIONSHIP_EXTRACTION_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=15,
max_tokens=300,

View File

@@ -202,7 +202,7 @@ def analyze(
llm_response = run_with_timeout(
KG_STRATEGY_GENERATION_TIMEOUT,
# fast_llm.invoke,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=5,
max_tokens=100,

View File

@@ -169,7 +169,7 @@ def _get_source_documents(
try:
llm_response = run_with_timeout(
KG_SQL_GENERATION_TIMEOUT,
llm.invoke,
llm.invoke_langchain,
prompt=msg,
timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
max_tokens=KG_SQL_GENERATION_MAX_TOKENS,
@@ -321,7 +321,7 @@ def generate_simple_sql(
try:
llm_response = run_with_timeout(
KG_SQL_GENERATION_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
max_tokens=KG_SQL_GENERATION_MAX_TOKENS,
@@ -451,7 +451,7 @@ def generate_simple_sql(
try:
llm_response = run_with_timeout(
KG_SQL_GENERATION_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
max_tokens=KG_SQL_GENERATION_MAX_TOKENS,

View File

@@ -94,7 +94,7 @@ def construct_deep_search_filters(
try:
llm_response = run_with_timeout(
KG_FILTER_CONSTRUCTION_TIMEOUT,
llm.invoke,
llm.invoke_langchain,
prompt=msg,
timeout_override=15,
max_tokens=1400,

View File

@@ -137,7 +137,7 @@ def process_individual_deep_search(
try:
llm_response = run_with_timeout(
KG_OBJECT_SOURCE_RESEARCH_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=KG_OBJECT_SOURCE_RESEARCH_TIMEOUT,
max_tokens=300,

View File

@@ -127,7 +127,7 @@ def filtered_search(
try:
llm_response = run_with_timeout(
KG_FILTERED_SEARCH_TIMEOUT,
llm.invoke,
llm.invoke_langchain,
prompt=msg,
timeout_override=30,
max_tokens=300,

View File

@@ -1,6 +1,7 @@
from uuid import UUID
from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session
from onyx.agents.agent_search.dr.enums import ResearchType
@@ -25,8 +26,7 @@ class GraphInputs(BaseModel):
structured_response_format: dict | None = None
project_instructions: str | None = None
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class GraphTooling(BaseModel):
@@ -41,8 +41,7 @@ class GraphTooling(BaseModel):
force_use_tool: ForceUseTool
using_tool_calling_llm: bool = False
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class GraphPersistence(BaseModel):
@@ -57,8 +56,7 @@ class GraphPersistence(BaseModel):
# message were flushed to; only needed for agentic search
db_session: Session
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class GraphSearchConfig(BaseModel):
@@ -87,5 +85,4 @@ class GraphConfig(BaseModel):
# Only needed for agentic search
persistence: GraphPersistence
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)

View File

@@ -1,4 +1,5 @@
from pydantic import BaseModel
from pydantic import ConfigDict
from onyx.chat.prompt_builder.schemas import PromptSnapshot
from onyx.tools.message import ToolCallSummary
@@ -38,8 +39,7 @@ class ToolChoice(BaseModel):
id: str | None
search_tool_override_kwargs: SearchToolOverrideKwargs = SearchToolOverrideKwargs()
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class ToolChoiceUpdate(BaseModel):

View File

@@ -76,7 +76,7 @@ def stream_llm_answer(
else:
citation_processor = None
for message in llm.stream(
for message in llm.stream_langchain(
prompt,
timeout_override=timeout_override,
max_tokens=max_tokens,
@@ -156,7 +156,7 @@ def invoke_llm_json(
) and supports_response_schema(llm.config.model_name, llm.config.model_provider)
response_content = str(
llm.invoke(
llm.invoke_langchain(
prompt,
tools=tools,
tool_choice=tool_choice,
@@ -224,7 +224,7 @@ def get_answer_from_llm(
else:
llm_response = run_with_timeout(
timeout,
llm.invoke,
llm.invoke_langchain,
prompt=msg,
timeout_override=timeout_override,
max_tokens=max_tokens,

View File

@@ -275,7 +275,7 @@ def summarize_history(
try:
history_response = run_with_timeout(
AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
llm.invoke,
llm.invoke_langchain,
history_context_prompt,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
max_tokens=AGENT_MAX_TOKENS_HISTORY_SUMMARY,

View File

@@ -206,6 +206,10 @@ def anonymous_user_enabled(*, tenant_id: str | None = None) -> bool:
def verify_email_is_invited(email: str) -> None:
if AUTH_TYPE in {AuthType.SAML, AuthType.OIDC}:
# SSO providers manage membership; allow JIT provisioning regardless of invites
return
whitelist = get_invited_users()
if not whitelist:
return

View File

@@ -1,3 +1,5 @@
import gc
import os
import time
import traceback
from collections import defaultdict
@@ -21,6 +23,7 @@ from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.background.celery.memory_monitoring import emit_process_memory
from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat
from onyx.background.celery.tasks.docprocessing.heartbeat import stop_heartbeat
@@ -65,6 +68,7 @@ from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingMode
from onyx.db.enums import IndexingStatus
from onyx.db.enums import SwitchoverType
from onyx.db.index_attempt import create_index_attempt_error
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
@@ -857,10 +861,10 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
tenant_id=tenant_id,
)
# Secondary indexing (only if secondary search settings exist and background reindex is enabled)
# Secondary indexing (only if secondary search settings exist and switchover_type is not INSTANT)
if (
secondary_search_settings
and secondary_search_settings.background_reindex_enabled
and secondary_search_settings.switchover_type != SwitchoverType.INSTANT
and secondary_cc_pair_ids
):
tasks_created += _kickoff_indexing_tasks(
@@ -875,11 +879,11 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
)
elif (
secondary_search_settings
and not secondary_search_settings.background_reindex_enabled
and secondary_search_settings.switchover_type == SwitchoverType.INSTANT
):
task_logger.info(
f"Skipping secondary indexing: "
f"background_reindex_enabled=False "
f"switchover_type=INSTANT "
f"for search_settings={secondary_search_settings.id}"
)
@@ -1299,12 +1303,39 @@ def _docprocessing_task(
# dummy lock to satisfy linter
per_batch_lock: RedisLock | None = None
try:
# FIX: Monitor memory before loading documents to track problematic batches
emit_process_memory(
os.getpid(),
"docprocessing",
{
"phase": "before_load",
"tenant_id": tenant_id,
"cc_pair_id": cc_pair_id,
"index_attempt_id": index_attempt_id,
"batch_num": batch_num,
},
)
# Retrieve documents from storage
documents = storage.get_batch(batch_num)
if not documents:
task_logger.error(f"No documents found for batch {batch_num}")
return
# FIX: Monitor memory after loading documents
emit_process_memory(
os.getpid(),
"docprocessing",
{
"phase": "after_load",
"tenant_id": tenant_id,
"cc_pair_id": cc_pair_id,
"index_attempt_id": index_attempt_id,
"batch_num": batch_num,
"doc_count": len(documents),
},
)
with get_session_with_current_tenant() as db_session:
# matches parts of _run_indexing
index_attempt = get_index_attempt(
@@ -1457,6 +1488,25 @@ def _docprocessing_task(
# Clean up this batch after successful processing
storage.delete_batch_by_num(batch_num)
# FIX: Explicitly clear document batch from memory and force garbage collection
# This helps prevent memory accumulation across multiple batches
del documents
gc.collect()
# FIX: Log final memory usage to track problematic tenants/CC pairs
emit_process_memory(
os.getpid(),
"docprocessing",
{
"phase": "after_processing",
"tenant_id": tenant_id,
"cc_pair_id": cc_pair_id,
"index_attempt_id": index_attempt_id,
"batch_num": batch_num,
"chunks_processed": index_pipeline_result.total_chunks,
},
)
elapsed_time = time.monotonic() - start_time
task_logger.info(
f"Completed document batch processing: "
@@ -1464,7 +1514,7 @@ def _docprocessing_task(
f"cc_pair={cc_pair_id} "
f"search_settings={index_attempt.search_settings.id} "
f"batch_num={batch_num} "
f"docs={len(documents)} "
f"docs={len(index_pipeline_result.failures) + index_pipeline_result.total_docs} "
f"chunks={index_pipeline_result.total_chunks} "
f"failures={len(index_pipeline_result.failures)} "
f"elapsed={elapsed_time:.2f}s"

View File
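The docprocessing hunks above add memory probes (emit_process_memory) around batch loading and processing, plus an explicit del documents / gc.collect() once a batch is done. emit_process_memory is an Onyx-internal helper imported from onyx.background.celery.memory_monitoring; its body is not part of this diff. A hedged sketch of what such a probe could look like, assuming psutil is available (names ending in _sketch are illustrative):

import json
import logging

import psutil  # assumption: psutil (or similar) is available for RSS measurements

logger = logging.getLogger(__name__)


def emit_process_memory_sketch(pid: int, process_name: str, context: dict) -> None:
    """Log the resident set size of `pid` together with caller-supplied context."""
    rss_mb = psutil.Process(pid).memory_info().rss / (1024 * 1024)
    logger.info(
        "process_memory name=%s pid=%d rss_mb=%.1f context=%s",
        process_name,
        pid,
        rss_mb,
        json.dumps(context),
    )


# Usage mirroring the batch flow above (the phases are the ones emitted in the hunk):
#   emit_process_memory_sketch(os.getpid(), "docprocessing", {"phase": "before_load", "batch_num": 3})
#   documents = storage.get_batch(3)
#   emit_process_memory_sketch(os.getpid(), "docprocessing", {"phase": "after_load", "doc_count": len(documents)})
#   ... index the batch ...
#   del documents
#   gc.collect()  # release the batch before the final "after_processing" measurement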

@@ -1,5 +1,6 @@
from datetime import timedelta
from sqlalchemy import func
from sqlalchemy.orm import Session
from onyx.configs.constants import NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
@@ -8,14 +9,44 @@ from onyx.db.models import IndexAttempt
from onyx.db.models import IndexAttemptError
# Always retain at least this many attempts per connector/search settings pair
NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP = 10
def get_old_index_attempts(
db_session: Session, days_to_keep: int = NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
) -> list[IndexAttempt]:
"""Get all index attempts older than the specified number of days."""
"""
Get index attempts older than the specified number of days while retaining
the latest NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP per connector/search settings pair.
"""
cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)
ranked_attempts = (
db_session.query(
IndexAttempt.id.label("attempt_id"),
IndexAttempt.time_created.label("time_created"),
func.row_number()
.over(
partition_by=(
IndexAttempt.connector_credential_pair_id,
IndexAttempt.search_settings_id,
),
order_by=IndexAttempt.time_created.desc(),
)
.label("attempt_rank"),
)
).subquery()
return (
db_session.query(IndexAttempt)
.filter(IndexAttempt.time_created < cutoff_date)
.join(
ranked_attempts,
IndexAttempt.id == ranked_attempts.c.attempt_id,
)
.filter(
ranked_attempts.c.time_created < cutoff_date,
ranked_attempts.c.attempt_rank > NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP,
)
.all()
)

View File
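The retention change above keeps the newest NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP attempts per (connector_credential_pair_id, search_settings_id) pair by ranking attempts with a row_number() window ordered newest-first, and only returns rows that are both older than the cutoff and ranked past that threshold. The same rule expressed on plain tuples, as a small self-contained sketch:

from collections import defaultdict
from datetime import datetime, timedelta

NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP = 10


def attempts_to_prune(
    attempts: list[tuple[int, int, int, datetime]],  # (id, cc_pair_id, search_settings_id, time_created)
    now: datetime,
    days_to_keep: int,
) -> list[int]:
    """Return ids that are older than the cutoff AND outside the newest N for their pair."""
    cutoff = now - timedelta(days=days_to_keep)
    by_pair: dict[tuple[int, int], list[tuple[int, datetime]]] = defaultdict(list)
    for attempt_id, cc_pair_id, settings_id, created in attempts:
        by_pair[(cc_pair_id, settings_id)].append((attempt_id, created))

    prunable: list[int] = []
    for rows in by_pair.values():
        rows.sort(key=lambda r: r[1], reverse=True)  # newest first, same order as row_number() above
        for rank, (attempt_id, created) in enumerate(rows, start=1):
            if rank > NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP and created < cutoff:
                prunable.append(attempt_id)
    return prunable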

@@ -296,10 +296,13 @@ class PromptConfig(BaseModel):
else ""
)
# Check if this persona is the default assistant
is_default_persona = default_persona and model.id == default_persona.id
# If this persona IS the default assistant, custom_instruction should be None
# Otherwise, it should be the persona's system_prompt
custom_instruction = None
if not model.is_default_persona:
if not is_default_persona:
custom_instruction = model.system_prompt or None
# Handle prompt overrides
@@ -310,7 +313,7 @@ class PromptConfig(BaseModel):
# If there's an override, apply it to the appropriate field
if override_system_prompt:
if model.is_default_persona:
if is_default_persona:
default_behavior_system_prompt = override_system_prompt
else:
custom_instruction = override_system_prompt
@@ -387,8 +390,7 @@ class AnswerPostInfo(BaseModel):
tool_result: ToolCallFinalResult | None = None
message_specific_citations: MessageSpecificCitations | None = None
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class ChatBasicResponse(BaseModel):

View File

@@ -78,6 +78,7 @@ from onyx.db.persona import get_persona_by_id
from onyx.db.projects import get_project_instructions
from onyx.db.projects import get_user_files_from_project
from onyx.db.search_settings import get_current_search_settings
from onyx.db.user_file import get_file_ids_by_user_file_ids
from onyx.document_index.factory import get_default_document_index
from onyx.feature_flags.factory import get_default_feature_flag_provider
from onyx.feature_flags.feature_flags_keys import DISABLE_SIMPLE_AGENT_FRAMEWORK
@@ -421,10 +422,10 @@ def stream_chat_message_objects(
raise RuntimeError(
"Must specify a set of documents for chat or specify search options"
)
try:
llm, fast_llm = get_llms_for_persona(
persona=persona,
user=user,
llm_override=new_msg_req.llm_override or chat_session.llm_override,
additional_headers=litellm_additional_headers,
long_term_logger=long_term_logger,
@@ -519,11 +520,12 @@ def stream_chat_message_objects(
files = load_all_chat_files(history_msgs, new_msg_req.file_descriptors)
req_file_ids = [f["id"] for f in new_msg_req.file_descriptors]
latest_query_files = [file for file in files if file.file_id in req_file_ids]
user_file_ids: list[UUID] = []
current_message_user_file_ids: list[UUID] = []
persona_user_file_ids: list[UUID] = []
if persona.user_files:
for uf in persona.user_files:
user_file_ids.append(uf.id)
persona_user_file_ids.append(uf.id)
if new_msg_req.current_message_files:
for fd in new_msg_req.current_message_files:
@@ -531,7 +533,7 @@ def stream_chat_message_objects(
if not uid:
continue
try:
user_file_ids.append(UUID(uid))
current_message_user_file_ids.append(UUID(uid))
except (TypeError, ValueError, AttributeError):
logger.warning(
"Skipping invalid user_file_id from current_message_files: %s",
@@ -543,10 +545,10 @@ def stream_chat_message_objects(
# we can just pass them into the prompt directly
(
in_memory_user_files,
user_file_models,
search_tool_override_kwargs_for_user_files,
) = parse_user_files(
user_file_ids=user_file_ids or [],
persona_user_file_ids=persona_user_file_ids,
current_message_user_file_ids=current_message_user_file_ids,
project_id=chat_session.project_id,
db_session=db_session,
persona=persona,
@@ -567,15 +569,20 @@ def stream_chat_message_objects(
]
)
# we don't want to attach project files to the user message
current_message_file_ids = []
if current_message_user_file_ids:
current_message_file_ids = get_file_ids_by_user_file_ids(
current_message_user_file_ids, db_session
)
# we don't want to attach project files and assistant files to the user message
if user_message:
attach_files_to_chat_message(
chat_message=user_message,
files=[
new_file.to_file_descriptor()
for new_file in latest_query_files
if project_file_ids is not None
and (new_file.file_id not in project_file_ids)
if (new_file.file_id in current_message_file_ids)
],
db_session=db_session,
commit=False,
@@ -751,14 +758,14 @@ def stream_chat_message_objects(
]
if not search_tool_override_kwargs_for_user_files and in_memory_user_files:
# we only want to send the user files attached to the current message
yield UserKnowledgeFilePacket(
user_files=[
FileDescriptor(
id=str(file.file_id), type=file.file_type, name=file.filename
)
for file in in_memory_user_files
if project_file_ids is not None
and (file.file_id not in project_file_ids)
if (file.file_id in current_message_file_ids)
]
)
feature_flag_provider = get_default_feature_flag_provider()
@@ -806,6 +813,7 @@ def stream_chat_message_objects(
or get_main_llm_from_tuple(
get_llms_for_persona(
persona=persona,
user=user,
llm_override=(
new_msg_req.llm_override or chat_session.llm_override
),

View File

@@ -13,11 +13,11 @@ from onyx.chat.prompt_builder.citations_prompt import compute_max_llm_input_toke
from onyx.chat.prompt_builder.utils import translate_history_to_basemessages
from onyx.file_store.models import InMemoryChatFile
from onyx.llm.interfaces import LLMConfig
from onyx.llm.llm_provider_options import OPENAI_PROVIDER_NAME
from onyx.llm.models import PreviousMessage
from onyx.llm.utils import build_content_with_imgs
from onyx.llm.utils import check_message_tokens
from onyx.llm.utils import message_to_prompt_and_imgs
from onyx.llm.utils import model_needs_formatting_reenabled
from onyx.llm.utils import model_supports_image_input
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
@@ -48,12 +48,9 @@ def default_build_system_message_v2(
prompt_config.default_behavior_system_prompt or DEFAULT_SYSTEM_PROMPT
)
# See https://simonwillison.net/tags/markdown/ for context on this temporary fix
# for o-series markdown generation
if (
llm_config.model_provider == OPENAI_PROVIDER_NAME
and llm_config.model_name.startswith("o")
):
# See https://simonwillison.net/tags/markdown/ for context on why this is needed
# for OpenAI reasoning models to have correct markdown generation
if model_needs_formatting_reenabled(llm_config.model_name):
system_prompt = CODE_BLOCK_MARKDOWN + system_prompt
tag_handled_prompt = handle_onyx_date_awareness(
@@ -134,13 +131,11 @@ def default_build_system_message(
prompt_config.custom_instructions
or prompt_config.default_behavior_system_prompt
)
# See https://simonwillison.net/tags/markdown/ for context on this temporary fix
# for o-series markdown generation
if (
llm_config.model_provider == OPENAI_PROVIDER_NAME
and llm_config.model_name.startswith("o")
):
# See https://simonwillison.net/tags/markdown/ for context on why this is needed
# for OpenAI reasoning models to have correct markdown generation
if model_needs_formatting_reenabled(llm_config.model_name):
system_prompt = CODE_BLOCK_MARKDOWN + system_prompt
tag_handled_prompt = handle_onyx_date_awareness(
system_prompt,
prompt_config,

View File
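The prompt-builder hunks above replace the hard-coded "OpenAI provider and model name starts with o" check with a model_needs_formatting_reenabled helper from onyx.llm.utils. The helper's actual rules are not shown in this diff; the sketch below is only one plausible shape of such a predicate, and the prefix list is an assumption rather than the project's real list.

def model_needs_formatting_reenabled(model_name: str) -> bool:
    """Heuristic: reasoning-style models tend to drop markdown unless it is re-enabled."""
    reasoning_prefixes = ("o1", "o3", "o4")  # illustrative prefixes, not the project's actual list
    name = model_name.lower()
    return any(name == prefix or name.startswith(prefix + "-") for prefix in reasoning_prefixes)


# default_build_system_message / default_build_system_message_v2 then prepend
# CODE_BLOCK_MARKDOWN to the system prompt whenever this returns True.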

@@ -9,8 +9,7 @@ from onyx.context.search.models import InferenceChunk
from onyx.db.models import Persona
from onyx.db.search_settings import get_multilingual_expansion
from onyx.file_store.models import InMemoryChatFile
from onyx.llm.factory import get_llms_for_persona
from onyx.llm.factory import get_main_llm_from_tuple
from onyx.llm.factory import get_llm_config_for_persona
from onyx.llm.interfaces import LLMConfig
from onyx.llm.utils import build_content_with_imgs
from onyx.llm.utils import check_number_of_tokens
@@ -93,9 +92,10 @@ def compute_max_document_tokens_for_persona(
actual_user_input: str | None = None,
) -> int:
# Use the persona directly since prompts are now embedded
# Access to persona is assumed to have been verified already
return compute_max_document_tokens(
prompt_config=PromptConfig.from_model(persona, db_session=db_session),
llm_config=get_main_llm_from_tuple(get_llms_for_persona(persona)).config,
llm_config=get_llm_config_for_persona(persona=persona, db_session=db_session),
actual_user_input=actual_user_input,
)

View File

@@ -101,20 +101,31 @@ def _separate_federated_sections(
def _compute_limit(
prompt_config: PromptConfig,
llm_config: LLMConfig,
question: str,
existing_input_tokens: int,
max_chunks: int | None,
max_window_percentage: float | None,
max_tokens: int | None,
tool_token_count: int,
prompt_config: PromptConfig | None = None,
) -> int:
llm_max_document_tokens = compute_max_document_tokens(
prompt_config=prompt_config,
llm_config=llm_config,
tool_token_count=tool_token_count,
actual_user_input=question,
)
# If prompt_config is provided (backwards compatibility), compute using the old method
if prompt_config is not None:
llm_max_document_tokens = compute_max_document_tokens(
prompt_config=prompt_config,
llm_config=llm_config,
tool_token_count=tool_token_count,
actual_user_input=None, # Will use default estimate
)
else:
# New path: existing_input_tokens is pre-computed total input token count
# This includes system prompt, history, user message, agent turns, etc.
llm_max_document_tokens = (
llm_config.max_input_tokens
- existing_input_tokens
- tool_token_count
- 40 # _MISC_BUFFER from compute_max_document_tokens
)
window_percentage_based_limit = (
max_window_percentage * llm_max_document_tokens
@@ -333,10 +344,10 @@ def _apply_pruning(
def prune_sections(
sections: list[InferenceSection],
section_relevance_list: list[bool] | None,
prompt_config: PromptConfig,
llm_config: LLMConfig,
question: str,
existing_input_tokens: int,
contextual_pruning_config: ContextualPruningConfig,
prompt_config: PromptConfig | None = None,
) -> list[InferenceSection]:
# Assumes the sections are score ordered with highest first
if section_relevance_list is not None:
@@ -357,13 +368,13 @@ def prune_sections(
)
token_limit = _compute_limit(
prompt_config=prompt_config,
llm_config=llm_config,
question=question,
existing_input_tokens=existing_input_tokens,
max_chunks=actual_num_chunks,
max_window_percentage=contextual_pruning_config.max_window_percentage,
max_tokens=contextual_pruning_config.max_tokens,
tool_token_count=contextual_pruning_config.tool_num_tokens,
prompt_config=prompt_config,
)
return _apply_pruning(
@@ -504,19 +515,19 @@ def _merge_sections(sections: list[InferenceSection]) -> list[InferenceSection]:
def prune_and_merge_sections(
sections: list[InferenceSection],
section_relevance_list: list[bool] | None,
prompt_config: PromptConfig,
llm_config: LLMConfig,
question: str,
existing_input_tokens: int,
contextual_pruning_config: ContextualPruningConfig,
prompt_config: PromptConfig | None = None,
) -> list[InferenceSection]:
# Assumes the sections are score ordered with highest first
remaining_sections = prune_sections(
sections=sections,
section_relevance_list=section_relevance_list,
prompt_config=prompt_config,
llm_config=llm_config,
question=question,
existing_input_tokens=existing_input_tokens,
contextual_pruning_config=contextual_pruning_config,
prompt_config=prompt_config,
)
merged_sections = _merge_sections(sections=remaining_sections)

View File
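In the new _compute_limit path above, the document budget is derived directly from the model's input window minus the pre-computed existing_input_tokens, the tool definition tokens, and a small buffer (the 40-token _MISC_BUFFER carried over from compute_max_document_tokens). A worked example with illustrative numbers:

max_input_tokens = 128_000       # model input window (illustrative)
existing_input_tokens = 6_000    # system prompt + history + user message + agent turns
tool_token_count = 1_500         # tokens consumed by tool definitions
_MISC_BUFFER = 40                # same buffer compute_max_document_tokens reserves

llm_max_document_tokens = (
    max_input_tokens - existing_input_tokens - tool_token_count - _MISC_BUFFER
)
assert llm_max_document_tokens == 120_460
# max_chunks, max_window_percentage, and max_tokens then cap this value further.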

@@ -64,6 +64,26 @@ if TYPE_CHECKING:
MAX_ITERATIONS = 10
# TODO: We should be able to do this a bit more cleanly since we know the schema
# ahead of time. I'll make sure to do that for when we replace AgentSDKMessage.
def _extract_tokens_from_messages(messages: list[AgentSDKMessage]) -> int:
from onyx.llm.utils import check_number_of_tokens
total_input_text_parts: list[str] = []
for msg in messages:
if isinstance(msg, dict):
content = msg.get("content") or msg.get("output")
if isinstance(content, list):
for item in content:
if isinstance(item, dict):
text = item.get("text")
if text:
total_input_text_parts.append(text)
elif isinstance(content, str):
total_input_text_parts.append(content)
return check_number_of_tokens("\n".join(total_input_text_parts))
# TODO -- this can be refactored out and played with in evals + normal demo
def _run_agent_loop(
messages: list[AgentSDKMessage],
@@ -116,6 +136,7 @@ def _run_agent_loop(
+ [current_user_message]
)
current_messages = previous_messages + agent_turn_messages
ctx.current_input_tokens = _extract_tokens_from_messages(current_messages)
if not available_tools:
tool_choice = None
@@ -167,6 +188,9 @@ def _run_agent_loop(
)
# 3. Assign citation numbers to tool call outputs
# Instead of doing this complex parsing from the tool call response,
# I could have just used the ToolCallOutput event from the Agents SDK.
# TODO: When agent framework is gone, I can just use our ToolCallOutput event.
citation_result = assign_citation_numbers_recent_tool_calls(
agent_turn_messages, ctx
)
@@ -213,6 +237,7 @@ def _fast_chat_turn_core(
chat_session_id,
dependencies.redis_client,
)
ctx = starter_context or ChatTurnContext(
run_dependencies=dependencies,
chat_session_id=chat_session_id,

View File
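_extract_tokens_from_messages above walks AgentSDKMessage dicts, pulling text out of either a plain string content, a list of content parts with a text field, or a tool output field, then token-counts the joined text. Example message shapes it would handle (the role/type values are illustrative, inferred from the code rather than from the SDK):

messages = [
    # plain string content
    {"role": "system", "content": "You are a helpful assistant."},
    # list-of-parts content; only the "text" entries are counted
    {"role": "user", "content": [{"type": "input_text", "text": "hi there"}]},
    # tool output message; falls back to the "output" field
    {"type": "function_call_output", "output": "tool result text"},
]
# _extract_tokens_from_messages joins "You are a helpful assistant.", "hi there", and
# "tool result text" with newlines and passes the result to check_number_of_tokens;
# the count is then stored on ctx.current_input_tokens each loop iteration.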

@@ -87,3 +87,6 @@ class ChatTurnContext:
# not be emitted to the frontend (e.g. out of order packets)
# TODO: remove this once Agents SDK fixes the bug with Anthropic reasoning
current_output_index: int | None = None
# Token count of all current input context (system, history, user message, agent turns, etc.)
# Updated dynamically as the conversation progresses through tool calls
current_input_tokens: int = 0

View File

@@ -3,12 +3,11 @@ from uuid import UUID
from sqlalchemy.orm import Session
from onyx.db.models import Persona
from onyx.db.models import UserFile
from onyx.db.projects import get_user_files_from_project
from onyx.db.user_file import update_last_accessed_at_for_user_files
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import get_user_files_as_user
from onyx.file_store.utils import load_in_memory_chat_files
from onyx.file_store.utils import validate_user_files_ownership
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.utils.logger import setup_logger
@@ -17,37 +16,40 @@ logger = setup_logger()
def parse_user_files(
user_file_ids: list[UUID],
persona_user_file_ids: list[UUID],
current_message_user_file_ids: list[UUID],
db_session: Session,
persona: Persona,
actual_user_input: str,
project_id: int | None,
# should only be None if auth is disabled
user_id: UUID | None,
) -> tuple[list[InMemoryChatFile], list[UserFile], SearchToolOverrideKwargs | None]:
) -> tuple[list[InMemoryChatFile], SearchToolOverrideKwargs | None]:
"""
Parse user files and project into in-memory chat files and create search tool override kwargs.
Only creates SearchToolOverrideKwargs if token overflow occurs.
Args:
user_file_ids: List of user file IDs to load
persona_user_file_ids: List of user file IDs attached to the persona
current_message_user_file_ids: List of user file IDs from the current message
db_session: Database session
persona: Persona to calculate available tokens
actual_user_input: User's input message for token calculation
project_id: Project ID to validate file ownership
user_id: User ID to validate file ownership
project_id: Project ID to load associated files
user_id: User ID for file ownership validation and LLM access
Returns:
Tuple of (
loaded user files,
user file models,
search tool override kwargs if token
overflow
search tool override kwargs if token overflow occurs
)
"""
# Return empty results if no files or project specified
if not user_file_ids and not project_id:
return [], [], None
if (
not persona_user_file_ids
and not current_message_user_file_ids
and not project_id
):
return [], None
project_user_file_ids = []
@@ -60,7 +62,9 @@ def parse_user_files(
)
# Combine user-provided and project-derived user file IDs
combined_user_file_ids = user_file_ids + project_user_file_ids or []
combined_user_file_ids = (
persona_user_file_ids + current_message_user_file_ids + project_user_file_ids
)
# Load user files from the database into memory
user_files = load_in_memory_chat_files(
@@ -68,14 +72,15 @@ def parse_user_files(
db_session,
)
user_file_models = get_user_files_as_user(
combined_user_file_ids,
# current message files should be owned by the user
validate_user_files_ownership(
current_message_user_file_ids,
user_id,
db_session,
)
# Update last accessed at for the user files which are used in the chat
if user_file_ids or project_user_file_ids:
if combined_user_file_ids:
# update_last_accessed_at_for_user_files expects list[UUID]
update_last_accessed_at_for_user_files(
combined_user_file_ids,
@@ -96,6 +101,7 @@ def parse_user_files(
)
# Calculate available tokens for documents based on prompt, user input, etc.
# Access to persona is assumed to have been verified already
available_tokens = compute_max_document_tokens_for_persona(
persona=persona,
db_session=db_session,
@@ -114,7 +120,7 @@ def parse_user_files(
# we can just pass them into the prompt directly
if have_enough_tokens:
# No search tool override needed - files can be passed directly
return user_files, user_file_models, None
return user_files, None
# Token overflow - need to use search tool
override_kwargs = SearchToolOverrideKwargs(
@@ -122,10 +128,10 @@ def parse_user_files(
alternate_db_session=None,
retrieved_sections_callback=None,
skip_query_analysis=have_enough_tokens,
user_file_ids=user_file_ids or [],
user_file_ids=current_message_user_file_ids + persona_user_file_ids or [],
project_id=(
project_id if persona.is_default_persona else None
), # if the persona is not default, we don't want to use the project files
)
return user_files, user_file_models, override_kwargs
return user_files, override_kwargs

View File

@@ -971,21 +971,54 @@ class GoogleDriveConnector(
)
for file in drive_files:
document_id = onyx_document_id_from_drive_file(file.drive_file)
logger.debug(
f"Updating checkpoint for file: {file.drive_file.get('name')}. "
f"Seen: {document_id in checkpoint.all_retrieved_file_ids}"
)
checkpoint.completion_map[file.user_email].update(
drive_file = file.drive_file or {}
completion = checkpoint.completion_map[file.user_email]
completed_until = completion.completed_until
modified_time = drive_file.get(GoogleFields.MODIFIED_TIME.value)
if isinstance(modified_time, str):
try:
completed_until = datetime.fromisoformat(modified_time).timestamp()
except ValueError:
logger.warning(
"Invalid modifiedTime for file '%s' (stage=%s, user=%s).",
drive_file.get("id"),
file.completion_stage,
file.user_email,
)
completion.update(
stage=file.completion_stage,
completed_until=datetime.fromisoformat(
file.drive_file[GoogleFields.MODIFIED_TIME.value]
).timestamp(),
completed_until=completed_until,
current_folder_or_drive_id=file.parent_id,
)
if document_id not in checkpoint.all_retrieved_file_ids:
checkpoint.all_retrieved_file_ids.add(document_id)
if file.error is not None or not drive_file:
yield file
continue
try:
document_id = onyx_document_id_from_drive_file(drive_file)
except KeyError as exc:
logger.warning(
"Drive file missing id/webViewLink (stage=%s user=%s). Skipping.",
file.completion_stage,
file.user_email,
)
if file.error is None:
file.error = exc # type: ignore[assignment]
yield file
continue
logger.debug(
f"Updating checkpoint for file: {drive_file.get('name')}. "
f"Seen: {document_id in checkpoint.all_retrieved_file_ids}"
)
if document_id in checkpoint.all_retrieved_file_ids:
continue
checkpoint.all_retrieved_file_ids.add(document_id)
yield file
def _manage_oauth_retrieval(
self,

View File
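The Google Drive hunk above stops trusting modifiedTime blindly: it falls back to the previous completed_until when the field is missing or unparsable, and skips files that lack an id/webViewLink instead of raising. A small sketch of the defensive timestamp handling; note that datetime.fromisoformat only accepts a trailing "Z" (as Drive's RFC 3339 timestamps use) on Python 3.11+, so older interpreters would take the ValueError branch for those values.

from datetime import datetime


def checkpoint_timestamp(modified_time: object, fallback: float) -> float:
    """Parse a Drive modifiedTime if possible, otherwise keep the prior completed_until."""
    if not isinstance(modified_time, str):
        return fallback
    try:
        return datetime.fromisoformat(modified_time).timestamp()
    except ValueError:
        return fallback


assert checkpoint_timestamp(None, 123.0) == 123.0
assert checkpoint_timestamp("2025-01-15T09:30:00+00:00", 0.0) > 0.0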

@@ -105,44 +105,40 @@ class TeamsConnector(
if self.graph_client is None:
raise ConnectorMissingCredentialError("Teams credentials not loaded.")
# Determine timeout based on special characters
# Check if any requested teams have special characters that need client-side filtering
has_special_chars = _has_odata_incompatible_chars(self.requested_team_list)
timeout = 30 if has_special_chars else 10
if has_special_chars:
logger.info(
"Some requested team names contain special characters (&, (, )) that require "
"client-side filtering during data retrieval."
)
# Minimal validation: just check if we can access the teams endpoint
timeout = 10 # Short timeout for basic validation
try:
# Minimal call to confirm we can retrieve Teams
# Use longer timeout if team names have special characters (requires client-side filtering)
# For validation, do a lightweight check instead of full team search
logger.info(
f"Requested team count: {len(self.requested_team_list) if self.requested_team_list else 0}, "
f"Has special chars: {has_special_chars}, "
f"Timeout: {timeout}s"
f"Has special chars: {has_special_chars}"
)
found_teams = run_with_timeout(
validation_query = self.graph_client.teams.get().top(1)
run_with_timeout(
timeout=timeout,
func=_collect_all_teams,
graph_client=self.graph_client,
requested=self.requested_team_list,
func=lambda: validation_query.execute_query(),
)
logger.info(
f"Teams validation successful - " f"Found {len(found_teams)} team(s)"
"Teams validation successful - Access to teams endpoint confirmed"
)
except TimeoutError as e:
if has_special_chars:
raise ConnectorValidationError(
f"Timeout while fetching Teams (waited {timeout}s). "
f"Team names with special characters (&, (, )) require fetching all teams "
f"for client-side filtering, which can take longer. "
f"Error: {e}"
)
else:
raise ConnectorValidationError(
f"Timeout while fetching Teams (waited {timeout}s). "
f"This may indicate network issues or a large number of teams. "
f"Error: {e}"
)
raise ConnectorValidationError(
f"Timeout while validating Teams access (waited {timeout}s). "
f"This may indicate network issues or authentication problems. "
f"Error: {e}"
)
except ClientRequestException as e:
if not e.response:
@@ -176,12 +172,6 @@ class TeamsConnector(
f"Unexpected error during Teams validation: {e}"
)
if not found_teams:
raise ConnectorValidationError(
"No Teams found for the given credentials. "
"Either there are no Teams in this tenant, or your app does not have permission to view them."
)
# impls for CheckpointedConnector
def build_dummy_checkpoint(self) -> TeamsCheckpoint:
@@ -262,8 +252,8 @@ class TeamsConnector(
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
_end: SecondsSinceUnixEpoch | None = None,
_callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
start = start or 0
@@ -274,7 +264,9 @@ class TeamsConnector(
for team in teams:
if not team.id:
logger.warn(f"Expected a team with an id, instead got no id: {team=}")
logger.warning(
f"Expected a team with an id, instead got no id: {team=}"
)
continue
channels = _collect_all_channels_from_team(
@@ -283,7 +275,7 @@ class TeamsConnector(
for channel in channels:
if not channel.id:
logger.warn(
logger.warning(
f"Expected a channel with an id, instead got no id: {channel=}"
)
continue
@@ -319,18 +311,70 @@ class TeamsConnector(
slim_doc_buffer = []
def _has_odata_incompatible_chars(team_names: list[str] | None) -> bool:
"""Check if any team name contains characters that break OData filters.
def _escape_odata_string(name: str) -> str:
"""Escape special characters for OData string literals.
The &, (, and ) characters are not allowed in OData string literals and are
reserved characters in OData syntax. Server-side filtering is not possible for
team names containing these characters.
Uses proper OData v4 string literal escaping:
- Single quotes: ' becomes ''
- Names containing other reserved characters (&, (, )) are routed to client-side filtering instead of eq filters
"""
# Escape single quotes for OData syntax (replace ' with '')
escaped = name.replace("'", "''")
return escaped
def _has_odata_incompatible_chars(team_names: list[str] | None) -> bool:
"""Check if any team name contains characters that break Microsoft Graph OData filters.
The Microsoft Graph Teams API has limited OData support. Characters like
&, (, and ) cause parsing errors and require client-side filtering instead.
"""
if not team_names:
return False
return any(char in name for name in team_names for char in ["&", "(", ")"])
def _can_use_odata_filter(
team_names: list[str] | None,
) -> tuple[bool, list[str], list[str]]:
"""Determine which teams can use OData filtering vs client-side filtering.
Microsoft Graph /teams endpoint OData limitations:
- Only supports basic 'eq' operators in filters
- No 'contains', 'startswith', or other advanced operators
- Special characters (&, (, )) break OData parsing
Returns:
tuple: (can_use_odata, safe_names, problematic_names)
"""
if not team_names:
return False, [], []
safe_names = []
problematic_names = []
for name in team_names:
if any(char in name for char in ["&", "(", ")"]):
problematic_names.append(name)
else:
safe_names.append(name)
return bool(safe_names), safe_names, problematic_names
def _build_simple_odata_filter(safe_names: list[str]) -> str | None:
"""Build simple OData filter using only 'eq' operators for safe names."""
if not safe_names:
return None
filter_parts = []
for name in safe_names:
escaped_name = _escape_odata_string(name)
filter_parts.append(f"displayName eq '{escaped_name}'")
return " or ".join(filter_parts)
def _construct_semantic_identifier(channel: Channel, top_message: Message) -> str:
top_message_user_name: str
@@ -340,7 +384,7 @@ def _construct_semantic_identifier(channel: Channel, top_message: Message) -> st
user_display_name if user_display_name else "Unknown User"
)
else:
logger.warn(f"Message {top_message=} has no `from.user` field")
logger.warning(f"Message {top_message=} has no `from.user` field")
top_message_user_name = "Unknown User"
top_message_content = top_message.body.content or ""
@@ -433,45 +477,72 @@ def _collect_all_teams(
graph_client: GraphClient,
requested: list[str] | None = None,
) -> list[Team]:
"""Collect teams from Microsoft Graph using appropriate filtering strategy.
For teams with special characters (&, (, )), uses client-side filtering
with paginated search. For teams without special characters, uses efficient
OData server-side filtering.
Args:
graph_client: Authenticated Microsoft Graph client
requested: List of team names to find, or None for all teams
Returns:
List of Team objects matching the requested names
"""
teams: list[Team] = []
next_url: str | None = None
# Check if team names have special characters that break OData filters
has_special_chars = _has_odata_incompatible_chars(requested)
if (
has_special_chars and requested
): # requested must exist if has_special_chars is True
logger.info(
f"Team name(s) contain special characters (&, (, or )) which are not supported "
f"in OData string literals. Fetching all teams and using client-side filtering. "
f"Count: {len(requested)}"
)
# Determine filtering strategy based on Microsoft Graph limitations
if not requested:
# No specific teams requested - return empty list (avoid fetching all teams)
logger.info("No specific teams requested - returning empty list")
return []
# Build OData filter for requested teams (only if we didn't already return from raw HTTP above)
filter = None
use_filter = (
bool(requested) and not has_special_chars
) # Skip OData for special chars (fallback to client-side)
if use_filter and requested:
filter_parts = []
for name in requested:
# Escape single quotes for OData syntax (replace ' with '')
escaped_name = name.replace("'", "''")
filter_parts.append(f"displayName eq '{escaped_name}'")
filter = " or ".join(filter_parts)
_, safe_names, problematic_names = _can_use_odata_filter(requested)
if problematic_names and not safe_names:
# ALL requested teams have special characters - cannot use OData filtering
logger.info(
f"All requested team names contain special characters (&, (, )) which require "
f"client-side filtering. Using basic /teams endpoint with pagination. "
f"Teams: {problematic_names}"
)
# Use unfiltered query with pagination limit to avoid fetching too many teams
use_client_side_filtering = True
odata_filter = None
elif problematic_names and safe_names:
# Mixed scenario - need to fetch more teams to find the problematic ones
logger.info(
f"Mixed team types: will use client-side filtering for all. "
f"Safe names: {safe_names}, Special char names: {problematic_names}"
)
use_client_side_filtering = True
odata_filter = None
elif safe_names:
# All names are safe - use OData filtering
logger.info(f"Using OData filtering for all requested teams: {safe_names}")
use_client_side_filtering = False
odata_filter = _build_simple_odata_filter(safe_names)
else:
# No valid names
return []
# Track pagination to avoid fetching too many teams for client-side filtering
max_pages = 200
page_count = 0
while True:
try:
if filter:
# Use normal filter for teams without special characters
query = graph_client.teams.get().filter(filter)
# Add header to work around Microsoft Graph API ampersand bug
query.before_execute(lambda req: _add_prefer_header(request=req))
if use_client_side_filtering:
# Use basic /teams endpoint with top parameter to limit results per page
query = graph_client.teams.get().top(50) # Limit to 50 teams per page
else:
query = graph_client.teams.get_all(
# explicitly needed because of incorrect type definitions provided by the `office365` library
page_loaded=lambda _: None
)
# Use OData filter with only 'eq' operators
query = graph_client.teams.get().filter(odata_filter)
# Add header to work around Microsoft Graph API issues
query.before_execute(lambda req: _add_prefer_header(request=req))
if next_url:
url = next_url
@@ -481,17 +552,19 @@ def _collect_all_teams(
team_collection = query.execute_query()
except (ClientRequestException, ValueError) as e:
# If OData filter fails, fallback to client-side filtering
if use_filter:
# If OData filter fails, fall back to client-side filtering
if not use_client_side_filtering and odata_filter:
logger.warning(
f"OData filter failed with {type(e).__name__}: {e}. "
f"Falling back to client-side filtering."
f"OData filter failed: {e}. Falling back to client-side filtering."
)
use_filter = False
filter = None
use_client_side_filtering = True
odata_filter = None
teams = []
next_url = None
page_count = 0
continue
# If client-side approach also fails, re-raise
logger.error(f"Teams query failed: {e}")
raise
filtered_teams = (
@@ -501,6 +574,32 @@ def _collect_all_teams(
)
teams.extend(filtered_teams)
# For client-side filtering, check if we found all requested teams or hit page limit
if use_client_side_filtering:
page_count += 1
found_team_names = {
team.display_name for team in teams if team.display_name
}
requested_set = set(requested)
# Log progress every 10 pages to avoid excessive logging
if page_count % 10 == 0:
logger.info(
f"Searched {page_count} pages, found {len(found_team_names)} matching teams so far"
)
# Stop if we found all requested teams or hit the page limit
if requested_set.issubset(found_team_names):
logger.info(f"Found all requested teams after {page_count} pages")
break
elif page_count >= max_pages:
logger.warning(
f"Reached maximum page limit ({max_pages}) while searching for teams. "
f"Found: {found_team_names & requested_set}, "
f"Missing: {requested_set - found_team_names}"
)
break
if not team_collection.has_next:
break
@@ -514,6 +613,63 @@ def _collect_all_teams(
return teams
def _normalize_team_name(name: str) -> str:
"""Normalize team name for flexible matching."""
if not name:
return ""
# Convert to lowercase and strip whitespace for case-insensitive matching
return name.lower().strip()
def _matches_requested_team(
team_display_name: str, requested: list[str] | None
) -> bool:
"""Check if team display name matches any of the requested team names.
Uses flexible matching to handle slight variations in team names.
"""
if not requested or not team_display_name:
return (
not requested
) # If no teams requested, match all; if no name, don't match
normalized_team_name = _normalize_team_name(team_display_name)
for requested_name in requested:
normalized_requested = _normalize_team_name(requested_name)
# Exact match after normalization
if normalized_team_name == normalized_requested:
return True
# Flexible matching - check if team name contains all significant words
# This helps with slight variations in formatting
team_words = set(normalized_team_name.split())
requested_words = set(normalized_requested.split())
# If the requested name has special characters, split on those too
for char in ["&", "(", ")"]:
if char in normalized_requested:
# Split on special characters and add words
parts = normalized_requested.replace(char, " ").split()
requested_words.update(parts)
# Remove very short words that aren't meaningful
meaningful_requested_words = {
word for word in requested_words if len(word) >= 3
}
# Check if team name contains most of the meaningful words
if (
meaningful_requested_words
and len(meaningful_requested_words & team_words)
>= len(meaningful_requested_words) * 0.7
):
return True
return False
def _filter_team(
team: Team,
requested: list[str] | None = None,
@@ -522,7 +678,7 @@ def _filter_team(
Returns true if:
- Team is not expired / deleted
- Team has a display-name and ID
- Team display-name is in the requested teams list
- Team display-name matches any of the requested teams (with flexible matching)
Otherwise, returns false.
"""
@@ -530,7 +686,7 @@ def _filter_team(
if not team.id or not team.display_name:
return False
if requested and team.display_name not in requested:
if not _matches_requested_team(team.display_name, requested):
return False
props = team.properties

View File
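The Teams hunks above split requested team names into OData-safe names (filtered server-side with eq, single quotes doubled per OData v4) and names containing &, (, or ), which are matched client-side with paginated fetching. An example of the filter string the eq-only path produces:

def build_odata_filter(safe_names: list[str]) -> str:
    parts = []
    for name in safe_names:
        escaped = name.replace("'", "''")  # OData v4: escape a single quote by doubling it
        parts.append(f"displayName eq '{escaped}'")
    return " or ".join(parts)


assert build_odata_filter(["Engineering", "Bob's Team"]) == (
    "displayName eq 'Engineering' or displayName eq 'Bob''s Team'"
)
# A name like "R&D (Core)" would instead go through the paginated client-side path above.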

@@ -1,20 +1,26 @@
import json
import re
import time
from datetime import datetime
from datetime import timedelta
from typing import Any
from langchain_core.messages import HumanMessage
from pydantic import ValidationError
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from sqlalchemy.orm import Session
from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import TextSection
from onyx.context.search.federated.models import SlackMessage
from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
from onyx.context.search.federated.slack_search_utils import build_slack_queries
from onyx.context.search.federated.slack_search_utils import ChannelTypeString
from onyx.context.search.federated.slack_search_utils import get_channel_type
from onyx.context.search.federated.slack_search_utils import is_recency_query
from onyx.context.search.federated.slack_search_utils import should_include_message
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import SearchQuery
from onyx.db.document import DocumentSource
@@ -22,15 +28,15 @@ from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.document_index_utils import (
get_multipass_config,
)
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.indexing.chunker import Chunker
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.models import DocAwareChunk
from onyx.llm.factory import get_default_llms
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.onyxbot.slack.models import ChannelType
from onyx.onyxbot.slack.models import SlackContext
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
from onyx.redis.redis_pool import get_redis_client
from onyx.server.federated.models import FederatedConnectorDetail
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time
@@ -40,6 +46,182 @@ logger = setup_logger()
HIGHLIGHT_START_CHAR = "\ue000"
HIGHLIGHT_END_CHAR = "\ue001"
CHANNEL_TYPES = ["public_channel", "im", "mpim", "private_channel"]
CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24 # 24 hours
SLACK_THREAD_CONTEXT_WINDOW = 3 # Number of messages before matched message to include
CHANNEL_METADATA_MAX_RETRIES = 3 # Maximum retry attempts for channel metadata fetching
CHANNEL_METADATA_RETRY_DELAY = 1 # Initial retry delay in seconds (exponential backoff)
def fetch_and_cache_channel_metadata(
access_token: str, team_id: str, include_private: bool = True
) -> dict[str, dict[str, Any]]:
"""
Fetch ALL channel metadata in one API call and cache it.
Returns a dict mapping channel_id -> metadata including name, type, etc.
This replaces multiple conversations.info calls with a single conversations.list.
Note: We ALWAYS fetch all channel types (including private) and cache them together.
This ensures a single cache entry per team, avoiding duplicate API calls.
"""
# Use tenant-specific Redis client
redis_client = get_redis_client()
# (tenant_id prefix is added automatically by TenantRedis)
cache_key = f"slack_federated_search:{team_id}:channels:metadata"
try:
cached = redis_client.get(cache_key)
if cached:
logger.info(f"Channel metadata cache HIT for team {team_id}")
cached_str: str = (
cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
)
cached_data: dict[str, dict[str, Any]] = json.loads(cached_str)
logger.info(f"Loaded {len(cached_data)} channels from cache")
if not include_private:
filtered = {
k: v
for k, v in cached_data.items()
if v.get("type") != "private_channel"
}
logger.info(f"Filtered to {len(filtered)} channels (exclude private)")
return filtered
return cached_data
except Exception as e:
logger.warning(f"Error reading from channel metadata cache: {e}")
# Cache miss - fetch from Slack API with retry logic
logger.info(f"Channel metadata cache MISS for team {team_id} - fetching from API")
slack_client = WebClient(token=access_token)
channel_metadata: dict[str, dict[str, Any]] = {}
# Retry logic with exponential backoff
last_exception = None
for attempt in range(CHANNEL_METADATA_MAX_RETRIES):
try:
# ALWAYS fetch all channel types including private
channel_types = ",".join(CHANNEL_TYPES)
# Fetch all channels in one call
cursor = None
channel_count = 0
while True:
response = slack_client.conversations_list(
types=channel_types,
exclude_archived=True,
limit=1000,
cursor=cursor,
)
response.validate()
# Cast response.data to dict for type checking
response_data: dict[str, Any] = response.data # type: ignore
for ch in response_data.get("channels", []):
channel_id = ch.get("id")
if not channel_id:
continue
# Determine channel type
channel_type_enum = get_channel_type(channel_info=ch)
channel_type = channel_type_enum.value
channel_metadata[channel_id] = {
"name": ch.get("name", ""),
"type": channel_type,
"is_private": ch.get("is_private", False),
"is_member": ch.get("is_member", False),
}
channel_count += 1
cursor = response_data.get("response_metadata", {}).get("next_cursor")
if not cursor:
break
logger.info(f"Fetched {channel_count} channels for team {team_id}")
# Cache the results
try:
redis_client.set(
cache_key,
json.dumps(channel_metadata),
ex=CHANNEL_METADATA_CACHE_TTL,
)
logger.info(
f"Cached {channel_count} channels for team {team_id} (TTL: {CHANNEL_METADATA_CACHE_TTL}s, key: {cache_key})"
)
except Exception as e:
logger.warning(f"Error caching channel metadata: {e}")
return channel_metadata
except SlackApiError as e:
last_exception = e
if attempt < CHANNEL_METADATA_MAX_RETRIES - 1:
retry_delay = CHANNEL_METADATA_RETRY_DELAY * (2**attempt)
logger.warning(
f"Failed to fetch channel metadata (attempt {attempt + 1}/{CHANNEL_METADATA_MAX_RETRIES}): {e}. "
f"Retrying in {retry_delay}s..."
)
time.sleep(retry_delay)
else:
logger.error(
f"Failed to fetch channel metadata after {CHANNEL_METADATA_MAX_RETRIES} attempts: {e}"
)
# If we exhausted all retries, raise the last exception
if last_exception:
raise SlackApiError(
f"Channel metadata fetching failed after {CHANNEL_METADATA_MAX_RETRIES} attempts",
last_exception.response,
)
return {}
def get_available_channels(
access_token: str, team_id: str, include_private: bool = False
) -> list[str]:
"""Fetch list of available channel names using cached metadata."""
metadata = fetch_and_cache_channel_metadata(access_token, team_id, include_private)
return [meta["name"] for meta in metadata.values() if meta["name"]]
def _extract_channel_data_from_entities(
entities: dict[str, Any] | None,
channel_metadata_dict: dict[str, dict[str, Any]] | None,
) -> list[str] | None:
"""Extract available channels list from metadata based on entity configuration.
Args:
entities: Entity filter configuration dict
channel_metadata_dict: Pre-fetched channel metadata dictionary
Returns:
List of available channel names, or None if not needed
"""
if not entities or not channel_metadata_dict:
return None
try:
parsed_entities = SlackEntities(**entities)
# Only extract if we have exclusions or channel filters
if parsed_entities.exclude_channels or parsed_entities.channels:
# Extract channel names from metadata dict
return [
meta["name"]
for meta in channel_metadata_dict.values()
if meta["name"]
and (
parsed_entities.include_private_channels
or meta.get("type") != ChannelTypeString.PRIVATE_CHANNEL.value
)
]
except ValidationError:
logger.debug("Failed to parse entities for channel data extraction")
return None
def _should_skip_channel(
channel_id: str,
@@ -48,26 +230,23 @@ def _should_skip_channel(
access_token: str,
include_dm: bool,
) -> bool:
"""
Determine if a channel should be skipped if in bot context. When an allowed_private_channel is passed in,
all other private channels are filtered out except that specific one.
"""
"""Bot context filtering: skip private channels unless explicitly allowed."""
if bot_token and not include_dm:
try:
# Use bot token if available (has full permissions), otherwise fall back to user token
token_to_use = bot_token or access_token
channel_client = WebClient(token=token_to_use)
channel_info = channel_client.conversations_info(channel=channel_id)
if isinstance(channel_info.data, dict) and not _is_public_channel(
channel_info.data
):
# This is a private channel - filter it out
if channel_id != allowed_private_channel:
logger.debug(
f"Skipping message from private channel {channel_id} "
f"(not the allowed private channel: {allowed_private_channel})"
)
if isinstance(channel_info.data, dict):
channel_data = channel_info.data.get("channel", {})
channel_type = get_channel_type(channel_info=channel_data)
is_private_or_dm = channel_type in [
ChannelType.PRIVATE_CHANNEL,
ChannelType.IM,
ChannelType.MPIM,
]
if is_private_or_dm and channel_id != allowed_private_channel:
return True
except Exception as e:
logger.warning(
@@ -77,50 +256,6 @@ def _should_skip_channel(
return False
def build_slack_queries(query: SearchQuery, llm: LLM) -> list[str]:
# get time filter
time_filter = ""
time_cutoff = query.filters.time_cutoff
if time_cutoff is not None:
# slack after: is exclusive, so we need to subtract one day
time_cutoff = time_cutoff - timedelta(days=1)
time_filter = f" after:{time_cutoff.strftime('%Y-%m-%d')}"
# use llm to generate slack queries (use original query to use same keywords as the user)
prompt = SLACK_QUERY_EXPANSION_PROMPT.format(query=query.original_query)
try:
msg = HumanMessage(content=prompt)
response = llm.invoke([msg])
rephrased_queries = message_to_string(response).split("\n")
except Exception as e:
logger.error(f"Error expanding query: {e}")
rephrased_queries = [query.query]
return [
rephrased_query.strip() + time_filter
for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
]
def _is_public_channel(channel_info: dict[str, Any]) -> bool:
"""Check if a channel is public based on its info"""
# The channel_info structure has a nested 'channel' object
channel = channel_info.get("channel", {})
is_channel = channel.get("is_channel", False)
is_private = channel.get("is_private", False)
is_group = channel.get("is_group", False)
is_mpim = channel.get("is_mpim", False)
is_im = channel.get("is_im", False)
# A public channel is: a channel that is NOT private, NOT a group, NOT mpim, NOT im
is_public = (
is_channel and not is_private and not is_group and not is_mpim and not is_im
)
return is_public
def query_slack(
query_string: str,
original_query: SearchQuery,
@@ -129,17 +264,52 @@ def query_slack(
allowed_private_channel: str | None = None,
bot_token: str | None = None,
include_dm: bool = False,
entities: dict[str, Any] | None = None,
available_channels: list[str] | None = None,
) -> list[SlackMessage]:
# query slack
# Check if query has channel override (user specified channels in query)
has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")
if has_channel_override:
# Remove the marker and use the query as-is (already has channel filters)
final_query = query_string.replace("__CHANNEL_OVERRIDE__", "").strip()
else:
# Normal flow: build channel filters from entity config
channel_filter = ""
if entities:
channel_filter = build_channel_query_filter(entities, available_channels)
final_query = query_string
if channel_filter:
# Add channel filter to query
final_query = f"{query_string} {channel_filter}"
logger.info(f"Final query to slack: {final_query}")
# Detect if query asks for most recent results
sort_by_time = is_recency_query(original_query.query)
slack_client = WebClient(token=access_token)
try:
response = slack_client.search_messages(
query=query_string, count=limit, highlight=True
)
search_params: dict[str, Any] = {
"query": final_query,
"count": limit,
"highlight": True,
}
# Sort by timestamp for recency-focused queries, otherwise by relevance
if sort_by_time:
search_params["sort"] = "timestamp"
search_params["sort_dir"] = "desc"
response = slack_client.search_messages(**search_params)
response.validate()
messages: dict[str, Any] = response.get("messages", {})
matches: list[dict[str, Any]] = messages.get("matches", [])
logger.info(f"Successfully used search_messages, found {len(matches)} messages")
logger.info(f"Slack search found {len(matches)} messages")
except SlackApiError as slack_error:
logger.error(f"Slack API error in search_messages: {slack_error}")
logger.error(
@@ -327,11 +497,26 @@ def get_contextualized_thread_text(message: SlackMessage, access_token: str) ->
if not message_id_idx:
return thread_text
# add the message
thread_text += "\n..." if message_id_idx > 1 else ""
# Include a few messages BEFORE the matched message for context
# This helps understand what the matched message is responding to
start_idx = max(
1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW
) # Start after thread starter
# Add ellipsis if we're skipping messages between thread starter and context window
if start_idx > 1:
thread_text += "\n..."
# Add context messages before the matched message
for i in range(start_idx, message_id_idx):
msg_text = messages[i].get("text", "")
msg_sender = messages[i].get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
# Add the matched message itself
msg_text = messages[message_id_idx].get("text", "")
msg_sender = messages[message_id_idx].get("user", "")
thread_text += f"\n<@{msg_sender}>: {msg_text}"
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
# add the following replies to the thread text
len_replies = 0
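A worked example of the context window added in the hunk above, with SLACK_THREAD_CONTEXT_WINDOW = 3; indices are positions in the fetched thread, index 0 being the thread starter that is already in thread_text:

SLACK_THREAD_CONTEXT_WINDOW = 3

message_id_idx = 7  # matched message is the 8th message in the thread
start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)
assert start_idx == 4                                        # messages 1-3 are skipped, so "..." is added
assert list(range(start_idx, message_id_idx)) == [4, 5, 6]   # context messages included before the match
# The matched message (index 7) and then its replies follow, as in the hunk above.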
@@ -356,7 +541,13 @@ def get_contextualized_thread_text(message: SlackMessage, access_token: str) ->
profile: dict[str, Any] = response.get("profile", {})
name: str | None = profile.get("real_name") or profile.get("email")
except SlackApiError as e:
logger.error(f"Slack API error in get_contextualized_thread_text: {e}")
# user_not_found is common for deleted users, bots, etc. - not critical
if "user_not_found" in str(e):
logger.debug(
f"User {userid} not found in Slack workspace (likely deleted/deactivated)"
)
else:
logger.warning(f"Could not fetch profile for user {userid}: {e}")
continue
if not name:
continue
@@ -379,18 +570,84 @@ def slack_retrieval(
query: SearchQuery,
access_token: str,
db_session: Session,
connector: FederatedConnectorDetail | None = None,
entities: dict[str, Any] | None = None,
limit: int | None = None,
slack_event_context: SlackContext | None = None,
bot_token: str | None = None, # Add bot token parameter
team_id: str | None = None,
) -> list[InferenceChunk]:
# query slack
_, fast_llm = get_default_llms()
query_strings = build_slack_queries(query, fast_llm)
"""
Main entry point for Slack federated search with entity filtering.
Applies entity filtering including:
- Channel selection and exclusion
- Date range extraction and enforcement
- DM/private channel filtering
- Multi-layer caching
Args:
query: Search query object
access_token: User OAuth access token
db_session: Database session
connector: Federated connector detail (unused, kept for backwards compat)
entities: Connector-level config (entity filtering configuration)
limit: Maximum number of results
slack_event_context: Context when called from Slack bot
bot_token: Bot token for enhanced permissions
team_id: Slack team/workspace ID
Returns:
List of InferenceChunk objects
"""
# Use connector-level config
entities = entities or {}
if not entities:
logger.info("No entity configuration found, using defaults")
else:
logger.info(f"Using entity configuration: {entities}")
# Extract limit from entity config if not explicitly provided
query_limit = limit
if entities:
try:
parsed_entities = SlackEntities(**entities)
if limit is None:
query_limit = parsed_entities.max_messages_per_query
logger.info(f"Using max_messages_per_query from config: {query_limit}")
except Exception as e:
logger.warning(f"Error parsing entities for limit: {e}")
if limit is None:
query_limit = 100 # Fallback default
elif limit is None:
query_limit = 100 # Default when no entities and no limit provided
# Pre-fetch channel metadata from Redis cache and extract available channels
# This avoids repeated Redis lookups during parallel search execution
available_channels = None
channel_metadata_dict = None
if team_id:
# Always fetch all channel types (include_private=True) to ensure single cache entry
channel_metadata_dict = fetch_and_cache_channel_metadata(
access_token, team_id, include_private=True
)
# Extract available channels list if needed for pattern matching
available_channels = _extract_channel_data_from_entities(
entities, channel_metadata_dict
)
# Query slack with entity filtering
_, fast_llm = get_default_llms()
query_strings = build_slack_queries(query, fast_llm, entities, available_channels)
# Determine filtering based on entities OR context (bot)
include_dm = False
allowed_private_channel = None
if slack_event_context:
# Bot context overrides (if entities not specified)
if slack_event_context and not entities:
channel_type = slack_event_context.channel_type
if channel_type == ChannelType.IM: # DM with user
include_dm = True
@@ -400,24 +657,94 @@ def slack_retrieval(
f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
)
results = run_functions_tuples_in_parallel(
[
# Build search tasks
search_tasks = [
(
query_slack,
(
query_slack,
query_string,
query,
access_token,
query_limit,
allowed_private_channel,
bot_token,
include_dm,
entities,
available_channels,
),
)
for query_string in query_strings
]
# If include_dm is True, add additional searches without channel filters
# This allows searching DMs/group DMs while still searching the specified channels
if entities and entities.get("include_dm"):
# Create a minimal entities dict that won't add channel filters
# This ensures we search ALL conversations (DMs, group DMs, private channels)
# BUT we still want to exclude channels specified in exclude_channels
dm_entities = {
"include_dm": True,
"include_private_channels": entities.get("include_private_channels", False),
"default_search_days": entities.get("default_search_days", 30),
"search_all_channels": True,
"channels": None,
"exclude_channels": entities.get(
"exclude_channels"
), # ALWAYS apply exclude_channels
}
for query_string in query_strings:
search_tasks.append(
(
query_string,
query,
access_token,
limit,
allowed_private_channel,
bot_token,
include_dm,
),
query_slack,
(
query_string,
query,
access_token,
query_limit,
allowed_private_channel,
bot_token,
include_dm,
dm_entities,
available_channels,
),
)
)
for query_string in query_strings
]
)
# Execute searches in parallel
results = run_functions_tuples_in_parallel(search_tasks)
# Merge and post-filter results
slack_messages, docid_to_message = merge_slack_messages(results)
# Post-filter by channel type (DM, private channel, etc.)
# NOTE: We must post-filter because Slack's search.messages API only supports
# filtering by channel NAME (via in:#channel syntax), not by channel TYPE.
# There's no way to specify "only public channels" or "exclude DMs" in the query.
if entities and team_id:
# Use pre-fetched channel metadata to avoid cache misses
# Pass it directly instead of relying on Redis cache
filtered_messages = []
removed_count = 0
for msg in slack_messages:
# Pass pre-fetched metadata to avoid cache lookups
channel_type = get_channel_type(
channel_id=msg.channel_id,
channel_metadata=channel_metadata_dict,
)
if should_include_message(channel_type, entities):
filtered_messages.append(msg)
else:
removed_count += 1
if removed_count > 0:
logger.info(
f"Post-filtering removed {removed_count} messages: "
f"{len(slack_messages)} -> {len(filtered_messages)}"
)
slack_messages = filtered_messages
slack_messages = slack_messages[: limit or len(slack_messages)]
if not slack_messages:
return []
@@ -437,6 +764,9 @@ def slack_retrieval(
highlighted_texts.update(slack_message.highlighted_texts)
sorted_highlighted_texts = sorted(highlighted_texts, key=len)
# For queries without highlights (e.g., empty recency queries), we should keep all chunks
has_highlights = len(sorted_highlighted_texts) > 0
# convert slack messages to index documents
index_docs: list[IndexingDocument] = []
for slack_message in slack_messages:
@@ -475,24 +805,36 @@ def slack_retrieval(
chunks = chunker.chunk(index_docs)
# prune chunks without any highlighted texts
# BUT: for recency queries without keywords, keep all chunks
relevant_chunks: list[DocAwareChunk] = []
chunkid_to_match_highlight: dict[str, str] = {}
for chunk in chunks:
match_highlight = chunk.content
for highlight in sorted_highlighted_texts: # faster than re sub
match_highlight = match_highlight.replace(
highlight, f"<hi>{highlight}</hi>"
)
# if nothing got replaced, the chunk is irrelevant
if len(match_highlight) == len(chunk.content):
continue
if not has_highlights:
# No highlighted terms - keep all chunks (recency query)
for chunk in chunks:
chunk_id = f"{chunk.source_document.id}__{chunk.chunk_id}"
relevant_chunks.append(chunk)
chunkid_to_match_highlight[chunk_id] = chunk.content # No highlighting
if limit and len(relevant_chunks) >= limit:
break
else:
# Prune chunks that don't contain highlighted terms
for chunk in chunks:
match_highlight = chunk.content
for highlight in sorted_highlighted_texts: # faster than re sub
match_highlight = match_highlight.replace(
highlight, f"<hi>{highlight}</hi>"
)
chunk_id = f"{chunk.source_document.id}__{chunk.chunk_id}"
relevant_chunks.append(chunk)
chunkid_to_match_highlight[chunk_id] = match_highlight
if limit and len(relevant_chunks) >= limit:
break
# if nothing got replaced, the chunk is irrelevant
if len(match_highlight) == len(chunk.content):
continue
chunk_id = f"{chunk.source_document.id}__{chunk.chunk_id}"
relevant_chunks.append(chunk)
chunkid_to_match_highlight[chunk_id] = match_highlight
if limit and len(relevant_chunks) >= limit:
break
# convert to inference chunks
top_chunks: list[InferenceChunk] = []
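As a side note on the replace-based relevance check in the chunk-pruning loop above: a chunk is kept only if wrapping the highlighted terms changed the string length. A minimal standalone sketch of that trick (strings are illustrative, not real Slack content):

    content = "the deploy failed right after the rollback"
    highlighted_terms = sorted({"deploy", "rollback"}, key=len)  # shortest first, as above

    marked = content
    for term in highlighted_terms:
        marked = marked.replace(term, f"<hi>{term}</hi>")

    # If no term was found, the length is unchanged and the chunk would be pruned
    assert len(marked) != len(content)
    assert marked == "the <hi>deploy</hi> failed right after the <hi>rollback</hi>"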

View File

@@ -0,0 +1,569 @@
import fnmatch
import json
import re
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from enum import Enum
from typing import Any
from langchain_core.messages import HumanMessage
from pydantic import ValidationError
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
from onyx.context.search.models import SearchQuery
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.onyxbot.slack.models import ChannelType
from onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Constants for date extraction heuristics
DEFAULT_RECENCY_DAYS = 7
DEFAULT_LATELY_DAYS = 14
DAYS_PER_WEEK = 7
DAYS_PER_MONTH = 30
MAX_CONTENT_WORDS = 3
RECENCY_KEYWORDS = ["recent", "latest", "newest", "last"]
class ChannelTypeString(str, Enum):
"""String representations of Slack channel types."""
IM = "im"
MPIM = "mpim"
PRIVATE_CHANNEL = "private_channel"
PUBLIC_CHANNEL = "public_channel"
def _parse_llm_code_block_response(response: str) -> str:
"""Remove code block markers from LLM response if present.
Handles responses wrapped in triple backticks (```) by removing
the opening and closing markers.
Args:
response: Raw LLM response string
Returns:
Cleaned response with code block markers removed
"""
response_clean = response.strip()
if response_clean.startswith("```"):
lines = response_clean.split("\n")
lines = lines[1:]
if lines and lines[-1].strip() == "```":
lines = lines[:-1]
response_clean = "\n".join(lines)
return response_clean
def is_recency_query(query: str) -> bool:
return any(
re.search(rf"\b{re.escape(keyword)}\b", query, flags=re.IGNORECASE)
for keyword in RECENCY_KEYWORDS
)
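A quick illustration of the word-boundary matching above; this is a standalone sketch that mirrors the same regex logic rather than importing the module (whose path is not shown in this diff):

    import re

    RECENCY_KEYWORDS = ["recent", "latest", "newest", "last"]

    def _is_recency_query_sketch(query: str) -> bool:
        # Same \b-bounded, case-insensitive check as is_recency_query above
        return any(
            re.search(rf"\b{re.escape(keyword)}\b", query, flags=re.IGNORECASE)
            for keyword in RECENCY_KEYWORDS
        )

    assert _is_recency_query_sketch("Latest updates in #eng?") is True
    assert _is_recency_query_sketch("blast radius of the outage") is False  # "last" inside "blast" is not a whole word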
def extract_date_range_from_query(
query: str,
llm: LLM,
default_search_days: int,
) -> int:
query_lower = query.lower()
if re.search(r"\btoday(?:\'?s)?\b", query_lower):
return 0
if re.search(r"\byesterday\b", query_lower):
return min(1, default_search_days)
match = re.search(r"\b(?:last|past)\s+(\d+)\s+days?\b", query_lower)
if match:
days = int(match.group(1))
return min(days, default_search_days)
if re.search(r"\b(?:last|past|this)\s+week\b", query_lower):
return min(DAYS_PER_WEEK, default_search_days)
match = re.search(r"\b(?:last|past)\s+(\d+)\s+weeks?\b", query_lower)
if match:
weeks = int(match.group(1))
return min(weeks * DAYS_PER_WEEK, default_search_days)
if re.search(r"\b(?:last|past|this)\s+month\b", query_lower):
return min(DAYS_PER_MONTH, default_search_days)
match = re.search(r"\b(?:last|past)\s+(\d+)\s+months?\b", query_lower)
if match:
months = int(match.group(1))
return min(months * DAYS_PER_MONTH, default_search_days)
if re.search(r"\brecent(?:ly)?\b", query_lower):
return min(DEFAULT_RECENCY_DAYS, default_search_days)
if re.search(r"\blately\b", query_lower):
return min(DEFAULT_LATELY_DAYS, default_search_days)
try:
prompt = SLACK_DATE_EXTRACTION_PROMPT.format(query=query)
response = message_to_string(
llm.invoke_langchain([HumanMessage(content=prompt)])
)
response_clean = _parse_llm_code_block_response(response)
try:
data = json.loads(response_clean)
days_back = data.get("days_back")
if days_back is None:
logger.debug(
f"LLM date extraction returned null for query: '{query}', using default: {default_search_days} days"
)
return default_search_days
except json.JSONDecodeError:
logger.debug(
f"Failed to parse LLM date extraction response for query: '{query}', using default: {default_search_days} days"
)
return default_search_days
return min(days_back, default_search_days)
except Exception as e:
logger.warning(f"Error extracting date range with LLM: {e}")
return default_search_days
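For example, under the regex heuristics above (before any LLM call is made), relative phrases resolve to a day count that is always capped by the configured default; a small worked sketch of that arithmetic, using the constants defined in this file:

    DAYS_PER_WEEK = 7
    DAYS_PER_MONTH = 30
    default_search_days = 30

    assert min(2 * DAYS_PER_WEEK, default_search_days) == 14    # "past 2 weeks"
    assert min(6 * DAYS_PER_MONTH, default_search_days) == 30   # "past 6 months", capped by the config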
def matches_exclude_pattern(channel_name: str, patterns: list[str]) -> bool:
if not patterns:
return False
channel_norm = channel_name.lower().strip().lstrip("#")
for pattern in patterns:
pattern_norm = pattern.lower().strip().lstrip("#")
if fnmatch.fnmatch(channel_norm, pattern_norm):
return True
return False
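A minimal sketch of the glob matching used above, assuming the same normalization (lowercase, strip surrounding whitespace and a leading "#"); channel names here are made up:

    import fnmatch

    def _matches_exclude_sketch(channel_name: str, patterns: list[str]) -> bool:
        channel_norm = channel_name.lower().strip().lstrip("#")
        return any(
            fnmatch.fnmatch(channel_norm, p.lower().strip().lstrip("#")) for p in patterns
        )

    assert _matches_exclude_sketch("#Customer-Support", ["customer*"]) is True
    assert _matches_exclude_sketch("general", ["private-*", "secure-channel"]) is False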
def build_channel_query_filter(
parsed_entities: SlackEntities | dict[str, Any],
available_channels: list[str] | None = None,
) -> str:
# Parse entities if dict
try:
if isinstance(parsed_entities, dict):
entities = SlackEntities(**parsed_entities)
else:
entities = parsed_entities
except ValidationError:
return ""
search_all_channels = entities.search_all_channels
if search_all_channels:
if not entities.exclude_channels:
return ""
# Can't apply exclusions without available_channels
if not available_channels:
return ""
excluded_channels = [
ch
for ch in available_channels
if matches_exclude_pattern(ch, entities.exclude_channels)
]
normalized_excluded = [ch.lstrip("#") for ch in excluded_channels]
exclusion_filters = [f"-in:#{channel}" for channel in normalized_excluded]
return " ".join(exclusion_filters)
if not entities.channels:
return ""
included_channels: list[str] = []
for pattern in entities.channels:
pattern_norm = pattern.lstrip("#")
if "*" in pattern_norm or "?" in pattern_norm:
# Glob patterns require available_channels
if available_channels:
matching = [
ch
for ch in available_channels
if fnmatch.fnmatch(ch.lstrip("#").lower(), pattern_norm.lower())
]
included_channels.extend(matching)
else:
# Exact match: use directly or verify against available_channels
if not available_channels or pattern_norm in [
ch.lstrip("#") for ch in available_channels
]:
included_channels.append(pattern_norm)
# Apply exclusions to included channels
if entities.exclude_channels:
included_channels = [
ch
for ch in included_channels
if not matches_exclude_pattern(ch, entities.exclude_channels)
]
if not included_channels:
return ""
normalized_channels = [ch.lstrip("#") for ch in included_channels]
filters = [f"in:#{channel}" for channel in normalized_channels]
return " ".join(filters)
def get_channel_type(
channel_info: dict[str, Any] | None = None,
channel_id: str | None = None,
channel_metadata: dict[str, dict[str, Any]] | None = None,
) -> ChannelType:
"""
Determine channel type from channel info dict or by looking up channel_id.
Args:
channel_info: Channel info dict from Slack API (direct mode)
channel_id: Channel ID to look up (lookup mode)
channel_metadata: Pre-fetched metadata dict (for lookup mode)
Returns:
ChannelType enum
"""
if channel_info is not None:
if channel_info.get("is_im"):
return ChannelType.IM
if channel_info.get("is_mpim"):
return ChannelType.MPIM
if channel_info.get("is_private"):
return ChannelType.PRIVATE_CHANNEL
return ChannelType.PUBLIC_CHANNEL
# Lookup mode: get type from pre-fetched metadata
if channel_id and channel_metadata:
ch_meta = channel_metadata.get(channel_id)
if ch_meta:
type_str = ch_meta.get("type")
if type_str == ChannelTypeString.IM.value:
return ChannelType.IM
elif type_str == ChannelTypeString.MPIM.value:
return ChannelType.MPIM
elif type_str == ChannelTypeString.PRIVATE_CHANNEL.value:
return ChannelType.PRIVATE_CHANNEL
return ChannelType.PUBLIC_CHANNEL
return ChannelType.PUBLIC_CHANNEL
def should_include_message(channel_type: ChannelType, entities: dict[str, Any]) -> bool:
include_dm = entities.get("include_dm", False)
include_group_dm = entities.get("include_group_dm", False)
include_private = entities.get("include_private_channels", False)
if channel_type == ChannelType.IM:
return include_dm
if channel_type == ChannelType.MPIM:
return include_group_dm
if channel_type == ChannelType.PRIVATE_CHANNEL:
return include_private
return True
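Taken together, get_channel_type and should_include_message gate each matched message by its conversation type after the Slack search returns. A hedged sketch of that decision, using plain strings in place of the ChannelType enum (whose module is imported above but not shown here):

    def _should_include_sketch(channel_type: str, entities: dict) -> bool:
        # Condensed restatement of should_include_message above
        if channel_type == "im":
            return entities.get("include_dm", False)
        if channel_type == "mpim":
            return entities.get("include_group_dm", False)
        if channel_type == "private_channel":
            return entities.get("include_private_channels", False)
        return True  # public channels are always kept

    config = {"include_dm": True, "include_group_dm": False, "include_private_channels": False}
    assert _should_include_sketch("im", config) is True
    assert _should_include_sketch("mpim", config) is False
    assert _should_include_sketch("public_channel", config) is True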
def extract_channel_references_from_query(query_text: str) -> set[str]:
"""Extract channel names referenced in the query text.
Only matches explicit channel references with prepositions or # symbols:
- "in the office channel"
- "from the office channel"
- "in #office"
- "from #office"
Does NOT match generic phrases like "slack discussions" or "team channel".
Args:
query_text: The user's query text
Returns:
Set of channel names (without # prefix)
"""
channel_references = set()
query_lower = query_text.lower()
# Only match channels with explicit prepositions (in/from) or # prefix
# This prevents false positives like "slack discussions" being interpreted as channel "slack"
channel_patterns = [
r"\bin\s+(?:the\s+)?([a-z0-9_-]+)\s+(?:slack\s+)?channels?\b", # "in the office channel"
r"\bfrom\s+(?:the\s+)?([a-z0-9_-]+)\s+(?:slack\s+)?channels?\b", # "from the office channel"
r"\bin\s+#([a-z0-9_-]+)\b", # "in #office"
r"\bfrom\s+#([a-z0-9_-]+)\b", # "from #office"
]
for pattern in channel_patterns:
matches = re.finditer(pattern, query_lower)
for match in matches:
channel_references.add(match.group(1))
return channel_references
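To show which phrasings the patterns above do and do not treat as channel references, here is a standalone sketch that reuses the same regexes on a few made-up queries:

    import re

    _CHANNEL_PATTERNS = [
        r"\bin\s+(?:the\s+)?([a-z0-9_-]+)\s+(?:slack\s+)?channels?\b",
        r"\bfrom\s+(?:the\s+)?([a-z0-9_-]+)\s+(?:slack\s+)?channels?\b",
        r"\bin\s+#([a-z0-9_-]+)\b",
        r"\bfrom\s+#([a-z0-9_-]+)\b",
    ]

    def _extract_refs_sketch(query_text: str) -> set[str]:
        query_lower = query_text.lower()
        return {m.group(1) for p in _CHANNEL_PATTERNS for m in re.finditer(p, query_lower)}

    assert _extract_refs_sketch("what was discussed in the office channel?") == {"office"}
    assert _extract_refs_sketch("summarize messages from #eng-infra") == {"eng-infra"}
    assert _extract_refs_sketch("recent slack discussions about pricing") == set()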
def validate_channel_references(
channel_references: set[str],
entities: dict[str, Any],
available_channels: list[str] | None,
) -> None:
"""Validate that referenced channels exist and are allowed by entity config.
Args:
channel_references: Set of channel names extracted from query
entities: Entity configuration dict
available_channels: List of available channel names in workspace
Raises:
ValueError: If channel doesn't exist, is excluded, or not in inclusion list
"""
if not channel_references or not entities:
return
try:
parsed_entities = SlackEntities(**entities)
for channel_name in channel_references:
# Check if channel exists
if available_channels is not None:
# Normalize for comparison (available_channels may or may not have #)
normalized_available = [
ch.lstrip("#").lower() for ch in available_channels
]
if channel_name.lower() not in normalized_available:
raise ValueError(
f"Channel '{channel_name}' does not exist in your Slack workspace. "
f"Please check the channel name and try again."
)
# Check if channel is in exclusion list
if parsed_entities.exclude_channels:
if matches_exclude_pattern(
channel_name, parsed_entities.exclude_channels
):
raise ValueError(
f"Channel '{channel_name}' is excluded from search by your configuration. "
f"Please update your connector settings to search this channel."
)
# Check if channel is in inclusion list (when search_all_channels is False)
if not parsed_entities.search_all_channels:
if parsed_entities.channels:
# Normalize channel lists for comparison
normalized_channels = [
ch.lstrip("#").lower() for ch in parsed_entities.channels
]
if channel_name.lower() not in normalized_channels:
raise ValueError(
f"Channel '{channel_name}' is not in your configured channel list. "
f"Please update your connector settings to include this channel."
)
except ValidationError:
# If entities are malformed, skip validation
pass
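As an illustration of the exclusion branch above, with a hypothetical connector config: if the user explicitly names a channel that matches an exclude pattern, the function raises, and build_slack_queries (later in this file) catches the ValueError, logs a warning, and falls back to the normal keyword flow.

    import fnmatch

    exclude_patterns = ["secure-*"]            # hypothetical connector config
    referenced_channel = "secure-payments"     # channel the user named explicitly in the query

    if any(fnmatch.fnmatch(referenced_channel, p) for p in exclude_patterns):
        # validate_channel_references raises ValueError("Channel 'secure-payments' is excluded ...")
        # and the caller continues without the channel override.
        pass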
def build_channel_override_query(channel_references: set[str], time_filter: str) -> str:
"""Build a Slack query with ONLY channel filters and time filter (no keywords).
Args:
channel_references: Set of channel names to search
time_filter: Time filter string (e.g., " after:2025-11-07")
Returns:
Query string with __CHANNEL_OVERRIDE__ marker
"""
normalized_channels = [ch.lstrip("#") for ch in channel_references]
channel_filter = " ".join([f"in:#{channel}" for channel in normalized_channels])
return f"__CHANNEL_OVERRIDE__ {channel_filter}{time_filter}"
# Slack-specific stop words (in addition to standard NLTK stop words)
# These include Slack-specific terms and temporal/recency keywords
SLACK_SPECIFIC_STOP_WORDS = frozenset(
RECENCY_KEYWORDS
+ [
"dm",
"dms",
"message",
"messages",
"channel",
"channels",
"slack",
"post",
"posted",
"posting",
"sent",
]
)
def extract_content_words_from_recency_query(
query_text: str, channel_references: set[str]
) -> list[str]:
"""Extract meaningful content words from a recency query.
Filters out NLTK stop words, Slack-specific terms, channel references, and proper nouns.
Args:
query_text: The user's query text
channel_references: Channel names to exclude from content words
Returns:
List of content words (up to MAX_CONTENT_WORDS)
"""
# Get standard English stop words from NLTK (lazy import)
try:
from nltk.corpus import stopwords # type: ignore
nltk_stop_words = set(stopwords.words("english"))
except Exception:
# Fallback if NLTK not available
nltk_stop_words = set()
# Combine NLTK stop words with Slack-specific stop words
all_stop_words = nltk_stop_words | SLACK_SPECIFIC_STOP_WORDS
words = query_text.split()
content_words = []
for word in words:
clean_word = word.lower().strip(".,!?;:\"'#")
# Skip if it's a channel reference or a stop word
if clean_word in channel_references:
continue
if clean_word and clean_word not in all_stop_words and len(clean_word) > 2:
clean_word_orig = word.strip(".,!?;:\"'#")
if clean_word_orig.lower() not in all_stop_words:
content_words.append(clean_word_orig)
# Filter out proper nouns (capitalized words)
content_words_filtered = [word for word in content_words if not word[0].isupper()]
return content_words_filtered[:MAX_CONTENT_WORDS]
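A worked example of the filtering above, with a small stand-in for the combined stop-word set (NLTK English stop words plus the Slack-specific ones); the query is made up:

    all_stop_words = {"latest", "messages", "about", "the", "in", "slack", "channel"}
    query = "latest messages about deployment failures in Slack"
    channel_refs: set[str] = set()

    content_words = []
    for word in query.split():
        clean = word.strip(".,!?;:\"'#")
        if clean.lower() in channel_refs or clean.lower() in all_stop_words or len(clean) <= 2:
            continue
        content_words.append(clean)

    # Capitalized words are also dropped ("Slack" was already filtered as a stop word)
    content_words = [w for w in content_words if not w[0].isupper()][:3]
    assert content_words == ["deployment", "failures"]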
def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
"""Use LLM to expand query into multiple search variations.
Args:
query_text: The user's original query
llm: LLM instance to use for expansion
Returns:
List of rephrased query strings (up to MAX_SLACK_QUERY_EXPANSIONS)
"""
prompt = SLACK_QUERY_EXPANSION_PROMPT.format(
query=query_text, max_queries=MAX_SLACK_QUERY_EXPANSIONS
)
try:
response = message_to_string(
llm.invoke_langchain([HumanMessage(content=prompt)])
)
response_clean = _parse_llm_code_block_response(response)
# Split into lines and filter out empty lines
rephrased_queries = [
line.strip() for line in response_clean.split("\n") if line.strip()
]
# If the LLM produced no usable rephrasings, fall back to an empty query
if not rephrased_queries:
logger.debug("Query expansion returned no rephrased queries; falling back to an empty query")
return [""]
logger.info(
f"Expanded query into {len(rephrased_queries)} queries: {rephrased_queries}"
)
return rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
except Exception as e:
logger.error(f"Error expanding query: {e}")
return [query_text]
def build_slack_queries(
query: SearchQuery,
llm: LLM,
entities: dict[str, Any] | None = None,
available_channels: list[str] | None = None,
) -> list[str]:
"""Build Slack query strings with date filtering and query expansion."""
default_search_days = 30
if entities:
try:
parsed_entities = SlackEntities(**entities)
default_search_days = parsed_entities.default_search_days
except ValidationError as e:
logger.warning(f"Invalid entities in build_slack_queries: {e}")
days_back = extract_date_range_from_query(
query.original_query or query.query, llm, default_search_days
)
# get time filter
time_filter = ""
if days_back is not None and days_back >= 0:
if days_back == 0:
time_filter = " on:today"
else:
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
time_filter = f" after:{cutoff_date.strftime('%Y-%m-%d')}"
original_query_text = query.original_query or query.query
# ALWAYS extract channel references from the query (not just for recency queries)
channel_references = extract_channel_references_from_query(original_query_text)
# Validate channel references against available channels and entity config
# This will raise ValueError if channels are invalid
if channel_references and entities:
try:
validate_channel_references(
channel_references, entities, available_channels
)
logger.info(
f"Detected and validated channel references: {channel_references}"
)
# If valid channels detected, use ONLY those channels with NO keywords
# Return query with ONLY time filter + channel filter (no keywords)
return [build_channel_override_query(channel_references, time_filter)]
except ValueError as e:
# If validation fails, log the error and continue with normal flow
logger.warning(f"Channel reference validation failed: {e}")
channel_references = set()
# use llm to generate slack queries (use original query to use same keywords as the user)
if is_recency_query(original_query_text):
# For recency queries, extract content words (excluding channel names and stop words)
content_words = extract_content_words_from_recency_query(
original_query_text, channel_references
)
rephrased_queries = [" ".join(content_words)] if content_words else [""]
else:
# For other queries, use LLM to expand into multiple variations
rephrased_queries = expand_query_with_llm(original_query_text, llm)
# Build final query strings with time filters
return [
rephrased_query.strip() + time_filter
for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
]
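Putting the pieces together, the strings this function hands to query_slack take one of three illustrative shapes (dates and rephrasings below are examples, not real output):

    # 1. Explicit, validated channel reference -> override query, no keywords:
    #    "__CHANNEL_OVERRIDE__ in:#office after:2025-11-07"
    #
    # 2. Recency query ("latest deployment failures") -> content words plus time filter:
    #    "deployment failures after:2025-11-07"
    #
    # 3. Everything else -> up to MAX_SLACK_QUERY_EXPANSIONS LLM rephrasings, each with the filter:
    queries = ["deployment failure postmortem", "deploy rollback incident"]
    time_filter = " after:2025-11-07"
    final = [q.strip() + time_filter for q in queries]
    assert final == [
        "deployment failure postmortem after:2025-11-07",
        "deploy rollback incident after:2025-11-07",
    ]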

View File

@@ -86,8 +86,7 @@ class SavedSearchSettings(InferenceSettings, IndexingSetting):
multipass_indexing=search_settings.multipass_indexing,
embedding_precision=search_settings.embedding_precision,
reduced_dimension=search_settings.reduced_dimension,
# Whether switching to this model requires re-indexing
background_reindex_enabled=search_settings.background_reindex_enabled,
switchover_type=search_settings.switchover_type,
enable_contextual_rag=search_settings.enable_contextual_rag,
contextual_rag_llm_name=search_settings.contextual_rag_llm_name,
contextual_rag_llm_provider=search_settings.contextual_rag_llm_provider,

View File

@@ -395,13 +395,19 @@ class SearchPipeline:
self.contextual_pruning_config is not None
and self.prompt_config is not None
):
from onyx.llm.utils import check_number_of_tokens
# For backwards compatibility with non-v2 flows, use query token count
# and pass prompt_config for proper token calculation
query_token_count = check_number_of_tokens(self.search_query.query)
self._final_context_sections = prune_and_merge_sections(
sections=self.reranked_sections,
section_relevance_list=None,
prompt_config=self.prompt_config,
llm_config=self.llm.config,
question=self.search_query.query,
existing_input_tokens=query_token_count,
contextual_pruning_config=self.contextual_pruning_config,
prompt_config=self.prompt_config,
)
else:

View File

@@ -31,6 +31,7 @@ from onyx.document_index.document_index_utils import (
from onyx.file_store.file_store import get_default_file_store
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.natural_language_processing.exceptions import CohereBillingLimitError
from onyx.natural_language_processing.search_nlp_models import RerankingModel
from onyx.secondary_llm_flows.chunk_usefulness import llm_batch_eval_sections
from onyx.utils.logger import setup_logger
@@ -101,7 +102,7 @@ def update_image_sections_with_query(
),
]
raw_response = llm.invoke(messages)
raw_response = llm.invoke_langchain(messages)
answer_text = message_to_string(raw_response).strip()
return (
@@ -246,7 +247,14 @@ def semantic_reranking(
f"{chunk.semantic_identifier or chunk.title or ''}\n{chunk.content}"
for chunk in chunks_to_rerank
]
sim_scores_floats = cross_encoder.predict(query=query_str, passages=passages)
try:
sim_scores_floats = cross_encoder.predict(query=query_str, passages=passages)
except CohereBillingLimitError:
logger.warning(
"Skipping reranking for query '%s' because Cohere billing limit was reached.",
query_str,
)
return chunks_to_rerank, list(range(len(chunks_to_rerank)))
# Old logic to handle multiple cross-encoders preserved but not used
sim_scores = [numpy.array(sim_scores_floats)]

View File

@@ -197,7 +197,7 @@ def get_documents_for_connector_credential_pair_limited_columns(
doc_row = DocumentRow(
id=row.id,
doc_metadata=row.doc_metadata,
external_user_group_ids=row.external_user_group_ids,
external_user_group_ids=row.external_user_group_ids or [],
)
doc_rows.append(doc_row)
return doc_rows

View File

@@ -179,3 +179,9 @@ class ThemePreference(str, PyEnum):
LIGHT = "light"
DARK = "dark"
SYSTEM = "system"
class SwitchoverType(str, PyEnum):
REINDEX = "reindex"
ACTIVE_ONLY = "active_only"
INSTANT = "instant"

View File

@@ -63,17 +63,31 @@ def create_federated_connector(
db_session: Session,
source: FederatedConnectorSource,
credentials: dict[str, Any],
config: dict[str, Any] | None = None,
) -> FederatedConnector:
"""Create a new federated connector with credential validation."""
"""Create a new federated connector with credential and config validation."""
# Validate credentials before creating
if not validate_federated_connector_credentials(source, credentials):
raise ValueError(
f"Invalid credentials for federated connector source: {source}"
)
# Validate config using connector-specific validation
if config:
try:
# Get connector instance to access validate_config method
connector = get_federated_connector(source, credentials)
if not connector.validate_config(config):
raise ValueError(
f"Invalid config for federated connector source: {source}"
)
except Exception as e:
raise ValueError(f"Config validation failed for {source}: {str(e)}")
federated_connector = FederatedConnector(
source=source,
credentials=credentials,
config=config or {},
)
db_session.add(federated_connector)
db_session.commit()
@@ -239,14 +253,21 @@ def update_federated_connector(
db_session: Session,
federated_connector_id: int,
credentials: dict[str, Any] | None = None,
config: dict[str, Any] | None = None,
) -> FederatedConnector | None:
"""Update a federated connector with credential validation."""
"""Update a federated connector with credential and config validation."""
federated_connector = fetch_federated_connector_by_id(
federated_connector_id, db_session
)
if not federated_connector:
return None
# Use provided credentials if updating them, otherwise use existing credentials
# This is needed to instantiate the connector for config validation when only config is being updated
creds_to_use = (
credentials if credentials is not None else federated_connector.credentials
)
if credentials is not None:
# Validate credentials before updating
if not validate_federated_connector_credentials(
@@ -257,6 +278,23 @@ def update_federated_connector(
)
federated_connector.credentials = credentials
if config is not None:
# Validate config using connector-specific validation
try:
# Get connector instance to access validate_config method
connector = get_federated_connector(
federated_connector.source, creds_to_use
)
if not connector.validate_config(config):
raise ValueError(
f"Invalid config for federated connector source: {federated_connector.source}"
)
except Exception as e:
raise ValueError(
f"Config validation failed for {federated_connector.source}: {str(e)}"
)
federated_connector.config = config
db_session.commit()
return federated_connector

View File

@@ -16,6 +16,7 @@ from sqlalchemy.orm import Session
from onyx.connectors.models import ConnectorFailure
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.models import ConnectorCredentialPair
@@ -803,6 +804,29 @@ def count_unique_cc_pairs_with_successful_index_attempts(
return unique_pairs_count
def count_unique_active_cc_pairs_with_successful_index_attempts(
search_settings_id: int | None,
db_session: Session,
) -> int:
"""Collect all of the Index Attempts that are successful and for the specified embedding model,
but only for non-paused connector-credential pairs. Then do distinct by connector_id and credential_id
which is equivalent to the cc-pair. Finally, do a count to get the total number of unique non-paused
cc-pairs with successful attempts."""
unique_pairs_count = (
db_session.query(IndexAttempt.connector_credential_pair_id)
.join(ConnectorCredentialPair)
.filter(
IndexAttempt.search_settings_id == search_settings_id,
IndexAttempt.status == IndexingStatus.SUCCESS,
ConnectorCredentialPair.status != ConnectorCredentialPairStatus.PAUSED,
)
.distinct()
.count()
)
return unique_pairs_count
def create_index_attempt_error(
index_attempt_id: int | None,
connector_credential_pair_id: int,

View File

@@ -1,17 +1,16 @@
from sqlalchemy import delete
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.constants import AuthType
from onyx.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel
from onyx.db.models import DocumentSet
from onyx.db.models import LLMProvider as LLMProviderModel
from onyx.db.models import LLMProvider__Persona
from onyx.db.models import LLMProvider__UserGroup
from onyx.db.models import ModelConfiguration
from onyx.db.models import Persona
from onyx.db.models import SearchSettings
from onyx.db.models import Tool as ToolModel
from onyx.db.models import User
@@ -46,6 +45,144 @@ def update_group_llm_provider_relationships__no_commit(
db_session.add_all(new_relationships)
def update_llm_provider_persona_relationships__no_commit(
db_session: Session,
llm_provider_id: int,
persona_ids: list[int] | None,
) -> None:
"""Replace the persona restrictions for a provider within an open transaction."""
db_session.execute(
delete(LLMProvider__Persona).where(
LLMProvider__Persona.llm_provider_id == llm_provider_id
)
)
if persona_ids:
db_session.add_all(
LLMProvider__Persona(
llm_provider_id=llm_provider_id,
persona_id=persona_id,
)
for persona_id in persona_ids
)
def fetch_user_group_ids(db_session: Session, user: User | None) -> set[int]:
"""Fetch the set of user group IDs for a given user.
Args:
db_session: Database session
user: User to fetch groups for, or None for anonymous users
Returns:
Set of user group IDs. Empty set if user is None.
"""
if not user:
return set()
return set(
db_session.scalars(
select(User__UserGroup.user_group_id).where(
User__UserGroup.user_id == user.id
)
).all()
)
def can_user_access_llm_provider(
provider: LLMProviderModel,
user_group_ids: set[int],
persona: Persona | None,
is_admin: bool = False,
) -> bool:
"""Check if a user may use an LLM provider.
Args:
provider: The LLM provider to check access for
user_group_ids: Set of user group IDs the user belongs to
persona: The persona being used (if any)
is_admin: If True, bypass user group restrictions but still respect persona restrictions
Access logic:
1. If is_public=True → everyone has access (public override)
2. If is_public=False:
- Both groups AND personas set → must satisfy BOTH (AND logic, admins bypass group check)
- Only groups set → must be in one of the groups (OR across groups, admins bypass)
- Only personas set → must use one of the personas (OR across personas, applies to admins)
- Neither set → NOBODY has access unless admin (locked, admin-only)
"""
# Public override - everyone has access
if provider.is_public:
return True
# Extract IDs once to avoid multiple iterations
provider_group_ids = (
{group.id for group in provider.groups} if provider.groups else set()
)
provider_persona_ids = (
{p.id for p in provider.personas} if provider.personas else set()
)
has_groups = bool(provider_group_ids)
has_personas = bool(provider_persona_ids)
# Both groups AND personas set → AND logic (must satisfy both)
if has_groups and has_personas:
# Admins bypass group check but still must satisfy persona restrictions
user_in_group = is_admin or bool(user_group_ids & provider_group_ids)
persona_allowed = persona.id in provider_persona_ids if persona else False
return user_in_group and persona_allowed
# Only groups set → user must be in one of the groups (admins bypass)
if has_groups:
return is_admin or bool(user_group_ids & provider_group_ids)
# Only personas set → persona must be in allowed list (applies to admins too)
if has_personas:
return persona.id in provider_persona_ids if persona else False
# Neither groups nor personas set, and not public → admins can access
return is_admin
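To make the access rules above concrete, here is a standalone sketch that restates the same decision logic with SimpleNamespace stand-ins for the ORM objects (it does not import the real models, whose module path is not shown in this hunk):

    from types import SimpleNamespace as NS

    def _can_access_sketch(provider, user_group_ids, persona, is_admin=False) -> bool:
        # Condensed restatement of can_user_access_llm_provider above
        if provider.is_public:
            return True
        group_ids = {g.id for g in provider.groups}
        persona_ids = {p.id for p in provider.personas}
        if group_ids and persona_ids:
            in_group = is_admin or bool(user_group_ids & group_ids)
            persona_ok = persona is not None and persona.id in persona_ids
            return in_group and persona_ok
        if group_ids:
            return is_admin or bool(user_group_ids & group_ids)
        if persona_ids:
            return persona is not None and persona.id in persona_ids
        return is_admin

    eng = NS(id=1)
    support_persona = NS(id=7)
    provider = NS(is_public=False, groups=[eng], personas=[support_persona])

    assert _can_access_sketch(provider, {1}, support_persona) is True                    # group AND persona satisfied
    assert _can_access_sketch(provider, set(), support_persona, is_admin=True) is True   # admin bypasses the group check
    assert _can_access_sketch(provider, {1}, None) is False                              # persona restriction still applies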
def validate_persona_ids_exist(
db_session: Session, persona_ids: list[int]
) -> tuple[set[int], list[int]]:
"""Validate that persona IDs exist in the database.
Returns:
Tuple of (fetched_persona_ids, missing_personas)
"""
fetched_persona_ids = set(
db_session.scalars(select(Persona.id).where(Persona.id.in_(persona_ids))).all()
)
missing_personas = sorted(set(persona_ids) - fetched_persona_ids)
return fetched_persona_ids, missing_personas
def get_personas_using_provider(
db_session: Session, provider_name: str
) -> list[Persona]:
"""Get all non-deleted personas that use a specific LLM provider."""
return list(
db_session.scalars(
select(Persona).where(
Persona.llm_model_provider_override == provider_name,
Persona.deleted == False, # noqa: E712
)
).all()
)
def fetch_persona_with_groups(db_session: Session, persona_id: int) -> Persona | None:
"""Fetch a persona with its groups eagerly loaded."""
return db_session.scalar(
select(Persona)
.options(selectinload(Persona.groups))
.where(Persona.id == persona_id, Persona.deleted == False) # noqa: E712
)
def upsert_cloud_embedding_provider(
db_session: Session, provider: CloudEmbeddingProviderCreationRequest
) -> CloudEmbeddingProvider:
@@ -133,10 +270,22 @@ def upsert_llm_provider(
group_ids=llm_provider_upsert_request.groups,
db_session=db_session,
)
update_llm_provider_persona_relationships__no_commit(
db_session=db_session,
llm_provider_id=existing_llm_provider.id,
persona_ids=llm_provider_upsert_request.personas,
)
db_session.flush()
db_session.refresh(existing_llm_provider)
try:
db_session.commit()
except Exception as e:
db_session.rollback()
raise ValueError(f"Failed to save LLM provider: {str(e)}") from e
full_llm_provider = LLMProviderView.from_model(existing_llm_provider)
db_session.commit()
return full_llm_provider
@@ -165,11 +314,14 @@ def fetch_existing_llm_providers(
only_public: bool = False,
) -> list[LLMProviderModel]:
stmt = select(LLMProviderModel).options(
selectinload(LLMProviderModel.model_configurations)
selectinload(LLMProviderModel.model_configurations),
selectinload(LLMProviderModel.groups),
selectinload(LLMProviderModel.personas),
)
providers = list(db_session.scalars(stmt).all())
if only_public:
stmt = stmt.where(LLMProviderModel.is_public == True) # noqa: E712
return list(db_session.scalars(stmt).all())
return [provider for provider in providers if provider.is_public]
return providers
def fetch_existing_llm_provider(
@@ -178,44 +330,16 @@ def fetch_existing_llm_provider(
provider_model = db_session.scalar(
select(LLMProviderModel)
.where(LLMProviderModel.name == name)
.options(selectinload(LLMProviderModel.model_configurations))
.options(
selectinload(LLMProviderModel.model_configurations),
selectinload(LLMProviderModel.groups),
selectinload(LLMProviderModel.personas),
)
)
return provider_model
def fetch_existing_llm_providers_for_user(
db_session: Session,
user: User | None = None,
) -> list[LLMProviderModel]:
# if user is anonymous
if not user:
# Only fetch public providers if auth is turned on
return fetch_existing_llm_providers(
db_session, only_public=AUTH_TYPE != AuthType.DISABLED
)
stmt = (
select(LLMProviderModel)
.options(selectinload(LLMProviderModel.model_configurations))
.distinct()
)
user_groups_select = select(User__UserGroup.user_group_id).where(
User__UserGroup.user_id == user.id
)
access_conditions = or_(
LLMProviderModel.is_public,
LLMProviderModel.id.in_( # User is part of a group that has access
select(LLMProvider__UserGroup.llm_provider_id).where(
LLMProvider__UserGroup.user_group_id.in_(user_groups_select) # type: ignore
)
),
)
stmt = stmt.where(access_conditions)
return list(db_session.scalars(stmt).all())
def fetch_embedding_provider(
db_session: Session, provider_type: EmbeddingProvider
) -> CloudEmbeddingProviderModel | None:
@@ -277,7 +401,16 @@ def remove_embedding_provider(
def remove_llm_provider(db_session: Session, provider_id: int) -> None:
# Remove LLMProvider's dependent relationships
provider = db_session.get(LLMProviderModel, provider_id)
if not provider:
raise ValueError("LLM Provider not found")
# Clear the provider override from any personas using it
# This causes them to fall back to the default provider
personas_using_provider = get_personas_using_provider(db_session, provider.name)
for persona in personas_using_provider:
persona.llm_model_provider_override = None
db_session.execute(
delete(LLMProvider__UserGroup).where(
LLMProvider__UserGroup.llm_provider_id == provider_id

View File

@@ -67,6 +67,7 @@ from onyx.db.enums import (
MCPAuthenticationPerformer,
MCPTransport,
ThemePreference,
SwitchoverType,
)
from onyx.configs.constants import NotificationType
from onyx.configs.constants import SearchFeedbackType
@@ -1528,6 +1529,9 @@ class FederatedConnector(Base):
Enum(FederatedConnectorSource, native_enum=False)
)
credentials: Mapped[dict[str, str]] = mapped_column(EncryptedJson(), nullable=False)
config: Mapped[dict[str, Any]] = mapped_column(
postgresql.JSONB(), default=dict, nullable=False, server_default="{}"
)
oauth_tokens: Mapped[list["FederatedConnectorOAuthToken"]] = relationship(
"FederatedConnectorOAuthToken",
@@ -1609,9 +1613,13 @@ class SearchSettings(Base):
ForeignKey("embedding_provider.provider_type"), nullable=True
)
# Whether switching to this model should re-index all connectors in the background
# if no re-index is needed, will be ignored. Only used during the switch-over process.
background_reindex_enabled: Mapped[bool] = mapped_column(Boolean, default=True)
# Type of switchover to perform when switching embedding models
# REINDEX: waits for all connectors to complete
# ACTIVE_ONLY: waits for only non-paused connectors to complete
# INSTANT: swaps immediately without waiting
switchover_type: Mapped[SwitchoverType] = mapped_column(
Enum(SwitchoverType, native_enum=False), default=SwitchoverType.REINDEX
)
# allows for quantization -> less memory usage for a small performance hit
embedding_precision: Mapped[EmbeddingPrecision] = mapped_column(
@@ -2406,6 +2414,12 @@ class LLMProvider(Base):
secondary="llm_provider__user_group",
viewonly=True,
)
personas: Mapped[list["Persona"]] = relationship(
"Persona",
secondary="llm_provider__persona",
back_populates="allowed_by_llm_providers",
viewonly=True,
)
model_configurations: Mapped[list["ModelConfiguration"]] = relationship(
"ModelConfiguration",
back_populates="llm_provider",
@@ -2781,6 +2795,12 @@ class Persona(Base):
secondary="persona__user_group",
viewonly=True,
)
allowed_by_llm_providers: Mapped[list["LLMProvider"]] = relationship(
"LLMProvider",
secondary="llm_provider__persona",
back_populates="personas",
viewonly=True,
)
# Relationship to UserFile
user_files: Mapped[list["UserFile"]] = relationship(
"UserFile",
@@ -3101,6 +3121,22 @@ class Persona__UserGroup(Base):
)
class LLMProvider__Persona(Base):
"""Association table restricting LLM providers to specific personas.
If no such rows exist for a given LLM provider, then it is accessible by all personas.
"""
__tablename__ = "llm_provider__persona"
llm_provider_id: Mapped[int] = mapped_column(
ForeignKey("llm_provider.id", ondelete="CASCADE"), primary_key=True
)
persona_id: Mapped[int] = mapped_column(
ForeignKey("persona.id", ondelete="CASCADE"), primary_key=True
)
class LLMProvider__UserGroup(Base):
__tablename__ = "llm_provider__user_group"

View File

@@ -950,3 +950,22 @@ def update_default_assistant_configuration(
db_session.commit()
return persona
def user_can_access_persona(
db_session: Session, persona_id: int, user: User | None, get_editable: bool = False
) -> bool:
"""Check if a user has access to a specific persona.
Args:
db_session: Database session
persona_id: ID of the persona to check
user: User to check access for
get_editable: If True, check for edit access; if False, check for view access
Returns:
True if user can access the persona, False otherwise
"""
stmt = select(Persona).where(Persona.id == persona_id, Persona.deleted.is_(False))
stmt = _add_user_filters(stmt, user, get_editable=get_editable)
return db_session.scalar(stmt) is not None

View File

@@ -154,6 +154,14 @@ def upload_files_to_user_files_with_indexing(
def check_project_ownership(
project_id: int, user_id: UUID | None, db_session: Session
) -> bool:
# In no-auth mode, all projects are accessible
if user_id is None:
# Verify project exists
return (
db_session.query(UserProject).filter(UserProject.id == project_id).first()
is not None
)
return (
db_session.query(UserProject)
.filter(UserProject.id == project_id, UserProject.user_id == user_id)

View File

@@ -61,7 +61,7 @@ def create_search_settings(
rerank_provider_type=search_settings.rerank_provider_type,
rerank_api_key=search_settings.rerank_api_key,
num_rerank=search_settings.num_rerank,
background_reindex_enabled=search_settings.background_reindex_enabled,
switchover_type=search_settings.switchover_type,
)
db_session.add(embedding_model)

View File

@@ -53,9 +53,20 @@ def create_slack_channel_persona(
# create/update persona associated with the Slack channel
persona_name = _build_persona_name(channel_name)
persona_id_to_update = existing_persona_id
if persona_id_to_update is None:
# Reuse any previous Slack persona for this channel (even if the config was
# temporarily switched to a different persona) so we don't trip duplicate name
# validation inside `upsert_persona`.
existing_persona = db_session.scalar(
select(Persona).where(Persona.name == persona_name)
)
if existing_persona:
persona_id_to_update = existing_persona.id
persona = upsert_persona(
user=None, # Slack channel Personas are not attached to users
persona_id=existing_persona_id,
persona_id=persona_id_to_update,
name=persona_name,
description="",
system_prompt="",

View File

@@ -7,11 +7,14 @@ from onyx.configs.constants import KV_REINDEX_KEY
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.document import delete_all_documents_for_connector_credential_pair
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.enums import SwitchoverType
from onyx.db.index_attempt import cancel_indexing_attempts_for_search_settings
from onyx.db.index_attempt import (
count_unique_cc_pairs_with_successful_index_attempts,
count_unique_active_cc_pairs_with_successful_index_attempts,
)
from onyx.db.index_attempt import count_unique_cc_pairs_with_successful_index_attempts
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
@@ -27,12 +30,15 @@ logger = setup_logger()
def _perform_index_swap(
db_session: Session,
current_search_settings: SearchSettings,
secondary_search_settings: SearchSettings,
new_search_settings: SearchSettings,
all_cc_pairs: list[ConnectorCredentialPair],
cleanup_documents: bool = False,
) -> None:
"""Swap the indices and expire the old one."""
) -> SearchSettings | None:
"""Swap the indices and expire the old one.
Returns the old search settings if the swap was successful, otherwise None.
"""
current_search_settings = get_current_search_settings(db_session)
if len(all_cc_pairs) > 0:
kv_store = get_kv_store()
kv_store.store(KV_REINDEX_KEY, False)
@@ -48,7 +54,7 @@ def _perform_index_swap(
resync_cc_pair(
cc_pair=cc_pair,
# sync based on the new search settings
search_settings_id=secondary_search_settings.id,
search_settings_id=new_search_settings.id,
db_session=db_session,
)
@@ -69,13 +75,13 @@ def _perform_index_swap(
db_session=db_session,
)
update_search_settings_status(
search_settings=secondary_search_settings,
search_settings=new_search_settings,
new_status=IndexModelStatus.PRESENT,
db_session=db_session,
)
# remove the old index from the vector db
document_index = get_default_document_index(secondary_search_settings, None)
document_index = get_default_document_index(new_search_settings, None)
WAIT_SECONDS = 5
@@ -86,8 +92,8 @@ def _perform_index_swap(
f"Vespa index swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=secondary_search_settings.final_embedding_dim,
primary_embedding_precision=secondary_search_settings.embedding_precision,
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
@@ -106,8 +112,9 @@ def _perform_index_swap(
logger.error(
f"Vespa index swap did not succeed. Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
)
return None
return
return current_search_settings
def check_and_perform_index_swap(db_session: Session) -> SearchSettings | None:
@@ -121,45 +128,80 @@ def check_and_perform_index_swap(db_session: Session) -> SearchSettings | None:
# Default CC-pair created for Ingestion API unused here
all_cc_pairs = get_connector_credential_pairs(db_session, include_user_files=True)
cc_pair_count = max(len(all_cc_pairs) - 1, 0)
secondary_search_settings = get_secondary_search_settings(db_session)
new_search_settings = get_secondary_search_settings(db_session)
if not secondary_search_settings:
if not new_search_settings:
return None
# If the secondary search settings are not configured to reindex in the background,
# we can just swap over instantly
if not secondary_search_settings.background_reindex_enabled:
current_search_settings = get_current_search_settings(db_session)
_perform_index_swap(
# Handle switchover based on switchover_type
switchover_type = new_search_settings.switchover_type
# INSTANT: Swap immediately without waiting
if switchover_type == SwitchoverType.INSTANT:
return _perform_index_swap(
db_session=db_session,
current_search_settings=current_search_settings,
secondary_search_settings=secondary_search_settings,
new_search_settings=new_search_settings,
all_cc_pairs=all_cc_pairs,
# clean up all DocumentByConnectorCredentialPair / Document rows, since we're
# doing an instant swap.
cleanup_documents=True,
)
return current_search_settings
unique_cc_indexings = count_unique_cc_pairs_with_successful_index_attempts(
search_settings_id=secondary_search_settings.id, db_session=db_session
)
# Index Attempts are cleaned up as well when the cc-pair is deleted so the logic in this
# function is correct. The unique_cc_indexings are specifically for the existing cc-pairs
old_search_settings = None
if unique_cc_indexings > cc_pair_count:
logger.error("More unique indexings than cc pairs, should not occur")
if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings:
# Swap indices
current_search_settings = get_current_search_settings(db_session)
_perform_index_swap(
db_session=db_session,
current_search_settings=current_search_settings,
secondary_search_settings=secondary_search_settings,
all_cc_pairs=all_cc_pairs,
# REINDEX: Wait for all connectors to complete
elif switchover_type == SwitchoverType.REINDEX:
unique_cc_indexings = count_unique_cc_pairs_with_successful_index_attempts(
search_settings_id=new_search_settings.id, db_session=db_session
)
old_search_settings = current_search_settings
return old_search_settings
# Index Attempts are cleaned up as well when the cc-pair is deleted so the logic in this
# function is correct. The unique_cc_indexings are specifically for the existing cc-pairs
if unique_cc_indexings > cc_pair_count:
logger.error("More unique indexings than cc pairs, should not occur")
if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings:
# Swap indices
return _perform_index_swap(
db_session=db_session,
new_search_settings=new_search_settings,
all_cc_pairs=all_cc_pairs,
)
return None
# ACTIVE_ONLY: Wait for only non-paused connectors to complete
elif switchover_type == SwitchoverType.ACTIVE_ONLY:
# Count non-paused cc_pairs (excluding the default Ingestion API cc_pair)
active_cc_pairs = [
cc_pair
for cc_pair in all_cc_pairs
if cc_pair.status != ConnectorCredentialPairStatus.PAUSED
]
active_cc_pair_count = max(len(active_cc_pairs) - 1, 0)
unique_active_cc_indexings = (
count_unique_active_cc_pairs_with_successful_index_attempts(
search_settings_id=new_search_settings.id, db_session=db_session
)
)
if unique_active_cc_indexings > active_cc_pair_count:
logger.error(
"More unique active indexings than active cc pairs, should not occur"
)
if (
active_cc_pair_count == 0
or active_cc_pair_count == unique_active_cc_indexings
):
# Swap indices
return _perform_index_swap(
db_session=db_session,
new_search_settings=new_search_settings,
all_cc_pairs=all_cc_pairs,
)
return None
# Should not reach here, but handle gracefully
logger.error(f"Unknown switchover_type: {switchover_type}")
return None
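A toy walkthrough of the ACTIVE_ONLY readiness check above (numbers are illustrative): suppose there are four cc-pairs in total, one of them paused, with the "- 1" mirroring the subtraction of the default Ingestion API cc-pair.

    active_cc_pairs = 3                                  # default pair + 2 active connectors (paused one filtered out)
    active_cc_pair_count = max(active_cc_pairs - 1, 0)   # -> 2, as in the max(len(...) - 1, 0) above

    unique_active_cc_indexings = 2                       # both active connectors finished indexing on the new settings
    should_swap = (
        active_cc_pair_count == 0 or active_cc_pair_count == unique_active_cc_indexings
    )
    assert should_swap                                   # the paused connector never blocks the switchover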

View File

@@ -85,3 +85,10 @@ def get_file_id_by_user_file_id(user_file_id: str, db_session: Session) -> str |
if user_file:
return user_file.file_id
return None
def get_file_ids_by_user_file_ids(
user_file_ids: list[UUID], db_session: Session
) -> list[str]:
user_files = db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()
return [user_file.file_id for user_file in user_files]

View File

@@ -1,12 +1,12 @@
from collections import defaultdict
from collections.abc import Callable
from typing import Any
from uuid import UUID
from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session
from onyx.configs.app_configs import MAX_FEDERATED_CHUNKS
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FederatedConnectorSource
from onyx.context.search.models import InferenceChunk
@@ -18,6 +18,7 @@ from onyx.db.federated import list_federated_connector_oauth_tokens
from onyx.db.models import FederatedConnector__DocumentSet
from onyx.db.slack_bot import fetch_slack_bots
from onyx.federated_connectors.factory import get_federated_connector
from onyx.federated_connectors.interfaces import FederatedConnector
from onyx.onyxbot.slack.models import SlackContext
from onyx.utils.logger import setup_logger
@@ -86,15 +87,31 @@ def get_federated_retrieval_functions(
credentials,
)
federated_retrieval_infos_slack.append(
FederatedRetrievalInfo(
retrieval_function=lambda query: connector.search(
# Capture variables by value to avoid lambda closure issues
bot_token = tenant_slack_bot.bot_token
def create_slack_retrieval_function(
conn: FederatedConnector,
token: str,
ctx: SlackContext,
bot_tok: str,
) -> Callable[[SearchQuery], list[InferenceChunk]]:
def retrieval_fn(query: SearchQuery) -> list[InferenceChunk]:
return conn.search(
query,
{}, # Empty entities for Slack context
access_token=access_token,
limit=MAX_FEDERATED_CHUNKS,
slack_event_context=slack_context,
bot_token=tenant_slack_bot.bot_token,
access_token=token,
limit=None, # Let connector use its own max_messages_per_query config
slack_event_context=ctx,
bot_token=bot_tok,
)
return retrieval_fn
federated_retrieval_infos_slack.append(
FederatedRetrievalInfo(
retrieval_function=create_slack_retrieval_function(
connector, access_token, slack_context, bot_token
),
source=FederatedConnectorSource.FEDERATED_SLACK,
)
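The create_*_retrieval_function factories above exist because Python closures bind loop variables late: every lambda created inside the loop would otherwise see the last token. A standalone sketch of the pitfall and of the fix the diff applies:

    tokens = ["token-a", "token-b", "token-c"]

    buggy = [lambda: token for token in tokens]
    assert [fn() for fn in buggy] == ["token-c", "token-c", "token-c"]   # late binding: all see the last value

    def make_fn(tok: str):
        # The factory captures the current value as a local, exactly like the factories above
        return lambda: tok

    fixed = [make_fn(token) for token in tokens]
    assert [fn() for fn in fixed] == ["token-a", "token-b", "token-c"]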
@@ -158,22 +175,33 @@ def get_federated_retrieval_functions(
if document_set_names and not document_set_associations:
continue
if document_set_associations:
entities = document_set_associations[0].entities
else:
entities = {}
# Only use connector-level config (no junction table entities)
entities = oauth_token.federated_connector.config or {}
connector = get_federated_connector(
oauth_token.federated_connector.source,
oauth_token.federated_connector.credentials,
)
# Capture variables by value to avoid lambda closure issues
access_token = oauth_token.token
def create_retrieval_function(
conn: FederatedConnector,
ent: dict[str, Any],
token: str,
) -> Callable[[SearchQuery], list[InferenceChunk]]:
return lambda query: conn.search(
query,
ent,
access_token=token,
limit=None, # Let connector use its own max_messages_per_query config
)
federated_retrieval_infos.append(
FederatedRetrievalInfo(
retrieval_function=lambda query: connector.search(
query,
entities,
access_token=oauth_token.token,
limit=MAX_FEDERATED_CHUNKS,
retrieval_function=create_retrieval_function(
connector, entities, access_token
),
source=oauth_token.federated_connector.source,
)

View File

@@ -34,16 +34,34 @@ class FederatedConnector(ABC):
Returns:
True if entities are valid, False otherwise
Note: This method is used for backward compatibility with document-set level entities.
For connector-level config validation, use validate_config() instead.
"""
def validate_config(self, config: Dict[str, Any]) -> bool:
"""
Validate that the provided config matches the expected structure.
This is an alias for validate_entities() to provide clearer semantics
when validating connector-level configuration.
Args:
config: Dictionary of configuration to validate
Returns:
True if config is valid, False otherwise
"""
return self.validate_entities(config)
@classmethod
@abstractmethod
def entities_schema(cls) -> Dict[str, EntityField]:
def configuration_schema(cls) -> Dict[str, EntityField]:
"""
Return the specification of what entities are available for this connector.
Return the specification of what configuration fields are available for this connector.
Returns:
Dictionary where keys are entity names and values are EntityField objects
Dictionary where keys are configuration field names and values are EntityField objects
describing the expected structure and constraints.
"""
@@ -96,7 +114,7 @@ class FederatedConnector(ABC):
Args:
query: The search query
entities: The entities to search within (validated by validate())
entities: Connector-level config (entity filtering configuration)
access_token: The OAuth access token
limit: Maximum number of results to return
slack_event_context: Slack-specific context (only used by Slack bot)


@@ -66,5 +66,4 @@ class OAuthResult(BaseModel):
default=None, description="Raw response for debugging"
)
class Config:
json_encoders = {datetime: lambda v: v.isoformat()}
# Pydantic V2 automatically serializes datetime to ISO format, so no custom encoder needed
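Pydantic v2 serializes datetime fields to ISO 8601 on its own, which is why the json_encoders Config could be dropped. A standalone check (not part of the connector code):

# Standalone illustration: Pydantic v2 emits ISO 8601 strings for datetime
# fields without a custom encoder.
from datetime import datetime, timezone
from pydantic import BaseModel

class Example(BaseModel):
    expires_at: datetime

print(Example(expires_at=datetime(2025, 1, 1, tzinfo=timezone.utc)).model_dump_json())
# -> {"expires_at":"2025-01-01T00:00:00Z"}  (exact offset formatting may vary by version)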


@@ -6,6 +6,7 @@ from urllib.parse import urlencode
import requests
from pydantic import ValidationError
from slack_sdk import WebClient
from typing_extensions import override
from onyx.context.search.federated.slack_search import slack_retrieval
@@ -25,11 +26,16 @@ logger = setup_logger()
SCOPES = [
"channels:read",
"groups:read",
"im:read",
"mpim:read",
"search:read",
"channels:history",
"groups:history",
"im:history",
"mpim:history",
"users:read",
"users.profile:read",
]
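The expanded scope list is what the OAuth authorize step has to request. A rough sketch of how such a URL is typically built with urlencode; the endpoint and parameter names follow Slack's documented OAuth v2 flow and are not necessarily this connector's exact implementation:

# Rough sketch of building a Slack OAuth v2 authorize URL from SCOPES.
from urllib.parse import urlencode

def build_authorize_url(client_id: str, redirect_uri: str, state: str) -> str:
    params = {
        "client_id": client_id,
        "user_scope": ",".join(SCOPES),  # user-token scopes, e.g. search:read
        "redirect_uri": redirect_uri,
        "state": state,
    }
    return f"https://slack.com/oauth/v2/authorize?{urlencode(params)}"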
@@ -58,30 +64,76 @@ class SlackFederatedConnector(FederatedConnector):
return False
@classmethod
@override
def entities_schema(cls) -> dict[str, EntityField]:
"""Return the specifications of what entities are available for this federated search type.
"""Return the specifications of what entity configuration fields are available for Slack.
Returns a specification that tells the caller:
- channels is valid and should be a list[str]
- include_dm is valid and should be a boolean
This is the canonical schema definition for Slack entities.
"""
return {
"exclude_channels": EntityField(
type="list[str]",
description="Exclude the following channels from search. Glob patterns are supported.",
required=False,
example=["secure-channel", "private-*", "customer*"],
),
"search_all_channels": EntityField(
type="bool",
description="Search all accessible channels. If not set, must specify channels below.",
required=False,
default=False,
example=False,
),
"channels": EntityField(
type="list[str]",
description="List of Slack channel names or IDs to search in",
description="Search the following channels",
required=False,
example=["general", "random", "C1234567890"],
example=["general", "eng*", "product-*"],
),
"include_dm": EntityField(
type="bool",
description="Whether to include direct messages in the search",
description="Include user direct messages in search results",
required=False,
default=False,
example=True,
example=False,
),
"include_group_dm": EntityField(
type="bool",
description="Include group direct messages (multi-person DMs) in search results",
required=False,
default=False,
example=False,
),
"include_private_channels": EntityField(
type="bool",
description="Include private channels in search results (user must have access)",
required=False,
default=False,
example=False,
),
"default_search_days": EntityField(
type="int",
description="Maximum number of days to search back. Increasing this value degrades answer quality.",
required=False,
default=30,
example=30,
),
"max_messages_per_query": EntityField(
type="int",
description=(
"Maximum number of messages to retrieve per search query. "
"Higher values provide more context but may be slower."
),
required=False,
default=25,
example=25,
),
}
@classmethod
def configuration_schema(cls) -> dict[str, EntityField]:
"""Wrapper for backwards compatibility - delegates to entities_schema()."""
return cls.entities_schema()
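The schema advertises glob patterns for channels and exclude_channels. A minimal sketch of how such patterns can be applied with the standard library; the connector's real filtering lives in slack_search and may be implemented differently:

# Minimal sketch of glob-style channel filtering matching the schema semantics.
from fnmatch import fnmatch

def channel_allowed(
    name: str,
    include_patterns: list[str] | None,
    exclude_patterns: list[str] | None,
) -> bool:
    if exclude_patterns and any(fnmatch(name, p) for p in exclude_patterns):
        return False
    if include_patterns:
        return any(fnmatch(name, p) for p in include_patterns)
    return True  # search_all_channels behavior

assert channel_allowed("eng-backend", ["eng*"], ["private-*"])
assert not channel_allowed("private-hr", None, ["private-*"])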
@classmethod
@override
def credentials_schema(cls) -> dict[str, CredentialField]:
@@ -229,7 +281,7 @@ class SlackFederatedConnector(FederatedConnector):
Args:
query: The search query
entities: The entities to search within (validated by validate())
entities: Connector-level config (entity filtering configuration)
access_token: The OAuth access token
limit: Maximum number of results to return
slack_event_context: Optional Slack context for slack bot
@@ -240,12 +292,28 @@ class SlackFederatedConnector(FederatedConnector):
"""
logger.info(f"Slack federated search called with entities: {entities}")
# Get team_id from Slack API for caching and filtering
team_id = None
try:
slack_client = WebClient(token=access_token)
auth_response = slack_client.auth_test()
auth_response.validate()
# Cast response.data to dict for type checking
auth_data: dict[str, Any] = auth_response.data # type: ignore
team_id = auth_data.get("team_id")
logger.info(f"Slack team_id: {team_id}")
except Exception as e:
logger.warning(f"Could not fetch team_id from Slack API: {e}")
with get_session_with_current_tenant() as db_session:
return slack_retrieval(
query,
access_token,
db_session,
limit,
entities=entities,
limit=limit,
slack_event_context=slack_event_context,
bot_token=bot_token,
team_id=team_id,
)



@@ -3,21 +3,81 @@ from typing import Optional
from pydantic import BaseModel
from pydantic import Field
from pydantic import field_validator
from pydantic import model_validator
class SlackEntities(BaseModel):
"""Pydantic model for Slack federated search entities."""
# Channel filtering
search_all_channels: bool = Field(
default=True,
description="Search all accessible channels. If not set, must specify channels below.",
)
channels: Optional[list[str]] = Field(
default=None, description="List of Slack channel names or IDs to search in"
default=None,
description="List of Slack channel names to search across.",
)
include_dm: Optional[bool] = Field(
default=False, description="Whether to include direct messages in the search"
exclude_channels: Optional[list[str]] = Field(
default=None,
description="List of channel names or patterns to exclude e.g. 'private-*, customer-*, secure-channel'.",
)
# Direct message filtering
include_dm: bool = Field(
default=False,
description="Include user direct messages in search results",
)
include_group_dm: bool = Field(
default=False,
description="Include group direct messages (multi-person DMs) in search results",
)
# Private channel filtering
include_private_channels: bool = Field(
default=False,
description="Include private channels in search results (user must have access)",
)
# Date range filtering
default_search_days: int = Field(
default=30,
description="Maximum number of days to search back. Increasing this value degrades answer quality.",
)
# Message count per slack request
max_messages_per_query: int = Field(
default=25,
description=(
"Maximum number of messages to retrieve per search query. "
"Higher values provide more context but may be slower."
),
)
@field_validator("default_search_days")
@classmethod
def validate_default_search_days(cls, v: int) -> int:
"""Validate default_search_days is positive and reasonable"""
if v < 1:
raise ValueError("default_search_days must be at least 1")
if v > 365:
raise ValueError("default_search_days cannot exceed 365 days")
return v
@field_validator("max_messages_per_query")
@classmethod
def validate_max_messages_per_query(cls, v: int) -> int:
"""Validate max_messages_per_query is positive and reasonable"""
if v < 1:
raise ValueError("max_messages_per_query must be at least 1")
if v > 100:
raise ValueError("max_messages_per_query cannot exceed 100")
return v
@field_validator("channels")
@classmethod
def validate_channels(cls, v: Optional[list[str]]) -> Optional[list[str]]:
"""Validate each channel is a non-empty string"""
if v is not None:
if not isinstance(v, list):
raise ValueError("channels must be a list")
@@ -26,6 +86,31 @@ class SlackEntities(BaseModel):
raise ValueError("Each channel must be a non-empty string")
return v
@field_validator("exclude_channels")
@classmethod
def validate_exclude_patterns(cls, v: Optional[list[str]]) -> Optional[list[str]]:
"""Validate each exclude pattern is a non-empty string"""
if v is None:
return v
for pattern in v:
if not isinstance(pattern, str) or not pattern.strip():
raise ValueError("Each exclude pattern must be a non-empty string")
return v
@model_validator(mode="after")
def validate_channel_config(self) -> "SlackEntities":
"""Validate search_all_channels configuration"""
# If search_all_channels is False, channels list must be provided
if not self.search_all_channels:
if self.channels is None or len(self.channels) == 0:
raise ValueError(
"Must specify at least one channel when search_all_channels is False"
)
return self
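Taken together, the validators reject inconsistent configs at parse time rather than at search time. A small sketch of the behavior, assuming SlackEntities is imported from this module:

# Usage sketch of the SlackEntities validators above.
from pydantic import ValidationError

ok = SlackEntities(channels=["general", "eng-*"], default_search_days=14)
assert ok.max_messages_per_query == 25  # defaults applied

for bad in (
    {"search_all_channels": False, "channels": []},   # must list channels
    {"default_search_days": 0},                        # below minimum
    {"max_messages_per_query": 500},                   # above maximum
):
    try:
        SlackEntities(**bad)
        raise AssertionError(f"expected a validation error for {bad}")
    except ValidationError:
        pass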
class SlackCredentials(BaseModel):
"""Slack federated connector credentials."""


@@ -112,7 +112,7 @@ def _summarize_image(
)
try:
return message_to_string(llm.invoke(messages))
return message_to_string(llm.invoke_langchain(messages))
except Exception as e:
error_msg = f"Summarization failed. Messages: {messages}"


@@ -332,13 +332,18 @@ class S3BackedFileStore(FileStore):
sha256_hash = hashlib.sha256()
kwargs: S3PutKwargs = {}
# FIX: Optimize checksum generation to avoid creating extra copies in memory
# Read content from IO object
if hasattr(content, "read"):
file_content = content.read()
if S3_GENERATE_LOCAL_CHECKSUM:
data_bytes = str(file_content).encode()
sha256_hash.update(data_bytes)
hash256 = sha256_hash.hexdigest()  # get the sha256 hash in hex format
# FIX: Don't convert to string first (creates unnecessary copy)
# Work directly with bytes
if isinstance(file_content, bytes):
sha256_hash.update(file_content)
else:
sha256_hash.update(str(file_content).encode())
hash256 = sha256_hash.hexdigest()
kwargs["ChecksumSHA256"] = hash256
if hasattr(content, "seek"):
content.seek(0) # Reset position for potential re-reads
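The checksum fix matters because str(file_content).encode() on bytes hashes the repr (e.g. "b'...'") rather than the content, while also copying the data. A standalone sketch of hashing a binary stream in fixed-size chunks with no extra copies:

# Standalone sketch: hash a binary stream incrementally.
import hashlib
from typing import BinaryIO

def sha256_of_stream(stream: BinaryIO, chunk_size: int = 1024 * 1024) -> str:
    digest = hashlib.sha256()
    for chunk in iter(lambda: stream.read(chunk_size), b""):
        digest.update(chunk)
    stream.seek(0)  # leave the stream reusable, as the store does above
    return digest.hexdigest()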
@@ -392,15 +397,20 @@ class S3BackedFileStore(FileStore):
logger.error(f"Failed to read file {file_id} from S3")
raise
file_content = response["Body"].read()
# FIX: Stream file content instead of loading entire file into memory
# This prevents OOM issues with large files (500MB+ PDFs, etc.)
if use_tempfile:
# Always open in binary mode for temp files since we're writing bytes
temp_file = tempfile.NamedTemporaryFile(mode="w+b", delete=False)
temp_file.write(file_content)
# Stream directly to temp file to avoid holding entire file in memory
temp_file = tempfile.NamedTemporaryFile(mode="w+b", delete=True)
# Stream in 8MB chunks to reduce memory footprint
for chunk in response["Body"].iter_chunks(chunk_size=8 * 1024 * 1024):
temp_file.write(chunk)
temp_file.seek(0)
return temp_file
else:
# For BytesIO, we still need to read into memory (legacy behavior)
# but at least we're not creating duplicate copies
file_content = response["Body"].read()
return BytesIO(file_content)
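One design note on the new tempfile path: with delete=True, the backing file is removed automatically once the caller closes the returned handle. For reference, the same streaming idiom in isolation; bucket and key names are placeholders:

# Isolated sketch of the streaming pattern used above (placeholder bucket/key).
import tempfile
import boto3

s3 = boto3.client("s3")
response = s3.get_object(Bucket="example-bucket", Key="large-file.pdf")

with tempfile.NamedTemporaryFile(mode="w+b") as tmp:
    # 8 MiB chunks keep peak memory small even for multi-GB objects.
    for chunk in response["Body"].iter_chunks(chunk_size=8 * 1024 * 1024):
        tmp.write(chunk)
    tmp.seek(0)
    header = tmp.read(4)  # e.g. b"%PDF" for a PDF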
def read_file_record(


@@ -211,7 +211,7 @@ def get_user_files(
return user_files
def get_user_files_as_user(
def validate_user_files_ownership(
user_file_ids: list[UUID],
user_id: UUID | None,
db_session: Session,

Some files were not shown because too many files have changed in this diff.