Compare commits

...

170 Commits

Author | SHA1 | Message | Date
Raunak Bhagat
9e9c3ec0b9 Remove unused imports 2025-11-18 13:51:10 -08:00
Raunak Bhagat
1457ca2a20 Make share button instantaneous 2025-11-18 13:50:37 -08:00
Raunak Bhagat
edc390edc6 Implement AppPage wrapper for all other pages inside of /chat 2025-11-18 13:34:38 -08:00
Raunak Bhagat
022624cb5a Maintain consistent heights 2025-11-18 13:20:09 -08:00
Raunak Bhagat
f301257130 Make chatSession info and settings info be passed in as server-side data 2025-11-18 13:07:52 -08:00
Raunak Bhagat
9eecc71cda Fix flashing 2025-11-18 11:43:49 -08:00
Justin Tahara
6677e12e55 chore(vespa): Update version (#6299) 2025-11-18 09:50:38 -08:00
SubashMohan
7175b93a4c enhancement(onboarding) : Replacing Select input with combobox (#6048) 2025-11-18 17:40:57 +05:30
SubashMohan
fbbcd9646d fix(onboarding): Header animated icon (#6098) 2025-11-18 12:24:42 +05:30
SubashMohan
7afc9d417c feat(modal): Implement a new modal component (#6289) 2025-11-17 23:37:35 +00:00
Wenxi
a905f2d3fb chore: pydantic v2 model configs (#6302) 2025-11-17 23:24:41 +00:00
Jamison Lahman
3d1994a515 chore(deployments): run trivy scanners separate from build and push (#6301) 2025-11-17 23:16:16 +00:00
dependabot[bot]
7f507c7be0 chore(deps): Bump actions/setup-python from 4.9.1 to 6.0.0 (#6296)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-17 20:38:08 +00:00
Jamison Lahman
c0e418d63e chore(deployment): notifications on build failures (#6298) 2025-11-17 20:20:21 +00:00
dependabot[bot]
db49e14f12 chore(deps): Bump docker/login-action from 1.14.1 to 3.6.0 (#6295)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-17 20:19:48 +00:00
dependabot[bot]
e87d6403e8 chore(deps): Bump helm/kind-action from 1.12.0 to 1.13.0 (#6294)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 19:48:13 +00:00
Richard Guan
2b6e02a775 chore(internal): search prune sections (#6247) 2025-11-17 18:40:42 +00:00
Justin Tahara
26e1f349b9 fix(index attempts): Preserve some attempts (#6266) 2025-11-17 18:06:26 +00:00
Jamison Lahman
ba83d7e6c3 chore(docker): generate OpenAPI schema/client with docker (#6286) 2025-11-17 17:20:07 +00:00
dependabot[bot]
f869e44497 chore(deps-dev): Bump js-yaml from 3.14.1 to 3.14.2 in /web (#6293)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 17:17:37 +00:00
Jamison Lahman
b367a60680 chore(gha): replace background docker pulls with docker-compose (#6287) 2025-11-17 17:11:56 +00:00
Jamison Lahman
98a7e8b7e2 chore(docker): avoid ONYX_VERSION invalidating the docker cache (#6288) 2025-11-17 17:10:54 +00:00
Nikolas Garza
f93752a2b3 fix: disable aggressive caching for Next.js static assets in dev (#6280)
Co-authored-by: Nikolas Garza <nikolas@unknowna6c9beeb7428.attlocal.net>
2025-11-17 09:15:51 -08:00
Evan Lohn
0d20140cad fix: mcp fixes (#6080) 2025-11-17 08:58:49 -08:00
Wenxi
bdd6dc036e fix(ui): new action form spacing fixes (#6285) 2025-11-17 05:14:05 +00:00
dependabot[bot]
27fe196df3 chore(deps): Bump nanoid from 3.3.7 to 3.3.8 in /examples/widget (#3405)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-17 03:36:35 +00:00
Wenxi
18dad51bf8 fix(tests): pause connector while indexing timeout (#6282) 2025-11-16 22:51:49 +00:00
Wenxi
b6d60fb6a9 fix(permsync): don't fail on empty group ids (#6281) 2025-11-16 22:02:03 +00:00
Wenxi
86e7975c42 chore: foss sync readme (#6256) 2025-11-16 13:18:13 -08:00
Jamison Lahman
bb1fb2250e chore(scripts): only run check_lazy_imports on changed files (#6275) 2025-11-16 18:31:53 +00:00
Nikolas Garza
8fdc3411ed feat(slack federated search scoping - 4/4): Add frontend connector config support (#6181)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-16 10:29:44 -08:00
Chris Weaver
d5038e8e68 fix: assistant reordering (#6278) 2025-11-16 09:07:56 -08:00
Jamison Lahman
bc035a78e4 chore(deployment): increase model-server builder to 40GB disk (#6277) 2025-11-16 05:17:11 +00:00
Jamison Lahman
9e1043b2fa chore(mypy): color output in CI (#6274) 2025-11-16 05:12:50 +00:00
SubashMohan
107e83bf2a refactor(chat): Apply Ownership Checks Only to Current Message User Files (#6240) 2025-11-16 05:06:35 +00:00
SubashMohan
f5aade9f69 fix(userfiles): remove fixed width in AssistantEditor and ProjectContextPanel (#6239) 2025-11-15 11:50:37 +00:00
dependabot[bot]
9b9ca43671 chore(deps): bump next from 14.2.27 to 14.2.32 in /examples/widget (#5395)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-15 07:07:40 +00:00
Nikolas Garza
0c61cc3f65 feat(slack federated search scoping - 3/4): Add connector-level config support (#6178)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 04:42:23 +00:00
Nikolas Garza
553853c7f4 feat(slack federated search scoping - 2/4): Add query construction and filtering (#6175)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 04:11:28 +00:00
Jamison Lahman
15a05663ca chore(docker): install node deps before copying source (#6261) 2025-11-15 03:55:11 +00:00
Jamison Lahman
940773b9c5 chore(deployments): fix cross-platform related issues (#6272) 2025-11-15 03:24:26 +00:00
Nikolas Garza
a95ae6e88b feat(slack federated search scoping - 1/4): Add entity filtering config (#6174)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-15 02:47:52 +00:00
Raunak Bhagat
369f923929 refactor: Implement a proper Switch component (#6270) 2025-11-15 02:28:58 +00:00
Raunak Bhagat
3eefbfb646 fix: Fix header for white-labelling (#6271) 2025-11-14 18:27:29 -08:00
Justin Tahara
3919a2d0a2 fix(gdrive): Missing Id Field (#6262) 2025-11-14 17:59:34 -08:00
Justin Tahara
4553e811b0 feat(github): Showcasing our Github Repo Ranking (#6267) 2025-11-14 16:54:34 -08:00
Justin Tahara
7f7389692e fix(reformat): Teams Test (#6268) 2025-11-14 16:53:19 -08:00
Richard Guan
30147c03cf chore(fix): agent sdk replacement message formatting (#6180) 2025-11-14 14:51:37 -08:00
Wenxi
dc48ccc117 fix(teams): mypy (#6259) 2025-11-14 14:42:30 -08:00
Alex Kim
ee366c50c4 fix(teams): handle OData parsing errors with special characters (#6115)
Co-authored-by: Jessica Singh <86633231+jessicasingh7@users.noreply.github.com>
2025-11-14 14:38:58 -08:00
sktbcpraha
caf92a6cce fix: Assistant instruction ignored (#6243) 2025-11-14 14:30:14 -08:00
Jamison Lahman
259bc9d64b chore(deployments): fix actions/checkout typo (#6255) 2025-11-14 21:48:12 +00:00
dependabot[bot]
60664f7e5b chore(deps-dev): bump js-yaml from 4.1.0 to 4.1.1 in /examples/widget (#6248)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-14 21:09:16 +00:00
Wenxi
07f55c6ae2 fix: readme (#6254) 2025-11-14 13:31:29 -08:00
Wenxi
256ece05a6 chore: readme (#6253) 2025-11-14 13:26:53 -08:00
Jamison Lahman
530d6d8284 chore(deployments): simplify pipeline + cross-platform builds (#6250) 2025-11-14 13:16:20 -08:00
Chris Weaver
6299bc30b6 fix: playwright test (#6244)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2025-11-14 12:26:50 -08:00
Jamison Lahman
0607ea9741 chore(deployments): add ability to trigger dry-run build (#6246) 2025-11-14 11:22:22 -08:00
Chris Weaver
3ba4bdfe78 fix: gpt-5 output formatting (#6245) 2025-11-14 10:55:17 -08:00
Chris Weaver
a9165ad329 feat: allow switchover with active connectors only (#6226) 2025-11-14 16:52:07 +00:00
Raunak Bhagat
24aea2d7ce refactor: Edit button types (#6235) 2025-11-14 16:21:08 +00:00
SubashMohan
aa30008419 feat(component): new switch component (#6212) 2025-11-14 08:46:53 +00:00
Raunak Bhagat
3605676f61 fix: Fix inputs overflowing in Settings page (#6238) 2025-11-14 06:24:25 +00:00
Raunak Bhagat
1faa9e7812 refactor: Updated Modals API (#6227)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-13 21:48:28 -08:00
Jamison Lahman
d85b702cac chore(deployments): remove unnecessary install of build-essentials (#6234) 2025-11-14 04:33:28 +00:00
Jamison Lahman
a724f53e5b chore(deployments): prefer ecr over s3 as docker cache backend (#6232) 2025-11-13 19:39:55 -08:00
Chris Weaver
68fcc5cb8a fix: signup button (#6228) 2025-11-13 19:02:05 -08:00
Justin Tahara
3eb1ca01a2 fix(docprocessing): OOM cleanup (#6223) 2025-11-13 18:24:59 -08:00
Chris Weaver
c2c3d1a722 feat: allow disabling the default assistant (#6222) 2025-11-13 17:42:47 -08:00
Wenxi
f79a8533fb fix: show agent descriptions (#6219) 2025-11-13 14:17:43 -08:00
Jamison Lahman
c1dce9fabd chore(runs-on): define custom AMI specs (#6216) 2025-11-13 22:01:07 +00:00
Jamison Lahman
244bf82c7a chore(gha): prefer venv over installing python packages to the system (#6213) 2025-11-13 17:39:54 +00:00
Jamison Lahman
188ea3faff chore(gha): prefer Github-hosted for simple steps (#6208) 2025-11-13 02:37:48 +00:00
Justin Tahara
c04f624891 fix(slack): Fixing the link coloring (#6203) 2025-11-13 02:32:50 +00:00
Jamison Lahman
43ae02a870 chore(gha): remove custom cloudwatch metrics (#6202) 2025-11-13 00:12:13 +00:00
Jamison Lahman
14123926a7 chore(gha): final runs-on migration nits (#6170) 2025-11-12 23:00:25 +00:00
Justin Tahara
d14d1b833f fix(slack): Show Channels when Editing Fed Slack (#6200) 2025-11-12 22:30:49 +00:00
Nikolas Garza
ff06f10af6 fix: type checking for multiToolTestHelpers (#6199)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 14:36:04 -08:00
Justin Tahara
5d26c1bafc fix(slackbot): Switch between document set and assistant (#6198) 2025-11-12 22:21:27 +00:00
dependabot[bot]
dbf06c6a1b chore(deps): bump aquasecurity/trivy-action from 0.29.0 to 0.33.1 (#6194)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 19:43:15 +00:00
dependabot[bot]
d31e83900f chore(deps): bump docker/setup-buildx-action from 1.7.0 to 3.11.1 (#6196)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 19:25:04 +00:00
dependabot[bot]
1ac92e6bd0 chore(deps-dev): bump types-urllib3 from 1.26.25.11 to 1.26.25.14 in /backend (#6193)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 19:21:32 +00:00
dependabot[bot]
5e159c35f3 chore(deps): bump pilosus/action-pip-license-checker from 2.0.0 to 3.1.0 (#6191)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 19:00:23 +00:00
Raunak Bhagat
550271abd9 feat: Share chat button in top right corner (#6186)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 11:08:23 -08:00
Nikolas Garza
db3d0bfb34 feat: improve usability of react testing framework + multi-tool renderer tests (#5973)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 10:48:49 -08:00
Nikolas Garza
860bdd3c0f chore: run playwright projects as separate jobs (#6190)
Co-authored-by: Nikolas Garza <nikolas@Nikolass-MacBook-Pro.local>
2025-11-12 18:28:19 +00:00
Jamison Lahman
3bc63b30ce chore(deps): dependabot for python (#6188) 2025-11-12 18:18:27 +00:00
dependabot[bot]
78a23eeec0 chore(deps): bump pypdf from 6.0.0 to 6.1.3 in /backend/requirements (#5866)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-12 18:15:05 +00:00
Raunak Bhagat
096e4029ba build: Revert to using webpack instead of turbopack (#6185)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-12 18:10:17 +00:00
SubashMohan
e8da5722df feat(upload): increase token limit to 100k and configurable skip (#6187) 2025-11-12 09:53:37 -08:00
Jamison Lahman
a1a261f68e chore(lint): introduce actionlint, github actions linter (#6184) 2025-11-12 03:39:17 +00:00
Jamison Lahman
ac57b10240 chore(gha): ensure run-id is unique, fix hanging jobs (#6183) 2025-11-12 01:25:59 +00:00
Richard Guan
ce35e01ce3 chore(hotfix): for configuration (#6182) 2025-11-12 00:59:28 +00:00
Richard Guan
808f82de0b chore(agent sdk): make alternative to openai agent sdk (#6153) 2025-11-11 16:25:19 -08:00
Jamison Lahman
9518bd14bb chore(gha): explicit spot pricing (#6177) 2025-11-11 23:52:54 +00:00
Justin Tahara
54eb655634 fix(gdrive): Checkbox fix (#6171) 2025-11-11 22:39:36 +00:00
Wenxi
a773c398af fix: safari input bar quadrupling new lines (#6173) 2025-11-11 13:31:23 -08:00
Jamison Lahman
53131e7669 chore(gha): run whitespace fixers on actions (#6172) 2025-11-11 13:06:59 -08:00
Richard Guan
d5cb56b0e9 chore(llm): interface decoupled from langchain (#6128) 2025-11-11 19:48:25 +00:00
Wenxi
de6226e192 fix: img input support check false vs. none (#6169) 2025-11-11 11:21:58 -08:00
Jamison Lahman
a1d502804a chore(gha): migrate pr-integration-tests off blacksmith (#6164) 2025-11-11 19:06:56 +00:00
Jamison Lahman
76fc01968b chore(gha): de-dupe python setup for external dep tests (#6159)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-11 09:38:01 -08:00
Jamison Lahman
f9de82c135 chore(runs-on): more instance families and use price-capacity-optimized (#6165) 2025-11-11 09:37:50 -08:00
Justin Tahara
db4b074938 fix(pegasus): Cleanup (#6163) 2025-11-11 09:26:58 -08:00
Justin Tahara
bc5a574cf1 fix(embedding): Fix Deletion of Same Name (#6149) 2025-11-10 19:37:21 -08:00
Jamison Lahman
c14414c9be feat(pre-commit): run check-yaml on .github/ (#6160) 2025-11-11 02:21:50 +00:00
Justin Tahara
770bfcf360 fix(gpt-5): Catch all (#6162) 2025-11-10 18:35:06 -08:00
Chris Weaver
67c1099f98 fix: improve /llm/provider performance (#6158) 2025-11-10 17:01:56 -08:00
Jamison Lahman
67eb54734f chore(gha): migrate playwright tests to runs-on (#6154) 2025-11-10 15:51:14 -08:00
Justin Tahara
f819fdf09b feat(auth): Allow JIT even with Invite List (#6157) 2025-11-10 14:36:59 -08:00
Justin Tahara
b39a4a075a fix(cohere): Add Billing Handler (#6156) 2025-11-10 14:31:01 -08:00
Justin Tahara
8a244aff0d feat(api): Paginated Document Search (#6155) 2025-11-10 14:10:36 -08:00
Jamison Lahman
6a74e54eda feat(gha): python tests use uv w/ caching (#6152) 2025-11-10 12:10:21 -08:00
Jamison Lahman
e87818c961 feat(gha): enable npm caching in CI (#6151) 2025-11-10 11:34:06 -08:00
dependabot[bot]
fbec393faa chore(deps): bump actions/download-artifact from 4.3.0 to 6.0.0 (#6147)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:27:45 -08:00
dependabot[bot]
da167e93ab chore(deps): bump actions/upload-artifact from 4.6.2 to 5.0.0 (#6146)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:27:36 -08:00
dependabot[bot]
91c0b273bf chore(deps): bump actions/setup-node from 4.4.0 to 6.0.0 (#6148)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 11:14:01 -08:00
Jamison Lahman
72d1cfa36a chore(gha): docker-login follow up (#6150) 2025-11-10 10:57:34 -08:00
Jamison Lahman
1f45ebc818 fix(gha): docker login for all external image fetching (#6139) 2025-11-10 10:34:02 -08:00
Chris Weaver
c1428d03f5 fix: infinite render on embedding model page (#6144) 2025-11-10 09:39:32 -08:00
Chris Weaver
904bcdb0fe chore: change log-level of keyword extraction failure (#6135) 2025-11-08 14:52:38 -08:00
Nikolas Garza
9caf136f0e feat: llm access controls (#5819) 2025-11-08 10:36:14 -08:00
Raunak Bhagat
e06ad214cb fix: White labelling (#6133)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-11-07 18:39:04 -08:00
Chris Weaver
fe53ae9d79 fix: package-lock.json (#6106) 2025-11-07 18:10:01 -08:00
Jamison Lahman
5a2796d285 chore(gha): pr-python-checks instance update (#6129) 2025-11-07 17:29:25 -08:00
Justin Tahara
aba5bee4d7 fix(ui): Make Private Groups selectable again (#6116) 2025-11-07 17:10:39 -08:00
Justin Tahara
a0eaf126be feat(azure): Support OpenAI Image models (#6107) 2025-11-07 17:10:24 -08:00
Justin Tahara
28712aab1d fix(vercel): Remove deprecated fields (#6130) 2025-11-07 17:09:41 -08:00
Justin Tahara
25de38fcf7 fix(chat): Adding buffer for instructions (#6125) 2025-11-07 16:33:37 -08:00
Justin Tahara
53123e2870 fix(upload): File type handling (#6126) 2025-11-07 16:25:13 -08:00
Jamison Lahman
fa8487a1a8 chore(gha): reduce size of pr-quality-check instance (#6123) 2025-11-07 16:21:20 -08:00
Jamison Lahman
3f0bcd516d fix(gha): fix terraform pre-commit test (#6124) 2025-11-07 15:26:29 -08:00
Justin Tahara
76d25ff489 fix(tool): Hide Okta Tool (#6120) 2025-11-07 13:36:23 -08:00
Wenxi
f99d0285f9 fix: openapi generation required fields (#6117) 2025-11-07 18:45:30 +00:00
Raunak Bhagat
988221550e fix: Sidebar sizing fix (#6113) 2025-11-06 19:43:31 -08:00
Raunak Bhagat
6b636c1b90 feat: Sidebar anim (#6111) 2025-11-06 19:32:06 -08:00
Justin Tahara
1fee528d86 fix(icons): Update Token Rate Limits page (#6112) 2025-11-06 17:35:55 -08:00
Wenxi
815ab5ebc2 fix: hydration errors (#6100)
Co-authored-by: Raunak Bhagat <r@rabh.io>
2025-11-06 17:14:26 -08:00
Jessica Singh
c8553331ae fix(teams): increase timeout w special char team names (#6086)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-06 23:22:25 +00:00
Raunak Bhagat
362da495ff refactor: Update tooltip colours (#6093)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-11-06 14:49:22 -08:00
Richard Guan
abb444cd85 chore(prompting): fix default behavior assistant (#6103) 2025-11-06 21:25:41 +00:00
Justin Tahara
fc7e6f798d fix(bedrock): Add Gov Cloud regions (#6105) 2025-11-06 13:18:59 -08:00
Jamison Lahman
8b39d60bca chore(gha): migrate connectors-check to uv w/ caching (#6102) 2025-11-06 19:55:24 +00:00
Justin Tahara
9ac8331cd3 fix(gdrive): Add support for domain link only filtering (#6076) 2025-11-06 19:53:39 +00:00
Raunak Bhagat
295dc2d28c refactor: New Opal component library (#6062)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-11-06 18:50:32 +00:00
Raunak Bhagat
c15266227a refactor: Update how disabled actions are rendered (#6094) 2025-11-06 10:09:03 -08:00
Raunak Bhagat
1c54b357ee fix: Fix modal dragging issue (#6095) 2025-11-06 10:05:18 -08:00
Jamison Lahman
791346eca8 chore(gha): migrate external dependency tests to uv (#6083) 2025-11-06 07:17:25 +00:00
Chris Weaver
2d2a2452bf fix: setDisplayComplete not called for ollama (#6092) 2025-11-05 22:04:00 -08:00
dependabot[bot]
4d3094c09f chore(deps): bump aws-actions/configure-aws-credentials from 4.3.1 to 5.1.0 (#6089)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-06 05:28:20 +00:00
dependabot[bot]
882e5f999d chore(deps): bump helm/chart-testing-action from 2.7.0 to 2.8.0 (#6090)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-06 05:25:29 +00:00
dependabot[bot]
c2cf3991a0 chore(deps): bump actions/github-script from 7.1.0 to 8.0.0 (#6091)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-05 21:04:30 -08:00
Wenxi
402dfdad2c fix: disable PAT when AUTH_TYPE is disabled (#6088) 2025-11-05 20:00:20 -08:00
Jamison Lahman
ef8de62478 chore(deps): basic dependabot setup for actions (#6087) 2025-11-06 02:28:37 +00:00
Evan Lohn
8101be42ea feat: gmail connector checkpointing (#6040) 2025-11-05 18:03:28 -08:00
Chris Weaver
0c615cd76d fix: infinite render on React 19 (#6085) 2025-11-05 17:54:21 -08:00
Wenxi
421e9899b8 fix: preload user settings correctly (#6063) 2025-11-06 01:13:43 +00:00
Jamison Lahman
6379423dfc feat(gha): persist the mypy_cache (#6079)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-06 00:49:56 +00:00
Jamison Lahman
1c742e675a fix(gha): Dont wait for vespa server (#6081) 2025-11-06 00:21:54 +00:00
Richard Guan
5c3b2320a7 chore(simplified): tools (#6064) 2025-11-05 16:12:14 -08:00
Richard Guan
198fc145fc chore(custom): instructions (#6055) 2025-11-05 22:05:30 +00:00
Jamison Lahman
0f84391f60 chore(gha): migrate mypy workflow to uv w/ caching (#6074) 2025-11-05 14:03:57 -08:00
Jamison Lahman
1e101f8028 chore(gha): pin workflow versions (#6058)
## Description

SHAs are more secure than version tags.
2025-11-05 13:29:58 -08:00
Wenxi
7e40cbe0d1 fix: honor pw min length env var on fe (#6065) 2025-11-05 11:43:44 -08:00
Chris Weaver
a528dbe241 fix: airgapped (#6067) 2025-11-04 23:34:37 -08:00
Chris Weaver
587cca4b13 feat: nextjs upgrade + react compiler (#6060) 2025-11-04 19:52:53 -08:00
Wenxi
990842c1cf feat(PAT): Final/3 add tests (#6047) 2025-11-04 15:23:01 -08:00
Wenxi
a3a420a6de feat(PAT): 3/3 PAT frontend (#6046) 2025-11-04 14:50:54 -08:00
Wenxi
03c2e62aee feat(PAT): 2/3 PAT APIs (#6045) 2025-11-04 14:50:26 -08:00
Wenxi
b7d7c62a7c feat(PAT): 1/3 PAT backend, crud, shared utils with API key (#6044) 2025-11-04 14:50:06 -08:00
552 changed files with 26444 additions and 14899 deletions

.github/actionlint.yml (vendored, new file, +42 lines)

@@ -0,0 +1,42 @@
self-hosted-runner:
# Labels of self-hosted runner in array of strings.
labels:
- extras=ecr-cache
- extras=s3-cache
- hdd=256
- runs-on
- runner=1cpu-linux-arm64
- runner=1cpu-linux-x64
- runner=2cpu-linux-arm64
- runner=2cpu-linux-x64
- runner=4cpu-linux-arm64
- runner=4cpu-linux-x64
- runner=8cpu-linux-arm64
- runner=8cpu-linux-x64
- runner=16cpu-linux-arm64
- runner=16cpu-linux-x64
- ubuntu-slim # Currently in public preview
- volume=40gb
# Configuration variables in array of strings defined in your repository or
# organization. `null` means disabling configuration variables check.
# Empty array means no configuration variable is allowed.
config-variables: null
# Configuration for file paths. The keys are glob patterns to match to file
# paths relative to the repository root. The values are the configurations for
# the file paths. Note that the path separator is always '/'.
# The following configurations are available.
#
# "ignore" is an array of regular expression patterns. Matched error messages
# are ignored. This is similar to the "-ignore" command line option.
paths:
# Glob pattern relative to the repository root for matching files. The path separator is always '/'.
# This example configures any YAML file under the '.github/workflows/' directory.
.github/workflows/**/*.{yml,yaml}:
# TODO: These are real and should be fixed eventually.
ignore:
- 'shellcheck reported issue in this script: SC2038:.+'
- 'shellcheck reported issue in this script: SC2046:.+'
- 'shellcheck reported issue in this script: SC2086:.+'
- 'shellcheck reported issue in this script: SC2193:.+'

(modified file)

@@ -59,7 +59,7 @@ runs:
steps:
- name: Build and push Docker image (Attempt 1 of 3)
id: buildx1
uses: docker/build-push-action@v6
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
continue-on-error: true
with:
context: ${{ inputs.context }}
@@ -86,7 +86,7 @@ runs:
- name: Build and push Docker image (Attempt 2 of 3)
id: buildx2
if: steps.buildx1.outcome != 'success'
uses: docker/build-push-action@v6
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
@@ -112,7 +112,7 @@ runs:
- name: Build and push Docker image (Attempt 3 of 3)
id: buildx3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
uses: docker/build-push-action@v6
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}

(modified file)

@@ -1,25 +1,15 @@
name: "Prepare Build (OpenAPI generation)"
description: "Sets up Python with uv, installs deps, generates OpenAPI schema and Python client, uploads artifact"
inputs:
docker-username:
required: true
docker-password:
required: true
runs:
using: "composite"
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup uv
uses: astral-sh/setup-uv@v3
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Python dependencies with uv
shell: bash
run: |
uv pip install --system \
-r backend/requirements/default.txt \
-r backend/requirements/dev.txt
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Generate OpenAPI schema
shell: bash
@@ -29,6 +19,15 @@ runs:
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ inputs['docker-username'] }}
password: ${{ inputs['docker-password'] }}
- name: Generate OpenAPI Python client
shell: bash
run: |
@@ -41,10 +40,3 @@ runs:
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
- name: Upload OpenAPI artifacts
uses: actions/upload-artifact@v4
with:
name: openapi-artifacts
path: backend/generated/

(new file, +17 lines)

@@ -0,0 +1,17 @@
name: "Setup Playwright"
description: "Sets up Playwright and system deps (assumes Python and Playwright are installed)"
runs:
using: "composite"
steps:
- name: Cache playwright cache
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
restore-keys: |
${{ runner.os }}-playwright-
- name: Install playwright
shell: bash
run: |
playwright install chromium --with-deps

(new file, +38 lines)

@@ -0,0 +1,38 @@
name: "Setup Python and Install Dependencies"
description: "Sets up Python with uv and installs deps"
runs:
using: "composite"
steps:
- name: Setup uv
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Cache uv cache directory
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
python-version: "3.11"
- name: Create virtual environment
shell: bash
run: |
uv venv ${{ runner.temp }}/venv
echo "VENV_PATH=${{ runner.temp }}/venv" >> $GITHUB_ENV
echo "${{ runner.temp }}/venv/bin" >> $GITHUB_PATH
- name: Install Python dependencies with uv
shell: bash
run: |
uv pip install \
-r backend/requirements/default.txt \
-r backend/requirements/dev.txt \
-r backend/requirements/model_server.txt

.github/actions/slack-notify/action.yml (vendored, new file, +101 lines)

@@ -0,0 +1,101 @@
name: "Slack Notify on Failure"
description: "Sends a Slack notification when a workflow fails"
inputs:
webhook-url:
description: "Slack webhook URL (can also use SLACK_WEBHOOK_URL env var)"
required: false
failed-jobs:
description: "List of failed job names (newline-separated)"
required: false
title:
description: "Title for the notification"
required: false
default: "🚨 Workflow Failed"
ref-name:
description: "Git ref name (tag/branch)"
required: false
runs:
using: "composite"
steps:
- name: Send Slack notification
shell: bash
env:
SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
run: |
if [ -z "$SLACK_WEBHOOK_URL" ]; then
echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
exit 0
fi
# Get inputs with defaults
FAILED_JOBS="${{ inputs.failed-jobs }}"
TITLE="${{ inputs.title }}"
REF_NAME="${{ inputs.ref-name }}"
REPO="${{ github.repository }}"
WORKFLOW="${{ github.workflow }}"
RUN_NUMBER="${{ github.run_number }}"
RUN_ID="${{ github.run_id }}"
SERVER_URL="${{ github.server_url }}"
WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"
# Use ref_name from input or fall back to github.ref_name
if [ -z "$REF_NAME" ]; then
REF_NAME="${{ github.ref_name }}"
fi
# Escape JSON special characters
escape_json() {
local input="$1"
# Escape backslashes first (but preserve \n sequences)
# Protect \n sequences temporarily
input=$(printf '%s' "$input" | sed 's/\\n/\x01NL\x01/g')
# Escape remaining backslashes
input=$(printf '%s' "$input" | sed 's/\\/\\\\/g')
# Restore \n sequences (single backslash, will be correct in JSON)
input=$(printf '%s' "$input" | sed 's/\x01NL\x01/\\n/g')
# Escape quotes
printf '%s' "$input" | sed 's/"/\\"/g'
}
REF_NAME_ESC=$(escape_json "$REF_NAME")
FAILED_JOBS_ESC=$(escape_json "$FAILED_JOBS")
WORKFLOW_URL_ESC=$(escape_json "$WORKFLOW_URL")
TITLE_ESC=$(escape_json "$TITLE")
# Build JSON payload piece by piece
# Note: FAILED_JOBS_ESC already contains \n sequences that should remain as \n in JSON
PAYLOAD="{"
PAYLOAD="${PAYLOAD}\"text\":\"${TITLE_ESC}\","
PAYLOAD="${PAYLOAD}\"blocks\":[{"
PAYLOAD="${PAYLOAD}\"type\":\"header\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"${TITLE_ESC}\"}"
PAYLOAD="${PAYLOAD}},{"
PAYLOAD="${PAYLOAD}\"type\":\"section\","
PAYLOAD="${PAYLOAD}\"fields\":["
if [ -n "$REF_NAME" ]; then
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Ref:*\\n${REF_NAME_ESC}\"},"
fi
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Run ID:*\\n#${RUN_NUMBER}\"}"
PAYLOAD="${PAYLOAD}]"
PAYLOAD="${PAYLOAD}}"
if [ -n "$FAILED_JOBS" ]; then
PAYLOAD="${PAYLOAD},{"
PAYLOAD="${PAYLOAD}\"type\":\"section\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"mrkdwn\",\"text\":\"*Failed Jobs:*\\n${FAILED_JOBS_ESC}\"}"
PAYLOAD="${PAYLOAD}}"
fi
PAYLOAD="${PAYLOAD},{"
PAYLOAD="${PAYLOAD}\"type\":\"actions\","
PAYLOAD="${PAYLOAD}\"elements\":[{"
PAYLOAD="${PAYLOAD}\"type\":\"button\","
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"View Workflow Run\"},"
PAYLOAD="${PAYLOAD}\"url\":\"${WORKFLOW_URL_ESC}\""
PAYLOAD="${PAYLOAD}}]"
PAYLOAD="${PAYLOAD}}"
PAYLOAD="${PAYLOAD}]"
PAYLOAD="${PAYLOAD}}"
curl -X POST -H 'Content-type: application/json' \
--data "$PAYLOAD" \
"$SLACK_WEBHOOK_URL"

.github/dependabot.yml (vendored, new file, +20 lines)

@@ -0,0 +1,20 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 3
assignees:
- "jmelahman"
labels:
- "dependabot:actions"
- package-ecosystem: "pip"
directory: "/backend"
schedule:
interval: "weekly"
open-pull-requests-limit: 3
assignees:
- "jmelahman"
labels:
- "dependabot:python"

.github/runs-on.yml (vendored, new file, +1 line)

@@ -0,0 +1 @@
_extend: .github-private

(modified file)

@@ -1,4 +1,7 @@
name: Check Lazy Imports
concurrency:
group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -13,12 +16,12 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
- name: Check lazy imports
run: python3 backend/scripts/check_lazy_imports.py
run: python3 backend/scripts/check_lazy_imports.py

.github/workflows/deployment.yml (vendored, new file, +512 lines)

@@ -0,0 +1,512 @@
name: Build and Push Docker Images on Tag
on:
push:
tags:
- "*"
workflow_dispatch:
env:
IS_DRY_RUN: ${{ github.event_name == 'workflow_dispatch' }}
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
jobs:
# Determine which components to build based on the tag
determine-builds:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
outputs:
build-web: ${{ steps.check.outputs.build-web }}
build-web-cloud: ${{ steps.check.outputs.build-web-cloud }}
build-backend: ${{ steps.check.outputs.build-backend }}
build-model-server: ${{ steps.check.outputs.build-model-server }}
is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}
is-stable: ${{ steps.check.outputs.is-stable }}
is-beta: ${{ steps.check.outputs.is-beta }}
is-stable-standalone: ${{ steps.check.outputs.is-stable-standalone }}
is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
steps:
- name: Check which components to build and version info
id: check
run: |
TAG="${{ github.ref_name }}"
# Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
IS_CLOUD=false
BUILD_WEB=false
BUILD_WEB_CLOUD=false
BUILD_BACKEND=true
BUILD_MODEL_SERVER=true
IS_STABLE=false
IS_BETA=false
IS_STABLE_STANDALONE=false
IS_BETA_STANDALONE=false
if [[ "$TAG" == *cloud* ]]; then
IS_CLOUD=true
BUILD_WEB_CLOUD=true
else
BUILD_WEB=true
fi
# Version checks (for web - any stable version)
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
IS_STABLE=true
fi
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
IS_BETA=true
fi
# Version checks (for backend/model-server - stable version excluding cloud tags)
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "$TAG" != *cloud* ]]; then
IS_STABLE_STANDALONE=true
fi
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "$TAG" != *cloud* ]]; then
IS_BETA_STANDALONE=true
fi
{
echo "build-web=$BUILD_WEB"
echo "build-web-cloud=$BUILD_WEB_CLOUD"
echo "build-backend=$BUILD_BACKEND"
echo "build-model-server=$BUILD_MODEL_SERVER"
echo "is-cloud-tag=$IS_CLOUD"
echo "is-stable=$IS_STABLE"
echo "is-beta=$IS_BETA"
echo "is-stable-standalone=$IS_STABLE_STANDALONE"
echo "is-beta-standalone=$IS_BETA_STANDALONE"
echo "sanitized-tag=$SANITIZED_TAG"
} >> "$GITHUB_OUTPUT"
build-web:
needs: determine-builds
if: needs.determine-builds.outputs.build-web == 'true'
runs-on:
- runs-on
- runner=4cpu-linux-x64
- run-id=${{ github.run_id }}-web-build
- extras=ecr-cache
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
DEPLOYMENT: standalone
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable == 'true' && 'latest' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-${{ env.DEPLOYMENT }}-cache
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-${{ env.DEPLOYMENT }}-cache,mode=max
build-web-cloud:
needs: determine-builds
if: needs.determine-builds.outputs.build-web-cloud == 'true'
runs-on:
- runs-on
- runner=4cpu-linux-x64
- run-id=${{ github.run_id }}-web-cloud-build
- extras=ecr-cache
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
DEPLOYMENT: cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('web-cloud-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_CLOUD_ENABLED=true
NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-${{ env.DEPLOYMENT }}-cache
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-${{ env.DEPLOYMENT }}-cache,mode=max
build-backend:
needs: determine-builds
if: needs.determine-builds.outputs.build-backend == 'true'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-backend-build
- extras=ecr-cache
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-${{ env.DEPLOYMENT }}-cache
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-${{ env.DEPLOYMENT }}-cache,mode=max
build-model-server:
needs: determine-builds
if: needs.determine-builds.outputs.build-model-server == 'true'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-model-server-build
- volume=40gb
- extras=ecr-cache
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-${{ env.DEPLOYMENT }}-cache
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-${{ env.DEPLOYMENT }}-cache,mode=max
trivy-scan-web:
needs: [determine-builds, build-web]
if: needs.build-web.result == 'success'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-trivy-scan-web
- extras=ecr-cache
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
SCAN_IMAGE="${{ env.RUNS_ON_ECR_CACHE }}:web-${{ needs.determine-builds.outputs.sanitized-tag }}"
else
SCAN_IMAGE="docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}"
fi
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
${SCAN_IMAGE}
trivy-scan-web-cloud:
needs: [determine-builds, build-web-cloud]
if: needs.build-web-cloud.result == 'success'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-trivy-scan-web-cloud
- extras=ecr-cache
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
SCAN_IMAGE="${{ env.RUNS_ON_ECR_CACHE }}:web-cloud-${{ needs.determine-builds.outputs.sanitized-tag }}"
else
SCAN_IMAGE="docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}"
fi
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
${SCAN_IMAGE}
trivy-scan-backend:
needs: [determine-builds, build-backend]
if: needs.build-backend.result == 'success'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-trivy-scan-backend
- extras=ecr-cache
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
SCAN_IMAGE="${{ env.RUNS_ON_ECR_CACHE }}:backend-${{ needs.determine-builds.outputs.sanitized-tag }}"
else
SCAN_IMAGE="docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}"
fi
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
--ignorefile /tmp/.trivyignore \
${SCAN_IMAGE}
trivy-scan-model-server:
needs: [determine-builds, build-model-server]
if: needs.build-model-server.result == 'success'
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-trivy-scan-model-server
- extras=ecr-cache
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
SCAN_IMAGE="${{ env.RUNS_ON_ECR_CACHE }}:model-server-${{ needs.determine-builds.outputs.sanitized-tag }}"
else
SCAN_IMAGE="docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}"
fi
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
${SCAN_IMAGE}
notify-slack-on-failure:
needs: [build-web, build-web-cloud, build-backend, build-model-server]
if: always() && (needs.build-web.result == 'failure' || needs.build-web-cloud.result == 'failure' || needs.build-backend.result == 'failure' || needs.build-model-server.result == 'failure') && github.event_name != 'workflow_dispatch'
runs-on: ubuntu-slim
steps:
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Determine failed jobs
id: failed-jobs
shell: bash
run: |
FAILED_JOBS=""
if [ "${{ needs.build-web.result }}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-web\\n"
fi
if [ "${{ needs.build-web-cloud.result }}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-web-cloud\\n"
fi
if [ "${{ needs.build-backend.result }}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-backend\\n"
fi
if [ "${{ needs.build-model-server.result }}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-model-server\\n"
fi
# Remove trailing \n and set output
FAILED_JOBS=$(printf '%s' "$FAILED_JOBS" | sed 's/\\n$//')
echo "jobs=$FAILED_JOBS" >> "$GITHUB_OUTPUT"
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}

(deleted file, -198 lines)

@@ -1,198 +0,0 @@
name: Build and Push Backend Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
jobs:
build-and-push:
# TODO: investigate a matrix build like the web container
# See https://runs-on.com/runners/linux/
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Checkout code
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Install build-essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Backend Image Docker Build and Push
id: build
uses: docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: ${{ matrix.platform }}
push: true
build-args: |
ONYX_VERSION=${{ github.ref_name }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build-and-push
steps:
# Needed for trivyignore
- name: Checkout
uses: actions/checkout@v4
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: backend-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
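# combine the per-platform digests into a single multi-arch manifest list, applying every tag emitted by the metadata step (ref name plus latest/edge/beta when set)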
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has its own rate-limiting issues that cause this action to flake
# we worked around it by hardcoding different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
# Security: Using pinned digest (0.65.0@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436)
# Security: No Docker socket mount needed for remote registry scanning
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
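# scan the pushed image with a digest-pinned trivy; the DB repos point at the ECR Public mirrors to avoid the Docker Hub rate limits noted above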
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
--ignorefile /tmp/.trivyignore \
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}


@@ -1,158 +0,0 @@
name: Build and Push Cloud Web Image on Tag
# Identical to the web container build, but with the cloud image tag and cloud-specific build args
on:
push:
tags:
- "*cloud*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
DEPLOYMENT: cloud
jobs:
build:
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
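# turn the matrix platform (e.g. linux/amd64) into a dash-separated pair (linux-amd64) for use in artifact and cache names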
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: ${{ matrix.platform }}
push: true
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_CLOUD_ENABLED=true
NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: cloudweb-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has its own rate-limiting issues that cause this action to flake
# we worked around it by hardcoding different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}


@@ -1,207 +0,0 @@
name: Build and Push Model Server Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
jobs:
# Bypassing this for now, since skipping the build glitches
# releases and builds that depend on everything being tagged in docker
# 1) Preliminary job to check if the changed files are relevant
# check_model_server_changes:
# runs-on: ubuntu-latest
# outputs:
# changed: ${{ steps.check.outputs.changed }}
# steps:
# - name: Checkout code
# uses: actions/checkout@v4
#
# - name: Check if relevant files changed
# id: check
# run: |
# # Default to "false"
# echo "changed=false" >> $GITHUB_OUTPUT
#
# # Compare the previous commit (github.event.before) to the current one (github.sha)
# # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
# # set changed=true
# if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
# | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
# echo "changed=true" >> $GITHUB_OUTPUT
# fi
check_model_server_changes:
runs-on: ubuntu-latest
outputs:
changed: "true"
steps:
- name: Bypass check and set output
run: echo "changed=true" >> $GITHUB_OUTPUT
build-amd64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
env:
PLATFORM_PAIR: linux-amd64
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: System Info
run: |
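# report disk and memory, then prune old images/volumes to free space before the large model server build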
df -h
free -h
docker system prune -af --volumes
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push AMD64
uses: docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64
push: true
tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64
build-args: |
ONYX_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache: true
build-arm64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-arm64"]
env:
PLATFORM_PAIR: linux-arm64
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: System Info
run: |
df -h
free -h
docker system prune -af --volumes
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push ARM64
uses: docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
push: true
tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
build-args: |
ONYX_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
merge-and-scan:
needs: [build-amd64, build-arm64, check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on: ubuntu-latest
steps:
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create and Push Multi-arch Manifest
run: |
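# stitch the separately pushed amd64 and arm64 tags into one multi-arch tag, then add the latest/edge/beta aliases when the checks above apply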
docker buildx create --use
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
if [[ "${{ steps.check_version.outputs.is_stable }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
if [[ "${{ env.EDGE_TAG }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:edge \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
if [[ "${{ steps.check_version.outputs.is_beta }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:beta \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}


@@ -1,204 +0,0 @@
name: Build and Push Web Image on Tag
on:
push:
tags:
- "*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
DEPLOYMENT: standalone
jobs:
precheck:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
outputs:
should-run: ${{ steps.set-output.outputs.should-run }}
steps:
- name: Check if tag contains "cloud"
id: set-output
run: |
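# cloud tags are handled by the dedicated cloud image workflows, so skip the standalone web build for them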
if [[ "${{ github.ref_name }}" == *cloud* ]]; then
echo "should-run=false" >> "$GITHUB_OUTPUT"
else
echo "should-run=true" >> "$GITHUB_OUTPUT"
fi
build:
needs: precheck
if: needs.precheck.outputs.should-run == 'true'
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Checkout
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: ${{ matrix.platform }}
push: true
build-args: |
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
needs:
- build
if: needs.precheck.outputs.should-run == 'true'
runs-on: ubuntu-latest
steps:
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
echo "is_beta=true" >> $GITHUB_OUTPUT
else
echo "is_beta=false" >> $GITHUB_OUTPUT
fi
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: web-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ steps.check_version.outputs.is_beta == 'true' && 'beta' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has its own rate-limiting issues that cause this action to flake
# we worked around it by hardcoding different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 10
command: |
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
--timeout 20m \
--severity CRITICAL,HIGH \
docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}


@@ -14,13 +14,13 @@ jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower-powered instance since this just does I/O to Docker Hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v1
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}


@@ -14,13 +14,13 @@ jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower-powered instance since this just does I/O to Docker Hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v1
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}


@@ -14,12 +14,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- name: Install Helm CLI
uses: azure/setup-helm@v4
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4
with:
version: v3.12.1
@@ -43,7 +43,7 @@ jobs:
done
- name: Publish Helm charts to gh-pages
uses: stefanprodan/helm-gh-pages@v1.7.0
uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # ratchet:stefanprodan/helm-gh-pages@v1.7.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
charts_dir: deployment/helm/charts


@@ -7,12 +7,12 @@ permissions:
# contents: write # only for delete-branch option
issues: write
pull-requests: write
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v9
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # ratchet:actions/stale@v9
with:
stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
@@ -20,4 +20,3 @@ jobs:
close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'
days-before-stale: 75
# days-before-close: 90 # uncomment after we test stale behavior


@@ -16,18 +16,18 @@ permissions:
actions: read
contents: read
security-events: write
jobs:
scan-licenses:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'
@@ -35,7 +35,7 @@ jobs:
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- name: Get explicit and transitive dependencies
run: |
python -m pip install --upgrade pip
@@ -43,28 +43,28 @@ jobs:
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
pip freeze > requirements-all.txt
- name: Check python
id: license_check_report
uses: pilosus/action-pip-license-checker@v2
uses: pilosus/action-pip-license-checker@e909b0226ff49d3235c99c4585bc617f49fff16a # ratchet:pilosus/action-pip-license-checker@v3
with:
requirements: 'requirements-all.txt'
fail: 'Copyleft'
exclude: '(?i)^(pylint|aio[-_]*).*'
- name: Print report
if: always()
run: echo "${{ steps.license_check_report.outputs.report }}"
- name: Install npm dependencies
working-directory: ./web
run: npm ci
# be careful enabling the sarif and upload as it may spam the security tab
# with a huge amount of items. Work out the issues before enabling upload.
# with a huge amount of items. Work out the issues before enabling upload.
# - name: Run Trivy vulnerability scanner in repo mode
# if: always()
# uses: aquasecurity/trivy-action@0.29.0
# uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
# with:
# scan-type: fs
# scan-ref: .
@@ -73,7 +73,7 @@ jobs:
# severity: HIGH,CRITICAL
# # format: sarif
# # output: trivy-results.sarif
#
#
# # - name: Upload Trivy scan results to GitHub Security tab
# # uses: github/codeql-action/upload-sarif@v3
# # with:
@@ -81,14 +81,14 @@ jobs:
scan-trivy:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -98,7 +98,7 @@ jobs:
run: docker pull onyxdotapp/onyx-backend:latest
- name: Run Trivy vulnerability scanner on backend
uses: aquasecurity/trivy-action@0.29.0
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -112,9 +112,9 @@ jobs:
# Web server
- name: Pull web server docker image
run: docker pull onyxdotapp/onyx-web-server:latest
- name: Run Trivy vulnerability scanner on web server
uses: aquasecurity/trivy-action@0.29.0
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -130,7 +130,7 @@ jobs:
run: docker pull onyxdotapp/onyx-model-server:latest
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.29.0
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -139,4 +139,4 @@ jobs:
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0
exit-code: 0


@@ -1,4 +1,7 @@
name: External Dependency Unit Tests
concurrency:
group: External-Dependency-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -27,13 +30,14 @@ env:
jobs:
discover-test-dirs:
runs-on: ubuntu-latest
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Discover test directories
id: set-matrix
run: |
@@ -44,8 +48,11 @@ jobs:
external-dependency-unit-tests:
needs: discover-test-dirs
# Use larger runner with more resources for Vespa
runs-on: [runs-on, runner=16cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on:
- runs-on
- runner=2cpu-linux-arm64
- ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=s3-cache
strategy:
fail-fast: false
matrix:
@@ -56,42 +63,31 @@ jobs:
MODEL_SERVER_HOST: "disabled"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
playwright install chromium
playwright install-deps chromium
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Standard Dependencies
run: |
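# start only the backing services the tests need (minio, relational_db, cache, index -- i.e. the MinIO, Postgres, Redis, and Vespa containers); no api or web server is required here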
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index
- name: Wait for services
run: |
echo "Waiting for services to be ready..."
sleep 30
# Wait for Vespa specifically
echo "Waiting for Vespa to be ready..."
timeout 300 bash -c 'until curl -f -s http://localhost:8081/ApplicationStatus > /dev/null 2>&1; do echo "Vespa not ready, waiting..."; sleep 10; done' || echo "Vespa timeout - continuing anyway"
echo "Services should be ready now"
- name: Run migrations
run: |
cd backend


@@ -1,30 +1,33 @@
name: Helm - Lint and Test Charts
concurrency:
group: Helm-Lint-and-Test-Charts-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches: [ main ]
workflow_dispatch: # Allows manual triggering
jobs:
helm-chart-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/setup-helm@v4.3.1
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
with:
version: v3.19.0
- name: Set up chart-testing
uses: helm/chart-testing-action@v2.7.0
uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
- name: Run chart-testing (list-changed)
@@ -41,7 +44,7 @@ jobs:
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
@@ -51,7 +54,7 @@ jobs:
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@v1.12.0
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'
@@ -118,7 +121,7 @@ jobs:
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Starting chart installation with monitoring ==="
# Function to monitor cluster state
monitor_cluster() {
while true; do
@@ -140,11 +143,11 @@ jobs:
sleep 60
done
}
# Start monitoring in background
monitor_cluster &
MONITOR_PID=$!
# Set up cleanup
cleanup() {
echo "=== Cleaning up monitoring process ==="
@@ -153,10 +156,10 @@ jobs:
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
# Trap cleanup on exit
trap cleanup EXIT
# Run the actual installation with detailed logging
echo "=== Starting ct install ==="
set +e
@@ -214,15 +217,15 @@ jobs:
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
echo "=== Pod descriptions for debugging ==="
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}


@@ -11,11 +11,6 @@ on:
- "release/**"
env:
# Private Registry Configuration
PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -35,12 +30,13 @@ env:
jobs:
discover-test-dirs:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Discover test directories
id: set-matrix
@@ -62,100 +58,92 @@ jobs:
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
prepare-build:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Prepare build
uses: ./.github/actions/prepare-build
build-backend-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Backend Docker image
uses: useblacksmith/build-push-action@v2
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
build-model-server-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
- name: Build and push Model Server Docker image
uses: useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
push: true
outputs: type=registry
provenance: false
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-integration-image:
needs: prepare-build
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Download OpenAPI artifacts
uses: actions/download-artifact@v4
with:
name: openapi-artifacts
path: backend/generated/
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
build-integration-image:
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push integration test image with Docker Bake
env:
REGISTRY: ${{ env.PRIVATE_REGISTRY }}
TAG: test-${{ github.run_id }}
run: cd backend && docker buildx bake --no-cache --push integration
REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
run: cd backend && docker buildx bake --push integration
integration-tests:
needs:
@@ -165,7 +153,11 @@ jobs:
build-model-server-image,
build-integration-image,
]
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
strategy:
fail-fast: false
@@ -173,43 +165,19 @@ jobs:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Docker images
run: |
# Pull all images from registry in parallel
echo "Pulling Docker images in parallel..."
# Pull images from private registry
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
# Wait for all background jobs to complete
wait
echo "All Docker images pulled successfully"
# Re-tag to remove registry prefix for docker-compose
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Start Docker containers
@@ -221,7 +189,8 @@ jobs:
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
@@ -248,15 +217,15 @@ jobs:
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
@@ -265,7 +234,7 @@ jobs:
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
@@ -277,7 +246,7 @@ jobs:
-p mock-it-services-stack up -d
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
uses: nick-fields/retry@v3
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 20
max_attempts: 3
@@ -314,7 +283,7 @@ jobs:
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
onyxdotapp/onyx-integration:test \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/${{ matrix.test-dir.path }}
# ------------------------------------------------------------
@@ -333,18 +302,12 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose down -v
multitenant-tests:
needs:
@@ -353,35 +316,19 @@ jobs:
build-model-server-image,
build-integration-image,
]
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Docker images
run: |
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
wait
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
- name: Start Docker containers for multi-tenant tests
run: |
cd deployment/docker_compose
@@ -390,7 +337,8 @@ jobs:
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
DEV_MODE=true \
docker compose -f docker-compose.multitenant-dev.yml up \
relational_db \
@@ -453,9 +401,8 @@ jobs:
-e SKIP_RESET=true \
-e REQUIRE_EMAIL_VERIFICATION=false \
-e DISABLE_TELEMETRY=true \
-e IMAGE_TAG=test \
-e DEV_MODE=true \
onyxdotapp/onyx-integration:test \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/multitenant_tests
- name: Dump API server logs (multi-tenant)
@@ -472,7 +419,7 @@ jobs:
- name: Upload logs (multi-tenant)
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-multitenant
path: ${{ github.workspace }}/docker-compose-multitenant.log
@@ -483,12 +430,13 @@ jobs:
cd deployment/docker_compose
docker compose -f docker-compose.multitenant-dev.yml down -v
required:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
required:
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
needs: [integration-tests, multitenant-tests]
if: ${{ always() }}
steps:
- uses: actions/github-script@v7
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
with:
script: |
const needs = ${{ toJSON(needs) }};


@@ -11,12 +11,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
@@ -28,7 +30,7 @@ jobs:
- name: Upload coverage reports
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: jest-coverage-${{ github.run_id }}
path: ./web/coverage


@@ -27,7 +27,7 @@ jobs:
echo "::error::❌ Your PR title does not follow the Conventional Commits format.
This check ensures that all pull requests use clear, consistent titles that help automate changelogs and improve project history.
Please update your PR title to follow the Conventional Commits style.
Please update your PR title to follow the Conventional Commits style.
Here is a link to a blog post explaining why we've included the Conventional Commits style in our PR titles: https://xfuture-blog.com/working-with-conventional-commits
**Here are some examples of valid PR titles:**


@@ -1,4 +1,7 @@
name: Ensure PR references Linear
concurrency:
group: Ensure-PR-references-Linear-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
pull_request:


@@ -8,11 +8,6 @@ on:
types: [checks_requested]
env:
# Private Registry Configuration
PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -31,12 +26,13 @@ env:
jobs:
discover-test-dirs:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Discover test directories
id: set-matrix
@@ -58,100 +54,89 @@ jobs:
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
prepare-build:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Prepare build
uses: ./.github/actions/prepare-build
build-backend-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Backend Docker image
uses: useblacksmith/build-push-action@v2
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: useblacksmith/build-push-action@v2
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
push: true
outputs: type=registry
provenance: false
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
build-integration-image:
needs: prepare-build
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
- name: Download OpenAPI artifacts
uses: actions/download-artifact@v4
with:
name: openapi-artifacts
path: backend/generated/
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push integration test image with Docker Bake
env:
REGISTRY: ${{ env.PRIVATE_REGISTRY }}
TAG: test-${{ github.run_id }}
run: cd backend && docker buildx bake --no-cache --push integration
REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
run: cd backend && docker buildx bake --push integration
integration-tests-mit:
needs:
@@ -161,8 +146,11 @@ jobs:
build-model-server-image,
build-integration-image,
]
# See https://docs.blacksmith.sh/blacksmith-runners/overview
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
strategy:
fail-fast: false
@@ -170,43 +158,19 @@ jobs:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Private Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PRIVATE_REGISTRY }}
username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Docker images
run: |
# Pull all images from registry in parallel
echo "Pulling Docker images in parallel..."
# Pull images from private registry
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
# Wait for all background jobs to complete
wait
echo "All Docker images pulled successfully"
# Re-tag to remove registry prefix for docker-compose
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Start Docker containers
@@ -217,7 +181,8 @@ jobs:
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
INTEGRATION_TESTS_MODE=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
@@ -243,15 +208,15 @@ jobs:
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
@@ -260,7 +225,7 @@ jobs:
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
@@ -273,7 +238,7 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
uses: nick-fields/retry@v3
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 20
max_attempts: 3
@@ -310,7 +275,7 @@ jobs:
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
onyxdotapp/onyx-integration:test \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/${{ matrix.test-dir.path }}
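# NOTE: the pre-ping/null-pool flags above presumably map to SQLAlchemy engine options in the
# backend; a minimal sketch of what POSTGRES_USE_NULL_POOL=true likely toggles (the wiring and
# DSN below are assumptions for illustration, not the repo's actual engine code):
#   import os
#   from sqlalchemy import create_engine
#   from sqlalchemy.pool import NullPool
#   engine_kwargs = {"pool_pre_ping": True}  # probe each connection before use; drops stale ones
#   if os.environ.get("POSTGRES_USE_NULL_POOL") == "true":
#       engine_kwargs["poolclass"] = NullPool  # no pooling: open/close per checkout, fewer dropped-connection flakes
#   engine = create_engine("postgresql+psycopg2://user:pass@relational_db:5432/postgres", **engine_kwargs)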
# ------------------------------------------------------------
@@ -329,25 +294,20 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose down -v
required:
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
needs: [integration-tests-mit]
if: ${{ always() }}
steps:
- uses: actions/github-script@v7
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
with:
script: |
const needs = ${{ toJSON(needs) }};

View File

@@ -6,13 +6,6 @@ concurrency:
on: push
env:
# AWS ECR Configuration
AWS_REGION: ${{ secrets.AWS_REGION || 'us-west-2' }}
ECR_REGISTRY: ${{ secrets.ECR_REGISTRY }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_ECR }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_ECR }}
BUILDX_NO_DEFAULT_ATTESTATIONS: 1
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -23,164 +16,153 @@ env:
SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}
# for MCP Oauth tests
MCP_OAUTH_CLIENT_ID: ${{ secrets.MCP_OAUTH_CLIENT_ID }}
MCP_OAUTH_CLIENT_SECRET: ${{ secrets.MCP_OAUTH_CLIENT_SECRET }}
MCP_OAUTH_ISSUER: ${{ secrets.MCP_OAUTH_ISSUER }}
MCP_OAUTH_JWKS_URI: ${{ secrets.MCP_OAUTH_JWKS_URI }}
MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}
MOCK_LLM_RESPONSE: true
MCP_TEST_SERVER_PORT: 8004
MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
MCP_TEST_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
MCP_TEST_SERVER_BIND_HOST: 0.0.0.0
MCP_TEST_SERVER_PUBLIC_HOST: host.docker.internal
MCP_SERVER_HOST: 0.0.0.0
MCP_SERVER_PUBLIC_HOST: host.docker.internal
MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
jobs:
build-web-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Build and push Web Docker image
uses: useblacksmith/build-push-action@v2
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/arm64
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}
provenance: false
sbom: false
push: true
outputs: type=registry
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-backend-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
- name: Build and push Backend Docker image
uses: useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}
provenance: false
sbom: false
push: true
outputs: type=registry
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Set up Docker Buildx
uses: useblacksmith/setup-docker-builder@v1
- name: Build and push Model Server Docker image
uses: useblacksmith/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}
provenance: false
sbom: false
push: true
outputs: type=registry
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Docker images
run: |
# Pull all images from ECR in parallel
echo "Pulling Docker images in parallel..."
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}) &
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}) &
(docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}) &
- name: Build and push Web Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
# Wait for all background jobs to complete
wait
echo "All Docker images pulled successfully"
build-backend-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
# Re-tag with expected names for docker-compose
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-web-server:test
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }} onyxdotapp/onyx-backend:test
docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Backend Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests (${{ matrix.project }})
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}", "extras=ecr-cache"]
strategy:
fail-fast: false
matrix:
project: [admin, no-auth, exclusive]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Cache playwright cache
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
restore-keys: |
${{ runner.os }}-playwright-npm-
- name: Install playwright browsers
working-directory: ./web
run: npx playwright install --with-deps
@@ -194,13 +176,24 @@ jobs:
EXA_API_KEY=${{ env.EXA_API_KEY }}
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
IMAGE_TAG=test
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
ONYX_WEB_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
EOF
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml up -d
id: start_docker
- name: Wait for service to be ready
@@ -237,18 +230,41 @@ jobs:
done
echo "Finished waiting for service."
- name: Wait for MCP OAuth mock server
run: |
echo "Waiting for MCP OAuth mock server on port ${MCP_TEST_SERVER_PORT:-8004}..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. MCP OAuth mock server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:${MCP_TEST_SERVER_PORT:-8004}/healthz" > /dev/null; then
echo "MCP OAuth mock server is ready!"
break
fi
sleep 3
done
- name: Run Playwright tests
working-directory: ./web
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
npx playwright test
npx playwright test --project ${{ matrix.project }}
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
if: always()
with:
# Includes test results and trace.zip files
name: playwright-test-results-${{ github.run_id }}
name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}
path: ./web/test-results/
retention-days: 30
@@ -262,15 +278,11 @@ jobs:
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-logs
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
run: |
cd deployment/docker_compose
docker compose down -v
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
@@ -289,12 +301,12 @@ jobs:
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@v4
# uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# with:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@v4
# uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
# with:
# node-version: 22
@@ -303,7 +315,7 @@ jobs:
# run: npm ci
# - name: Download Playwright test results
# uses: actions/download-artifact@v4
# uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
# with:
# name: test-results
# path: ./web/test-results

View File

@@ -1,4 +1,7 @@
name: Python Checks
concurrency:
group: Python-Checks-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -10,58 +13,51 @@ on:
jobs:
mypy-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
# Note: Mypy seems quite optimized for x64 compared to arm64.
# Also, mypy is single-threaded and incremental, so 2 CPUs are sufficient.
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Generate OpenAPI schema
working-directory: ./backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Prepare build
uses: ./.github/actions/prepare-build
with:
docker-username: ${{ secrets.DOCKER_USERNAME }}
docker-password: ${{ secrets.DOCKER_TOKEN }}
- name: Generate OpenAPI Python client
working-directory: ./backend
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
- name: Run MyPy
run: |
cd backend
mypy .
- name: Cache mypy cache
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: backend/.mypy_cache
key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
restore-keys: |
mypy-${{ runner.os }}-
- name: Check import order with reorder-python-imports
run: |
cd backend
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Run MyPy
working-directory: ./backend
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy .
- name: Check code formatting with Black
run: |
cd backend
black --check .
- name: Check import order with reorder-python-imports
working-directory: ./backend
run: |
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Check code formatting with Black
working-directory: ./backend
run: black --check .

View File

@@ -1,4 +1,7 @@
name: Connector Tests
concurrency:
group: Connector-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -119,35 +122,26 @@ env:
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-connectors-check", "extras=s3-cache"]
env:
PYTHONPATH: ./backend
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
playwright install chromium
playwright install-deps chromium
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
- name: Detect Connector changes
id: changes
uses: dorny/paths-filter@v3
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
with:
filters: |
hubspot:

View File

@@ -10,7 +10,7 @@ on:
description: 'Branch to run the workflow on'
required: false
default: 'main'
env:
# Bedrock
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -28,17 +28,17 @@ env:
jobs:
model-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
env:
PYTHONPATH: ./backend
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -53,9 +53,9 @@ jobs:
run: |
docker pull onyxdotapp/onyx-model-server:latest
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
cache: "pip"
@@ -90,15 +90,15 @@ jobs:
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
@@ -107,11 +107,11 @@ jobs:
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
@@ -127,7 +127,7 @@ jobs:
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
@@ -136,14 +136,7 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.model-server-test.yml down -v

View File

@@ -1,4 +1,7 @@
name: Python Unit Tests
concurrency:
group: Python-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
@@ -10,7 +13,8 @@ on:
jobs:
backend-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
env:
PYTHONPATH: ./backend
@@ -18,27 +22,15 @@ jobs:
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"

View File

@@ -10,14 +10,17 @@ on:
jobs:
quality-checks:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-quality-checks"]
steps:
- uses: actions/checkout@v4
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- uses: pre-commit/action@v3.0.1
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
with:
extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}

View File

@@ -13,7 +13,7 @@ jobs:
contents: read
steps:
- name: Checkout main Onyx repo
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0

View File

@@ -9,7 +9,7 @@ permissions:
jobs:
create-and-push-tag:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-create-and-push-tag"]
steps:
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
@@ -19,7 +19,7 @@ jobs:
# Additional NOTE: even though this is named "rkuo", the actual key is tied to the onyx repo
# and not rkuo's personal account. It is fine to leave this key as is!
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"

.gitignore
View File

@@ -18,6 +18,7 @@ backend/tests/regression/search_quality/eval-*
backend/tests/regression/search_quality/search_eval_config.yaml
backend/tests/regression/search_quality/*.json
backend/onyx/evals/data/
backend/onyx/evals/one_off/*.json
*.log
# secret files
@@ -44,3 +45,6 @@ CLAUDE.md
# Local .terraform.lock.hcl file
.terraform.lock.hcl
# MCP configs
.playwright-mcp

View File

@@ -1,4 +1,15 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-yaml
files: ^.github/
- repo: https://github.com/rhysd/actionlint
rev: v1.7.8
hooks:
- id: actionlint
- repo: https://github.com/psf/black
rev: 25.1.0
hooks:
@@ -54,15 +65,12 @@ repos:
language: system
pass_filenames: false
files: \.tf$
- id: check-lazy-imports
name: Check lazy imports are not directly imported
name: Check lazy imports
entry: python3 backend/scripts/check_lazy_imports.py
language: system
files: ^backend/(?!\.venv/).*\.py$
pass_filenames: false
# Note: pass_filenames is false because tsc must check the entire
# project, but the files filter ensures this only runs when relevant
# files change. Using --incremental for faster subsequent checks.
# We would like to have a mypy pre-commit hook, but because pre-commit runs
# in its own isolated environment, we would need to install

View File

@@ -1,29 +1,34 @@
<a name="readme-top"></a>
<h2 align="center">
<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
<a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
</h2>
<p align="center">Open Source AI Platform</p>
<p align="center">
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord" />
</a>
<a href="https://docs.onyx.app/" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
<a href="https://docs.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation" />
</a>
<a href="https://docs.onyx.app/" target="_blank">
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation">
<a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation" />
</a>
<a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License" />
</a>
</p>
<p align="center">
<a href="https://trendshift.io/repositories/12516" target="_blank">
<img src="https://trendshift.io/api/badge/repositories/12516" alt="onyx-dot-app/onyx | Trendshift" style="width: 250px; height: 55px;" />
</a>
</p>
**[Onyx](https://www.onyx.app/)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
**[Onyx](https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep Research, Connectors to 40+ knowledge sources, and more.
@@ -52,7 +57,7 @@ Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep
Onyx works with all LLMs (like OpenAI, Anthropic, Gemini, etc.) and self-hosted LLMs (like Ollama, vLLM, etc.)
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome)!
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)!
@@ -60,13 +65,13 @@ To learn more about the features, check out our [documentation](https://docs.ony
Onyx supports deployments in Docker, Kubernetes, Terraform, along with guides for major cloud providers.
See guides below:
- [Docker](https://docs.onyx.app/deployment/local/docker) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart) (best for most users)
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes) (best for large teams)
- [Terraform](https://docs.onyx.app/deployment/local/terraform) (best for teams already using Terraform)
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure), etc.)
- [Docker](https://docs.onyx.app/deployment/local/docker?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for most users)
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for large teams)
- [Terraform](https://docs.onyx.app/deployment/local/terraform?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for teams already using Terraform)
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), etc.)
> [!TIP]
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)**.
@@ -90,7 +95,7 @@ There are two editions of Onyx:
- Onyx Community Edition (CE) is available freely under the MIT license.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
For feature details, check out [our website](https://www.onyx.app/pricing).
For feature details, check out [our website](https://www.onyx.app/pricing?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme).

View File

@@ -7,15 +7,12 @@ have a contract or agreement with DanswerAI, you are not permitted to use the En
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV ONYX_VERSION=${ONYX_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true" \
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Install system dependencies
# cmake needed for psycopg (postgres)
@@ -90,6 +87,10 @@ nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"
# Set up application files
WORKDIR /app
@@ -124,6 +125,10 @@ COPY --chown=onyx:onyx ./assets /app/assets
ENV PYTHONPATH=/app
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@@ -6,13 +6,10 @@ AI models for Onyx. This container and all the code is MIT Licensed and free for
You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
visit https://github.com/onyx-dot-app/onyx."
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true" \
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Create non-root user for security best practices
RUN mkdir -p /app && \
@@ -23,24 +20,6 @@ RUN mkdir -p /app && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
# --- add toolchain needed for Rust/Python builds (fastuuid) ---
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH
RUN set -eux; \
apt-get update && apt-get install -y --no-install-recommends \
build-essential \
pkg-config \
curl \
ca-certificates \
# Install latest stable Rust (supports Cargo.lock v4)
&& curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable \
&& rustc --version && cargo --version \
&& apt-get remove -y --allow-remove-essential perl-base \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt && \
@@ -83,4 +62,8 @@ COPY ./model_server /app/model_server
ENV PYTHONPATH=/app
# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]

View File

@@ -0,0 +1,72 @@
"""add switchover_type field and remove background_reindex_enabled
Revision ID: 2acdef638fc2
Revises: a4f23d6b71c8
Create Date: 2025-01-XX XX:XX:XX.XXXXXX
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import SwitchoverType
# revision identifiers, used by Alembic.
revision = "2acdef638fc2"
down_revision = "a4f23d6b71c8"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add switchover_type column with default value of REINDEX
op.add_column(
"search_settings",
sa.Column(
"switchover_type",
sa.Enum(SwitchoverType, native_enum=False),
nullable=False,
server_default=SwitchoverType.REINDEX.value,
),
)
# Migrate existing data: set switchover_type based on background_reindex_enabled
# REINDEX where background_reindex_enabled=True, INSTANT where False
op.execute(
"""
UPDATE search_settings
SET switchover_type = CASE
WHEN background_reindex_enabled = true THEN 'REINDEX'
ELSE 'INSTANT'
END
"""
)
# Remove the background_reindex_enabled column (replaced by switchover_type)
op.drop_column("search_settings", "background_reindex_enabled")
def downgrade() -> None:
# Re-add the background_reindex_enabled column with default value of True
op.add_column(
"search_settings",
sa.Column(
"background_reindex_enabled",
sa.Boolean(),
nullable=False,
server_default="true",
),
)
# Set background_reindex_enabled based on switchover_type
op.execute(
"""
UPDATE search_settings
SET background_reindex_enabled = CASE
WHEN switchover_type = 'INSTANT' THEN false
ELSE true
END
"""
)
# Remove the switchover_type column
op.drop_column("search_settings", "switchover_type")

View File

@@ -0,0 +1,88 @@
"""add_personal_access_token_table
Revision ID: 5e1c073d48a3
Revises: 09995b8811eb
Create Date: 2025-10-30 17:30:24.308521
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "5e1c073d48a3"
down_revision = "09995b8811eb"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create personal_access_token table
op.create_table(
"personal_access_token",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column("hashed_token", sa.String(length=64), nullable=False),
sa.Column("token_display", sa.String(), nullable=False),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
nullable=False,
),
sa.Column(
"expires_at",
sa.DateTime(timezone=True),
nullable=True,
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"last_used_at",
sa.DateTime(timezone=True),
nullable=True,
),
sa.Column(
"is_revoked",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("hashed_token"),
)
# Create indexes
op.create_index(
"ix_personal_access_token_expires_at",
"personal_access_token",
["expires_at"],
unique=False,
)
op.create_index(
"ix_pat_user_created",
"personal_access_token",
["user_id", sa.text("created_at DESC")],
unique=False,
)
def downgrade() -> None:
# Drop indexes first
op.drop_index("ix_pat_user_created", table_name="personal_access_token")
op.drop_index(
"ix_personal_access_token_expires_at", table_name="personal_access_token"
)
# Drop table
op.drop_table("personal_access_token")

View File

@@ -0,0 +1,97 @@
"""add config to federated_connector
Revision ID: 9drpiiw74ljy
Revises: 2acdef638fc2
Create Date: 2025-11-03 12:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "9drpiiw74ljy"
down_revision = "2acdef638fc2"
branch_labels = None
depends_on = None
def upgrade() -> None:
connection = op.get_bind()
# Check if column already exists in current schema
result = connection.execute(
sa.text(
"""
SELECT column_name
FROM information_schema.columns
WHERE table_schema = current_schema()
AND table_name = 'federated_connector'
AND column_name = 'config'
"""
)
)
column_exists = result.fetchone() is not None
# Add config column with default empty object (only if it doesn't exist)
if not column_exists:
op.add_column(
"federated_connector",
sa.Column(
"config", postgresql.JSONB(), nullable=False, server_default="{}"
),
)
# Data migration: Single bulk update for all Slack connectors
connection.execute(
sa.text(
"""
WITH connector_configs AS (
SELECT
fc.id as connector_id,
CASE
WHEN fcds.entities->'channels' IS NOT NULL
AND jsonb_typeof(fcds.entities->'channels') = 'array'
AND jsonb_array_length(fcds.entities->'channels') > 0
THEN
jsonb_build_object(
'channels', fcds.entities->'channels',
'search_all_channels', false
) ||
CASE
WHEN fcds.entities->'include_dm' IS NOT NULL
THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
ELSE '{}'::jsonb
END
ELSE
jsonb_build_object('search_all_channels', true) ||
CASE
WHEN fcds.entities->'include_dm' IS NOT NULL
THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
ELSE '{}'::jsonb
END
END as config
FROM federated_connector fc
LEFT JOIN LATERAL (
SELECT entities
FROM federated_connector__document_set
WHERE federated_connector_id = fc.id
AND entities IS NOT NULL
ORDER BY id
LIMIT 1
) fcds ON true
WHERE fc.source = 'FEDERATED_SLACK'
AND fcds.entities IS NOT NULL
)
UPDATE federated_connector fc
SET config = cc.config
FROM connector_configs cc
WHERE fc.id = cc.connector_id
"""
)
)
def downgrade() -> None:
op.drop_column("federated_connector", "config")

View File

@@ -0,0 +1,61 @@
"""add llm provider persona restrictions
Revision ID: a4f23d6b71c8
Revises: 5e1c073d48a3
Create Date: 2025-10-21 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a4f23d6b71c8"
down_revision = "5e1c073d48a3"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"llm_provider__persona",
sa.Column("llm_provider_id", sa.Integer(), nullable=False),
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["llm_provider_id"], ["llm_provider.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["persona_id"], ["persona.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("llm_provider_id", "persona_id"),
)
op.create_index(
"ix_llm_provider__persona_llm_provider_id",
"llm_provider__persona",
["llm_provider_id"],
)
op.create_index(
"ix_llm_provider__persona_persona_id",
"llm_provider__persona",
["persona_id"],
)
op.create_index(
"ix_llm_provider__persona_composite",
"llm_provider__persona",
["persona_id", "llm_provider_id"],
)
def downgrade() -> None:
op.drop_index(
"ix_llm_provider__persona_composite",
table_name="llm_provider__persona",
)
op.drop_index(
"ix_llm_provider__persona_persona_id",
table_name="llm_provider__persona",
)
op.drop_index(
"ix_llm_provider__persona_llm_provider_id",
table_name="llm_provider__persona",
)
op.drop_table("llm_provider__persona")

View File

@@ -1,5 +1,5 @@
variable "REGISTRY" {
default = "onyxdotapp"
variable "REPOSITORY" {
default = "onyxdotapp/onyx-integration"
}
variable "TAG" {
@@ -20,5 +20,8 @@ target "integration" {
base = "target:backend"
}
tags = ["${REGISTRY}/integration-test-onyx-integration:${TAG}"]
cache-from = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache"]
cache-to = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache,mode=max"]
tags = ["${REPOSITORY}:${TAG}"]
}

View File

@@ -8,7 +8,7 @@ from fastapi import Request
from fastapi import Response
from ee.onyx.auth.users import decode_anonymous_user_jwt_token
from onyx.auth.api_key import extract_tenant_from_api_key_header
from onyx.auth.utils import extract_tenant_from_auth_header
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
from onyx.configs.constants import TENANT_ID_COOKIE_NAME
from onyx.db.engine.sql_engine import is_valid_schema_name
@@ -49,13 +49,13 @@ async def _get_tenant_id_from_request(
) -> str:
"""
Attempt to extract tenant_id from:
1) The API key header
1) The API key or PAT (Personal Access Token) header
2) The Redis-based token (stored in Cookie: fastapiusersauth)
3) The anonymous user cookie
Fallback: POSTGRES_DEFAULT_SCHEMA
"""
# Check for API key
tenant_id = extract_tenant_from_api_key_header(request)
# Check for API key or PAT in Authorization header
tenant_id = extract_tenant_from_auth_header(request)
if tenant_id is not None:
return tenant_id

View File

@@ -161,7 +161,7 @@ def handle_send_message_simple_with_history(
persona_id=req.persona_id,
)
llm, _ = get_llms_for_persona(persona=chat_session.persona)
llm, _ = get_llms_for_persona(persona=chat_session.persona, user=user)
llm_tokenizer = get_tokenizer(
model_name=llm.config.model_name,

View File

@@ -24,6 +24,7 @@ from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.models import QADocsResponse
from onyx.chat.process_message import gather_stream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.onyxbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
from onyx.context.search.models import SavedSearchDocWithContent
from onyx.context.search.models import SearchRequest
@@ -48,9 +49,42 @@ logger = setup_logger()
basic_router = APIRouter(prefix="/query")
class DocumentSearchPagination(BaseModel):
offset: int
limit: int
returned_count: int
has_more: bool
next_offset: int | None = None
class DocumentSearchResponse(BaseModel):
top_documents: list[SavedSearchDocWithContent]
llm_indices: list[int]
pagination: DocumentSearchPagination
def _normalize_pagination(limit: int | None, offset: int | None) -> tuple[int, int]:
if limit is None:
resolved_limit = NUM_RETURNED_HITS
else:
resolved_limit = limit
if resolved_limit <= 0:
raise HTTPException(
status_code=400, detail="retrieval_options.limit must be positive"
)
if offset is None:
resolved_offset = 0
else:
resolved_offset = offset
if resolved_offset < 0:
raise HTTPException(
status_code=400, detail="retrieval_options.offset cannot be negative"
)
return resolved_limit, resolved_offset
@basic_router.post("/document-search")
@@ -64,6 +98,10 @@ def handle_search_request(
logger.notice(f"Received document search query: {query}")
llm, fast_llm = get_default_llms()
pagination_limit, pagination_offset = _normalize_pagination(
limit=search_request.retrieval_options.limit,
offset=search_request.retrieval_options.offset,
)
search_pipeline = SearchPipeline(
search_request=SearchRequest(
@@ -72,8 +110,8 @@ def handle_search_request(
human_selected_filters=search_request.retrieval_options.filters,
enable_auto_detect_filters=search_request.retrieval_options.enable_auto_detect_filters,
persona=None, # For simplicity, default settings should be good for this search
offset=search_request.retrieval_options.offset,
limit=search_request.retrieval_options.limit,
offset=pagination_offset,
limit=pagination_limit + 1,
rerank_settings=search_request.rerank_settings,
evaluation_type=search_request.evaluation_type,
chunks_above=search_request.chunks_above,
@@ -116,6 +154,9 @@ def handle_search_request(
for section in top_sections
]
# Track whether the underlying retrieval produced more items than requested
has_more_results = len(top_docs) > pagination_limit
# Deduping happens at the last step to avoid harming quality by dropping content early on
deduped_docs = top_docs
dropped_inds = None
@@ -134,7 +175,22 @@ def handle_search_request(
dropped_indices=dropped_inds,
)
return DocumentSearchResponse(top_documents=deduped_docs, llm_indices=llm_indices)
paginated_docs = deduped_docs[:pagination_limit]
llm_indices = [index for index in llm_indices if index < len(paginated_docs)]
has_more = has_more_results
pagination = DocumentSearchPagination(
offset=pagination_offset,
limit=pagination_limit,
returned_count=len(paginated_docs),
has_more=has_more,
next_offset=(pagination_offset + pagination_limit) if has_more else None,
)
return DocumentSearchResponse(
top_documents=paginated_docs,
llm_indices=llm_indices,
pagination=pagination,
)
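# A short sketch of paging through this endpoint using the new response fields (the request
# body shape, host, and auth are assumptions; only the pagination keys come from the models above):
def _sketch_paginate_document_search() -> None:
    import requests  # assumed available in the caller's environment

    offset, limit = 0, 20
    while True:
        body = requests.post(
            "http://localhost:8080/query/document-search",  # hypothetical host/prefix
            json={
                "message": "quarterly revenue",  # assumed request field name
                "retrieval_options": {"limit": limit, "offset": offset},
            },
        ).json()
        print(f"offset {offset}: {body['pagination']['returned_count']} docs")
        if not body["pagination"]["has_more"]:
            break
        offset = body["pagination"]["next_offset"]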
def get_answer_stream(
@@ -162,7 +218,7 @@ def get_answer_stream(
is_for_edit=False,
)
llm = get_main_llm_from_tuple(get_llms_for_persona(persona_info))
llm = get_main_llm_from_tuple(get_llms_for_persona(persona=persona_info, user=user))
llm_tokenizer = get_tokenizer(
model_name=llm.config.model_name,

View File

@@ -517,7 +517,7 @@ def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
try:
keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
except Exception as e:
logger.error(
logger.warning(
f"Failed to extract keywords for query: {intent_req.query} due to {e}"
)
# Fallback to keeping all words

View File

@@ -0,0 +1,47 @@
from typing import Any
from typing import Literal
from typing import TypeAlias
from pydantic import BaseModel
from onyx.llm.model_response import ModelResponseStream
class ToolCallStreamItem(BaseModel):
call_id: str | None = None
id: str | None = None
name: str | None = None
arguments: str | None = None
type: Literal["function_call"] = "function_call"
index: int | None = None
class ToolCallOutputStreamItem(BaseModel):
call_id: str | None = None
output: Any
type: Literal["function_call_output"] = "function_call_output"
RunItemStreamEventDetails: TypeAlias = ToolCallStreamItem | ToolCallOutputStreamItem
class RunItemStreamEvent(BaseModel):
type: Literal[
"message_start",
"message_done",
"reasoning_start",
"reasoning_done",
"tool_call",
"tool_call_output",
]
details: RunItemStreamEventDetails | None = None
StreamEvent: TypeAlias = ModelResponseStream | RunItemStreamEvent
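# A minimal example of the event shapes above, as they would look for one streamed tool call
# (the tool name and payloads are illustrative only):
_example_tool_call = RunItemStreamEvent(
    type="tool_call",
    details=ToolCallStreamItem(
        call_id="call_123", name="internal_search", arguments='{"query": "vespa"}'
    ),
)
_example_tool_output = RunItemStreamEvent(
    type="tool_call_output",
    details=ToolCallOutputStreamItem(call_id="call_123", output={"hits": 3}),
)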

View File

@@ -0,0 +1,215 @@
import json
from collections.abc import Iterator
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any
from onyx.agents.agent_framework.models import RunItemStreamEvent
from onyx.agents.agent_framework.models import StreamEvent
from onyx.agents.agent_framework.models import ToolCallOutputStreamItem
from onyx.agents.agent_framework.models import ToolCallStreamItem
from onyx.llm.interfaces import LanguageModelInput
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.message_types import ChatCompletionMessage
from onyx.llm.message_types import ToolCall
from onyx.llm.model_response import ModelResponseStream
from onyx.tools.tool import RunContextWrapper
from onyx.tools.tool import Tool
@dataclass
class QueryResult:
stream: Iterator[StreamEvent]
new_messages_stateful: list[ChatCompletionMessage]
def _serialize_tool_output(output: Any) -> str:
if isinstance(output, str):
return output
try:
return json.dumps(output)
except TypeError:
return str(output)
def _update_tool_call_with_delta(
tool_calls_in_progress: dict[int, dict[str, Any]],
tool_call_delta: Any,
) -> None:
index = tool_call_delta.index
if index not in tool_calls_in_progress:
tool_calls_in_progress[index] = {
"id": None,
"name": None,
"arguments": "",
}
if tool_call_delta.id:
tool_calls_in_progress[index]["id"] = tool_call_delta.id
if tool_call_delta.function:
if tool_call_delta.function.name:
tool_calls_in_progress[index]["name"] = tool_call_delta.function.name
if tool_call_delta.function.arguments:
tool_calls_in_progress[index][
"arguments"
] += tool_call_delta.function.arguments
def query(
llm_with_default_settings: LLM,
messages: LanguageModelInput,
tools: Sequence[Tool],
context: Any,
tool_choice: ToolChoiceOptions | None = None,
) -> QueryResult:
tool_definitions = [tool.tool_definition() for tool in tools]
tools_by_name = {tool.name: tool for tool in tools}
new_messages_stateful: list[ChatCompletionMessage] = []
def stream_generator() -> Iterator[StreamEvent]:
reasoning_started = False
message_started = False
tool_calls_in_progress: dict[int, dict[str, Any]] = {}
content_parts: list[str] = []
reasoning_parts: list[str] = []
for chunk in llm_with_default_settings.stream(
prompt=messages,
tools=tool_definitions,
tool_choice=tool_choice,
):
assert isinstance(chunk, ModelResponseStream)
delta = chunk.choice.delta
finish_reason = chunk.choice.finish_reason
if delta.reasoning_content:
reasoning_parts.append(delta.reasoning_content)
if not reasoning_started:
yield RunItemStreamEvent(type="reasoning_start")
reasoning_started = True
if delta.content:
content_parts.append(delta.content)
if reasoning_started:
yield RunItemStreamEvent(type="reasoning_done")
reasoning_started = False
if not message_started:
yield RunItemStreamEvent(type="message_start")
message_started = True
if delta.tool_calls:
if reasoning_started and not message_started:
yield RunItemStreamEvent(type="reasoning_done")
reasoning_started = False
if message_started:
yield RunItemStreamEvent(type="message_done")
message_started = False
for tool_call_delta in delta.tool_calls:
_update_tool_call_with_delta(
tool_calls_in_progress, tool_call_delta
)
yield chunk
if not finish_reason:
continue
if message_started:
yield RunItemStreamEvent(type="message_done")
message_started = False
if finish_reason == "tool_calls" and tool_calls_in_progress:
sorted_tool_calls = sorted(tool_calls_in_progress.items())
# Build tool calls for the message and execute tools
assistant_tool_calls: list[ToolCall] = []
tool_outputs: dict[str, str] = {}
for _, tool_call_data in sorted_tool_calls:
call_id = tool_call_data["id"]
name = tool_call_data["name"]
arguments_str = tool_call_data["arguments"]
if call_id is None or name is None:
continue
assistant_tool_calls.append(
{
"id": call_id,
"type": "function",
"function": {
"name": name,
"arguments": arguments_str,
},
}
)
yield RunItemStreamEvent(
type="tool_call",
details=ToolCallStreamItem(
call_id=call_id,
name=name,
arguments=arguments_str,
),
)
if name in tools_by_name:
tool = tools_by_name[name]
arguments = json.loads(arguments_str)
run_context = RunContextWrapper(context=context)
# TODO: Instead of executing sequentially, execute in parallel
# In practice, it's not a must right now since we don't use parallel
# tool calls, so kicking the can down the road for now.
output = tool.run_v2(run_context, **arguments)
tool_outputs[call_id] = _serialize_tool_output(output)
yield RunItemStreamEvent(
type="tool_call_output",
details=ToolCallOutputStreamItem(
call_id=call_id,
output=output,
),
)
new_messages_stateful.append(
{
"role": "assistant",
"content": None,
"tool_calls": assistant_tool_calls,
}
)
for _, tool_call_data in sorted_tool_calls:
call_id = tool_call_data["id"]
if call_id in tool_outputs:
new_messages_stateful.append(
{
"role": "tool",
"content": tool_outputs[call_id],
"tool_call_id": call_id,
}
)
elif finish_reason == "stop" and content_parts:
new_messages_stateful.append(
{
"role": "assistant",
"content": "".join(content_parts),
}
)
return QueryResult(
stream=stream_generator(),
new_messages_stateful=new_messages_stateful,
)
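
The delta merging above follows the usual pattern for OpenAI-style streamed tool calls: fragments arrive keyed by index, the id and name show up once, and only the JSON arguments string is concatenated across chunks. A minimal standalone sketch of the same idea, using hypothetical delta stand-ins rather than the real ModelResponseStream types:

# Minimal sketch of per-index tool-call accumulation for streamed deltas.
# The delta classes below are stand-ins, not the actual stream chunk types.
from dataclasses import dataclass
from typing import Any


@dataclass
class _FunctionDelta:
    name: str | None = None
    arguments: str | None = None


@dataclass
class _ToolCallDelta:
    index: int
    id: str | None = None
    function: _FunctionDelta | None = None


def merge_tool_call_deltas(deltas: list[_ToolCallDelta]) -> dict[int, dict[str, Any]]:
    in_progress: dict[int, dict[str, Any]] = {}
    for delta in deltas:
        slot = in_progress.setdefault(
            delta.index, {"id": None, "name": None, "arguments": ""}
        )
        if delta.id:
            slot["id"] = delta.id
        if delta.function:
            if delta.function.name:
                slot["name"] = delta.function.name
            if delta.function.arguments:
                slot["arguments"] += delta.function.arguments
    return in_progress


# The id and name arrive once; the JSON arguments arrive in pieces.
deltas = [
    _ToolCallDelta(index=0, id="call_1", function=_FunctionDelta(name="search")),
    _ToolCallDelta(index=0, function=_FunctionDelta(arguments='{"query": "on')),
    _ToolCallDelta(index=0, function=_FunctionDelta(arguments='yx"}')),
]
assert merge_tool_call_deltas(deltas)[0]["arguments"] == '{"query": "onyx"}'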

View File

@@ -1,6 +1,7 @@
"""Strongly typed message structures for Agent SDK messages."""
from typing import Literal
from typing import NotRequired
from typing_extensions import TypedDict
@@ -81,7 +82,7 @@ class FunctionCallMessage(TypedDict):
"""Agent SDK function call message format."""
type: Literal["function_call"]
id: str
id: NotRequired[str]
call_id: str
name: str
arguments: str

View File

@@ -121,7 +121,7 @@ def search_objects(
try:
llm_response = run_with_timeout(
30,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=30,
max_tokens=300,

View File

@@ -155,7 +155,7 @@ def research_object_source(
try:
llm_response = run_with_timeout(
30,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=30,
max_tokens=300,

View File

@@ -76,7 +76,7 @@ def consolidate_object_research(
try:
llm_response = run_with_timeout(
30,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=30,
max_tokens=300,

View File

@@ -1,6 +1,7 @@
from enum import Enum
from pydantic import BaseModel
from pydantic import ConfigDict
from onyx.agents.agent_search.dr.enums import DRPath
from onyx.agents.agent_search.dr.sub_agents.image_generation.models import (
@@ -74,8 +75,7 @@ class OrchestratorTool(BaseModel):
cost: float
tool_object: Tool | None = None # None for CLOSER
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class IterationInstructions(BaseModel):
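
The change above is the Pydantic v2 spelling of the old inner `class Config`; `model_config = ConfigDict(arbitrary_types_allowed=True)` is what allows a non-Pydantic type like `Tool` to live on a model field. A minimal sketch of the equivalence, with a throwaway class standing in for the arbitrary type:

# Pydantic v2: model_config replaces the v1 inner `class Config`.
from pydantic import BaseModel, ConfigDict


class Engine:  # plain class, not a Pydantic model
    pass


class JobConfig(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)

    cost: float
    engine: Engine | None = None  # allowed only because of arbitrary_types_allowed


print(JobConfig(cost=1.0, engine=Engine()).cost)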

View File

@@ -644,11 +644,14 @@ def clarifier(
if context_llm_docs:
persona = graph_config.inputs.persona
if persona is not None:
prompt_config = PromptConfig.from_model(persona)
prompt_config = PromptConfig.from_model(
persona, db_session=graph_config.persistence.db_session
)
else:
prompt_config = PromptConfig(
system_prompt=assistant_system_prompt,
task_prompt="",
default_behavior_system_prompt=assistant_system_prompt,
custom_instructions=None,
reminder="",
datetime_aware=True,
)
@@ -677,7 +680,7 @@ def clarifier(
@traced(name="clarifier stream and process", type="llm")
def stream_and_process() -> BasicSearchProcessedStreamResults:
stream = graph_config.tooling.primary_llm.stream(
stream = graph_config.tooling.primary_llm.stream_langchain(
prompt=create_question_prompt(
cast(str, system_prompt_to_use),
cast(str, user_prompt_to_use),

View File

@@ -66,7 +66,7 @@ def custom_tool_act(
base_question=base_question,
tool_description=custom_tool_info.description,
)
tool_calling_msg = graph_config.tooling.primary_llm.invoke(
tool_calling_msg = graph_config.tooling.primary_llm.invoke_langchain(
tool_use_prompt,
tools=[custom_tool.tool_definition()],
tool_choice="required",
@@ -125,7 +125,7 @@ def custom_tool_act(
query=branch_query, base_question=base_question, tool_response=tool_str
)
answer_string = str(
graph_config.tooling.primary_llm.invoke(
graph_config.tooling.primary_llm.invoke_langchain(
tool_summary_prompt, timeout_override=TF_DR_TIMEOUT_SHORT
).content
).strip()

View File

@@ -65,7 +65,7 @@ def generic_internal_tool_act(
base_question=base_question,
tool_description=generic_internal_tool_info.description,
)
tool_calling_msg = graph_config.tooling.primary_llm.invoke(
tool_calling_msg = graph_config.tooling.primary_llm.invoke_langchain(
tool_use_prompt,
tools=[generic_internal_tool.tool_definition()],
tool_choice="required",
@@ -113,7 +113,7 @@ def generic_internal_tool_act(
query=branch_query, base_question=base_question, tool_response=tool_str
)
answer_string = str(
graph_config.tooling.primary_llm.invoke(
graph_config.tooling.primary_llm.invoke_langchain(
tool_summary_prompt, timeout_override=TF_DR_TIMEOUT_SHORT
).content
).strip()

View File

@@ -1,7 +1,5 @@
from datetime import datetime
from typing import cast
from urllib.parse import urlparse
from urllib.parse import urlunparse
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
@@ -23,20 +21,12 @@ from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
from onyx.context.search.models import InferenceSection
from onyx.prompts.dr_prompts import INTERNAL_SEARCH_PROMPTS
from onyx.utils.logger import setup_logger
from onyx.utils.url import normalize_url
logger = setup_logger()
def normalize_url(url: str) -> str:
"""
Normalize a URL by removing query parameters and fragments.
This prevents KeyErrors when URLs differ only in query parameters like ?activeTab=explore.
"""
parsed = urlparse(url)
return urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", "", ""))
def is_summarize(
state: SummarizeInput,
config: RunnableConfig,

View File

@@ -5,6 +5,9 @@ from datetime import datetime
from enum import Enum
from pydantic import BaseModel
from pydantic import field_validator
from onyx.utils.url import normalize_url
class ProviderType(Enum):
@@ -17,9 +20,14 @@ class ProviderType(Enum):
class WebSearchResult(BaseModel):
title: str
link: str
snippet: str | None = None
author: str | None = None
published_date: datetime | None = None
snippet: str | None = None
@field_validator("link")
@classmethod
def normalize_link(cls, v: str) -> str:
return normalize_url(v)
class WebContent(BaseModel):
@@ -29,6 +37,11 @@ class WebContent(BaseModel):
published_date: datetime | None = None
scrape_successful: bool = True
@field_validator("link")
@classmethod
def normalize_link(cls, v: str) -> str:
return normalize_url(v)
class WebSearchProvider(ABC):
@abstractmethod
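
The validators above normalize links at model-construction time, so two results that differ only in query parameters or fragments collapse to the same key. A self-contained sketch of the pattern; normalize_url here mirrors the removed in-module helper (the real implementation now lives in onyx.utils.url):

# Sketch: normalize links with a Pydantic field_validator at construction time.
from urllib.parse import urlparse, urlunparse

from pydantic import BaseModel, field_validator


def normalize_url(url: str) -> str:
    # Drop query parameters and fragments, keep scheme/host/path.
    parsed = urlparse(url)
    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", "", ""))


class Result(BaseModel):
    link: str

    @field_validator("link")
    @classmethod
    def normalize_link(cls, v: str) -> str:
        return normalize_url(v)


# Two URLs differing only in query params or fragments normalize to the same key.
a = Result(link="https://example.com/docs?activeTab=explore")
b = Result(link="https://example.com/docs#section")
assert a.link == b.link == "https://example.com/docs"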

View File

@@ -106,7 +106,7 @@ def extract_ert(
try:
llm_response = run_with_timeout(
KG_ENTITY_EXTRACTION_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=15,
max_tokens=300,
@@ -176,7 +176,7 @@ def extract_ert(
try:
llm_response = run_with_timeout(
KG_RELATIONSHIP_EXTRACTION_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=15,
max_tokens=300,

View File

@@ -202,7 +202,7 @@ def analyze(
llm_response = run_with_timeout(
KG_STRATEGY_GENERATION_TIMEOUT,
# fast_llm.invoke,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=5,
max_tokens=100,

View File

@@ -169,7 +169,7 @@ def _get_source_documents(
try:
llm_response = run_with_timeout(
KG_SQL_GENERATION_TIMEOUT,
llm.invoke,
llm.invoke_langchain,
prompt=msg,
timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
max_tokens=KG_SQL_GENERATION_MAX_TOKENS,
@@ -321,7 +321,7 @@ def generate_simple_sql(
try:
llm_response = run_with_timeout(
KG_SQL_GENERATION_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
max_tokens=KG_SQL_GENERATION_MAX_TOKENS,
@@ -451,7 +451,7 @@ def generate_simple_sql(
try:
llm_response = run_with_timeout(
KG_SQL_GENERATION_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
max_tokens=KG_SQL_GENERATION_MAX_TOKENS,

View File

@@ -94,7 +94,7 @@ def construct_deep_search_filters(
try:
llm_response = run_with_timeout(
KG_FILTER_CONSTRUCTION_TIMEOUT,
llm.invoke,
llm.invoke_langchain,
prompt=msg,
timeout_override=15,
max_tokens=1400,

View File

@@ -137,7 +137,7 @@ def process_individual_deep_search(
try:
llm_response = run_with_timeout(
KG_OBJECT_SOURCE_RESEARCH_TIMEOUT,
primary_llm.invoke,
primary_llm.invoke_langchain,
prompt=msg,
timeout_override=KG_OBJECT_SOURCE_RESEARCH_TIMEOUT,
max_tokens=300,

View File

@@ -127,7 +127,7 @@ def filtered_search(
try:
llm_response = run_with_timeout(
KG_FILTERED_SEARCH_TIMEOUT,
llm.invoke,
llm.invoke_langchain,
prompt=msg,
timeout_override=30,
max_tokens=300,

View File

@@ -1,6 +1,7 @@
from uuid import UUID
from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session
from onyx.agents.agent_search.dr.enums import ResearchType
@@ -25,8 +26,7 @@ class GraphInputs(BaseModel):
structured_response_format: dict | None = None
project_instructions: str | None = None
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class GraphTooling(BaseModel):
@@ -41,8 +41,7 @@ class GraphTooling(BaseModel):
force_use_tool: ForceUseTool
using_tool_calling_llm: bool = False
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class GraphPersistence(BaseModel):
@@ -57,8 +56,7 @@ class GraphPersistence(BaseModel):
# message were flushed to; only needed for agentic search
db_session: Session
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class GraphSearchConfig(BaseModel):
@@ -87,5 +85,4 @@ class GraphConfig(BaseModel):
# Only needed for agentic search
persistence: GraphPersistence
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)

View File

@@ -1,4 +1,5 @@
from pydantic import BaseModel
from pydantic import ConfigDict
from onyx.chat.prompt_builder.schemas import PromptSnapshot
from onyx.tools.message import ToolCallSummary
@@ -38,8 +39,7 @@ class ToolChoice(BaseModel):
id: str | None
search_tool_override_kwargs: SearchToolOverrideKwargs = SearchToolOverrideKwargs()
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class ToolChoiceUpdate(BaseModel):

View File

@@ -82,7 +82,7 @@ def trim_prompt_piece(config: LLMConfig, prompt_piece: str, reserved_str: str) -
def build_history_prompt(config: GraphConfig, question: str) -> str:
prompt_builder = config.inputs.prompt_builder
persona_base = get_persona_agent_prompt_expressions(
config.inputs.persona
config.inputs.persona, db_session=config.persistence.db_session
).base_prompt
if prompt_builder is None:
@@ -126,7 +126,9 @@ def build_history_prompt(config: GraphConfig, question: str) -> str:
def get_prompt_enrichment_components(
config: GraphConfig,
) -> AgentPromptEnrichmentComponents:
persona_prompts = get_persona_agent_prompt_expressions(config.inputs.persona)
persona_prompts = get_persona_agent_prompt_expressions(
config.inputs.persona, db_session=config.persistence.db_session
)
history = build_history_prompt(config, config.inputs.prompt_builder.raw_user_query)

View File

@@ -76,7 +76,7 @@ def stream_llm_answer(
else:
citation_processor = None
for message in llm.stream(
for message in llm.stream_langchain(
prompt,
timeout_override=timeout_override,
max_tokens=max_tokens,
@@ -156,7 +156,7 @@ def invoke_llm_json(
) and supports_response_schema(llm.config.model_name, llm.config.model_provider)
response_content = str(
llm.invoke(
llm.invoke_langchain(
prompt,
tools=tools,
tool_choice=tool_choice,
@@ -224,7 +224,7 @@ def get_answer_from_llm(
else:
llm_response = run_with_timeout(
timeout,
llm.invoke,
llm.invoke_langchain,
prompt=msg,
timeout_override=timeout_override,
max_tokens=max_tokens,

View File

@@ -10,6 +10,7 @@ from typing import TypedDict
from langchain_core.messages import BaseMessage
from langgraph.types import StreamWriter
from sqlalchemy.orm import Session
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import (
@@ -134,18 +135,24 @@ def format_entity_term_extraction(
def get_persona_agent_prompt_expressions(
persona: Persona | None,
db_session: Session,
) -> PersonaPromptExpressions:
if persona is None:
return PersonaPromptExpressions(
contextualized_prompt=ASSISTANT_SYSTEM_PROMPT_DEFAULT, base_prompt=""
)
# Prompts are now embedded directly on the Persona model
prompt_config = PromptConfig.from_model(persona)
# Pull custom instructions if they exist for backwards compatibility
prompt_config = PromptConfig.from_model(persona, db_session=db_session)
system_prompt = (
prompt_config.custom_instructions
or prompt_config.default_behavior_system_prompt
)
datetime_aware_system_prompt = handle_onyx_date_awareness(
prompt_str=prompt_config.system_prompt,
prompt_str=system_prompt,
prompt_config=prompt_config,
add_additional_info_if_no_tag=persona.datetime_aware,
add_additional_info_if_no_tag=bool(persona and persona.datetime_aware),
)
return PersonaPromptExpressions(
@@ -268,7 +275,7 @@ def summarize_history(
try:
history_response = run_with_timeout(
AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
llm.invoke,
llm.invoke_langchain,
history_context_prompt,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
max_tokens=AGENT_MAX_TOKENS_HISTORY_SUMMARY,

View File

@@ -2,29 +2,20 @@ import hashlib
import secrets
import uuid
from urllib.parse import quote
from urllib.parse import unquote
from fastapi import Request
from passlib.hash import sha256_crypt
from pydantic import BaseModel
from onyx.auth.constants import API_KEY_LENGTH
from onyx.auth.constants import API_KEY_PREFIX
from onyx.auth.constants import DEPRECATED_API_KEY_PREFIX
from onyx.auth.schemas import UserRole
from onyx.auth.utils import get_hashed_bearer_token_from_request
from onyx.configs.app_configs import API_KEY_HASH_ROUNDS
from shared_configs.configs import MULTI_TENANT
_API_KEY_HEADER_NAME = "Authorization"
# NOTE for others who are curious: In the context of a header, "X-" often refers
# to non-standard, experimental, or custom headers in HTTP or other protocols. It
# indicates that the header is not part of the official standards defined by
# organizations like the Internet Engineering Task Force (IETF).
_API_KEY_HEADER_ALTERNATIVE_NAME = "X-Onyx-Authorization"
_BEARER_PREFIX = "Bearer "
_API_KEY_PREFIX = "on_"
_DEPRECATED_API_KEY_PREFIX = "dn_"
_API_KEY_LEN = 192
class ApiKeyDescriptor(BaseModel):
api_key_id: int
api_key_display: str
@@ -37,34 +28,10 @@ class ApiKeyDescriptor(BaseModel):
def generate_api_key(tenant_id: str | None = None) -> str:
if not MULTI_TENANT or not tenant_id:
return _API_KEY_PREFIX + secrets.token_urlsafe(_API_KEY_LEN)
return API_KEY_PREFIX + secrets.token_urlsafe(API_KEY_LENGTH)
encoded_tenant = quote(tenant_id) # URL encode the tenant ID
return f"{_API_KEY_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(_API_KEY_LEN)}"
def extract_tenant_from_api_key_header(request: Request) -> str | None:
"""Extract tenant ID from request. Returns None if auth is disabled or invalid format."""
raw_api_key_header = request.headers.get(
_API_KEY_HEADER_ALTERNATIVE_NAME
) or request.headers.get(_API_KEY_HEADER_NAME)
if not raw_api_key_header or not raw_api_key_header.startswith(_BEARER_PREFIX):
return None
api_key = raw_api_key_header[len(_BEARER_PREFIX) :].strip()
if not api_key.startswith(_API_KEY_PREFIX) and not api_key.startswith(
_DEPRECATED_API_KEY_PREFIX
):
return None
parts = api_key[len(_API_KEY_PREFIX) :].split(".", 1)
if len(parts) != 2:
return None
tenant_id = parts[0]
return unquote(tenant_id) if tenant_id else None
return f"{API_KEY_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(API_KEY_LENGTH)}"
def _deprecated_hash_api_key(api_key: str) -> str:
@@ -74,30 +41,30 @@ def _deprecated_hash_api_key(api_key: str) -> str:
def hash_api_key(api_key: str) -> str:
# NOTE: no salt is needed, as the API key is randomly generated
# and overlaps are impossible
if api_key.startswith(_API_KEY_PREFIX):
if api_key.startswith(API_KEY_PREFIX):
return hashlib.sha256(api_key.encode("utf-8")).hexdigest()
if api_key.startswith(_DEPRECATED_API_KEY_PREFIX):
if api_key.startswith(DEPRECATED_API_KEY_PREFIX):
return _deprecated_hash_api_key(api_key)
raise ValueError(f"Invalid API key prefix: {api_key[:3]}")
def build_displayable_api_key(api_key: str) -> str:
if api_key.startswith(_API_KEY_PREFIX):
api_key = api_key[len(_API_KEY_PREFIX) :]
if api_key.startswith(API_KEY_PREFIX):
api_key = api_key[len(API_KEY_PREFIX) :]
return _API_KEY_PREFIX + api_key[:4] + "********" + api_key[-4:]
return API_KEY_PREFIX + api_key[:4] + "********" + api_key[-4:]
def get_hashed_api_key_from_request(request: Request) -> str | None:
raw_api_key_header = request.headers.get(
_API_KEY_HEADER_ALTERNATIVE_NAME
) or request.headers.get(_API_KEY_HEADER_NAME)
if raw_api_key_header is None:
return None
"""Extract and hash API key from Authorization header.
if raw_api_key_header.startswith(_BEARER_PREFIX):
raw_api_key_header = raw_api_key_header[len(_BEARER_PREFIX) :].strip()
return hash_api_key(raw_api_key_header)
Accepts both "Bearer <key>" and raw key formats.
"""
return get_hashed_bearer_token_from_request(
request,
valid_prefixes=[API_KEY_PREFIX, DEPRECATED_API_KEY_PREFIX],
hash_fn=hash_api_key,
allow_non_bearer=True, # API keys historically support both formats
)
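
The multi-tenant key format is <prefix><url-encoded tenant>.<random>, which is why the tenant extractor splits on the first dot; token_urlsafe output never contains a dot, so the split is unambiguous. A small round-trip sketch using the prefix and length constants shown above:

# Sketch: tenant-aware key generation and tenant recovery (<prefix><tenant>.<random>).
import secrets
from urllib.parse import quote, unquote

API_KEY_PREFIX = "on_"
API_KEY_LENGTH = 192


def generate_key(tenant_id: str) -> str:
    return f"{API_KEY_PREFIX}{quote(tenant_id)}.{secrets.token_urlsafe(API_KEY_LENGTH)}"


def extract_tenant(key: str) -> str | None:
    if not key.startswith(API_KEY_PREFIX):
        return None
    parts = key[len(API_KEY_PREFIX):].split(".", 1)
    return unquote(parts[0]) if len(parts) == 2 and parts[0] else None


key = generate_key("acme corp")
assert extract_tenant(key) == "acme corp"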

View File

@@ -0,0 +1,15 @@
"""Authentication constants shared across auth modules."""
# API Key constants
API_KEY_PREFIX = "on_"
DEPRECATED_API_KEY_PREFIX = "dn_"
API_KEY_LENGTH = 192
# PAT constants
PAT_PREFIX = "onyx_pat_"
PAT_LENGTH = 192
# Shared header constants
API_KEY_HEADER_NAME = "Authorization"
API_KEY_HEADER_ALTERNATIVE_NAME = "X-Onyx-Authorization"
BEARER_PREFIX = "Bearer "

backend/onyx/auth/pat.py
View File

@@ -0,0 +1,60 @@
"""Personal Access Token generation and validation."""
import hashlib
import secrets
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from urllib.parse import quote
from fastapi import Request
from onyx.auth.constants import PAT_LENGTH
from onyx.auth.constants import PAT_PREFIX
from onyx.auth.utils import get_hashed_bearer_token_from_request
from shared_configs.configs import MULTI_TENANT
def generate_pat(tenant_id: str | None = None) -> str:
"""Generate cryptographically secure PAT."""
if MULTI_TENANT and tenant_id:
encoded_tenant = quote(tenant_id)
return f"{PAT_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(PAT_LENGTH)}"
return PAT_PREFIX + secrets.token_urlsafe(PAT_LENGTH)
def hash_pat(token: str) -> str:
"""Hash PAT using SHA256 (no salt needed due to cryptographic randomness)."""
return hashlib.sha256(token.encode("utf-8")).hexdigest()
def build_displayable_pat(token: str) -> str:
"""Create masked display version: show prefix + first 4 random chars, mask middle, show last 4.
Example: onyx_pat_abc1****xyz9
"""
# Show first 12 chars (onyx_pat_ + 4 random chars) and last 4 chars
return f"{token[:12]}****{token[-4:]}"
def get_hashed_pat_from_request(request: Request) -> str | None:
"""Extract and hash PAT from Authorization header.
Only accepts "Bearer <token>" format (unlike API keys which support raw format).
"""
return get_hashed_bearer_token_from_request(
request,
valid_prefixes=[PAT_PREFIX],
hash_fn=hash_pat,
allow_non_bearer=False, # PATs require Bearer prefix
)
def calculate_expiration(days: int | None) -> datetime | None:
"""Calculate expiration at 23:59:59.999999 UTC on the target date. None = no expiration."""
if days is None:
return None
expiry_date = datetime.now(timezone.utc).date() + timedelta(days=days)
return datetime.combine(expiry_date, datetime.max.time()).replace(
tzinfo=timezone.utc
)
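
calculate_expiration anchors expiry to the end of the target day in UTC rather than exactly N*24h from creation, so a token minted late in the day still gets its full final day. A standalone copy of that arithmetic as a quick sketch:

# Sketch: expire at 23:59:59.999999 UTC on the target date, not exactly N*24h from now.
from datetime import datetime, timedelta, timezone


def calculate_expiration(days: int | None) -> datetime | None:
    if days is None:
        return None  # no expiration
    expiry_date = datetime.now(timezone.utc).date() + timedelta(days=days)
    return datetime.combine(expiry_date, datetime.max.time()).replace(tzinfo=timezone.utc)


exp = calculate_expiration(30)
assert exp is not None and exp.hour == 23 and exp.minute == 59
print(exp.isoformat())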

View File

@@ -64,6 +64,7 @@ from onyx.auth.email_utils import send_user_verification_email
from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import remove_user_from_invited_users
from onyx.auth.jwt import verify_jwt_token
from onyx.auth.pat import get_hashed_pat_from_request
from onyx.auth.schemas import AuthBackend
from onyx.auth.schemas import UserCreate
from onyx.auth.schemas import UserRole
@@ -109,6 +110,7 @@ from onyx.db.models import AccessToken
from onyx.db.models import OAuthAccount
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.pat import fetch_user_for_pat
from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import get_redis_client
@@ -204,6 +206,10 @@ def anonymous_user_enabled(*, tenant_id: str | None = None) -> bool:
def verify_email_is_invited(email: str) -> None:
if AUTH_TYPE in {AuthType.SAML, AuthType.OIDC}:
# SSO providers manage membership; allow JIT provisioning regardless of invites
return
whitelist = get_invited_users()
if not whitelist:
return
@@ -1079,6 +1085,12 @@ async def optional_user(
) -> User | None:
user = await _check_for_saml_and_jwt(request, user, async_db_session)
# check if a PAT is present (before API key)
if user is None:
hashed_pat = get_hashed_pat_from_request(request)
if hashed_pat:
user = await fetch_user_for_pat(hashed_pat, async_db_session)
# check if an API key is present
if user is None:
try:

backend/onyx/auth/utils.py
View File

@@ -0,0 +1,108 @@
"""Shared authentication utilities for bearer token extraction and validation."""
from collections.abc import Callable
from urllib.parse import unquote
from fastapi import Request
from onyx.auth.constants import API_KEY_HEADER_ALTERNATIVE_NAME
from onyx.auth.constants import API_KEY_HEADER_NAME
from onyx.auth.constants import API_KEY_PREFIX
from onyx.auth.constants import BEARER_PREFIX
from onyx.auth.constants import DEPRECATED_API_KEY_PREFIX
from onyx.auth.constants import PAT_PREFIX
def get_hashed_bearer_token_from_request(
request: Request,
valid_prefixes: list[str],
hash_fn: Callable[[str], str],
allow_non_bearer: bool = False,
) -> str | None:
"""Generic extraction and hashing of bearer tokens from request headers.
Args:
request: The FastAPI request
valid_prefixes: List of valid token prefixes (e.g., ["on_", "onyx_pat_"])
hash_fn: Function to hash the token (e.g., hash_api_key or hash_pat)
allow_non_bearer: If True, accept raw tokens without "Bearer " prefix
Returns:
Hashed token if valid format, else None
"""
auth_header = request.headers.get(
API_KEY_HEADER_ALTERNATIVE_NAME
) or request.headers.get(API_KEY_HEADER_NAME)
if not auth_header:
return None
# Handle bearer format
if auth_header.startswith(BEARER_PREFIX):
token = auth_header[len(BEARER_PREFIX) :].strip()
elif allow_non_bearer:
token = auth_header
else:
return None
# Check if token starts with any valid prefix
if valid_prefixes:
valid = any(token.startswith(prefix) for prefix in valid_prefixes)
if not valid:
return None
return hash_fn(token)
def _extract_tenant_from_bearer_token(
request: Request, valid_prefixes: list[str]
) -> str | None:
"""Generic tenant extraction from bearer token. Returns None if invalid format.
Args:
request: The FastAPI request
valid_prefixes: List of valid token prefixes (e.g., ["on_", "dn_"])
Returns:
Tenant ID if found in format <prefix><tenant>.<random>, else None
"""
auth_header = request.headers.get(
API_KEY_HEADER_ALTERNATIVE_NAME
) or request.headers.get(API_KEY_HEADER_NAME)
if not auth_header or not auth_header.startswith(BEARER_PREFIX):
return None
token = auth_header[len(BEARER_PREFIX) :].strip()
# Check if token starts with any valid prefix
matched_prefix = None
for prefix in valid_prefixes:
if token.startswith(prefix):
matched_prefix = prefix
break
if not matched_prefix:
return None
# Parse tenant from token format: <prefix><tenant>.<random>
parts = token[len(matched_prefix) :].split(".", 1)
if len(parts) != 2:
return None
tenant_id = parts[0]
return unquote(tenant_id) if tenant_id else None
def extract_tenant_from_auth_header(request: Request) -> str | None:
"""Extract tenant ID from API key or PAT header.
Unified function for extracting tenant from any bearer token (API key or PAT).
Checks all known token prefixes in order.
Returns:
Tenant ID if found, else None
"""
return _extract_tenant_from_bearer_token(
request, [API_KEY_PREFIX, DEPRECATED_API_KEY_PREFIX, PAT_PREFIX]
)
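
get_hashed_bearer_token_from_request is the single path both API keys and PATs now funnel through; the only knobs are the accepted prefixes, the hash function, and whether a bare non-Bearer header is tolerated. A small sketch of the same dispatch against a plain dict of headers instead of a FastAPI Request (header names follow the constants above):

# Sketch of the shared bearer-token path, using a plain dict instead of a FastAPI Request.
import hashlib
from collections.abc import Callable

BEARER_PREFIX = "Bearer "


def hashed_token_from_headers(
    headers: dict[str, str],
    valid_prefixes: list[str],
    hash_fn: Callable[[str], str],
    allow_non_bearer: bool = False,
) -> str | None:
    auth_header = headers.get("X-Onyx-Authorization") or headers.get("Authorization")
    if not auth_header:
        return None
    if auth_header.startswith(BEARER_PREFIX):
        token = auth_header[len(BEARER_PREFIX):].strip()
    elif allow_non_bearer:
        token = auth_header
    else:
        return None
    if not any(token.startswith(prefix) for prefix in valid_prefixes):
        return None
    return hash_fn(token)


def sha256(token: str) -> str:
    return hashlib.sha256(token.encode("utf-8")).hexdigest()


# PATs require the Bearer prefix; API keys historically accept a raw header too.
assert hashed_token_from_headers({"Authorization": "onyx_pat_abc"}, ["onyx_pat_"], sha256) is None
assert hashed_token_from_headers({"Authorization": "Bearer onyx_pat_abc"}, ["onyx_pat_"], sha256)
assert hashed_token_from_headers({"Authorization": "on_abc"}, ["on_"], sha256, allow_non_bearer=True)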

View File

@@ -1,3 +1,5 @@
import gc
import os
import time
import traceback
from collections import defaultdict
@@ -21,6 +23,7 @@ from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.background.celery.memory_monitoring import emit_process_memory
from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
from onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat
from onyx.background.celery.tasks.docprocessing.heartbeat import stop_heartbeat
@@ -65,6 +68,7 @@ from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingMode
from onyx.db.enums import IndexingStatus
from onyx.db.enums import SwitchoverType
from onyx.db.index_attempt import create_index_attempt_error
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
@@ -857,10 +861,10 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
tenant_id=tenant_id,
)
# Secondary indexing (only if secondary search settings exist and background reindex is enabled)
# Secondary indexing (only if secondary search settings exist and switchover_type is not INSTANT)
if (
secondary_search_settings
and secondary_search_settings.background_reindex_enabled
and secondary_search_settings.switchover_type != SwitchoverType.INSTANT
and secondary_cc_pair_ids
):
tasks_created += _kickoff_indexing_tasks(
@@ -875,11 +879,11 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
)
elif (
secondary_search_settings
and not secondary_search_settings.background_reindex_enabled
and secondary_search_settings.switchover_type == SwitchoverType.INSTANT
):
task_logger.info(
f"Skipping secondary indexing: "
f"background_reindex_enabled=False "
f"switchover_type=INSTANT "
f"for search_settings={secondary_search_settings.id}"
)
@@ -1299,12 +1303,39 @@ def _docprocessing_task(
# dummy lock to satisfy linter
per_batch_lock: RedisLock | None = None
try:
# FIX: Monitor memory before loading documents to track problematic batches
emit_process_memory(
os.getpid(),
"docprocessing",
{
"phase": "before_load",
"tenant_id": tenant_id,
"cc_pair_id": cc_pair_id,
"index_attempt_id": index_attempt_id,
"batch_num": batch_num,
},
)
# Retrieve documents from storage
documents = storage.get_batch(batch_num)
if not documents:
task_logger.error(f"No documents found for batch {batch_num}")
return
# FIX: Monitor memory after loading documents
emit_process_memory(
os.getpid(),
"docprocessing",
{
"phase": "after_load",
"tenant_id": tenant_id,
"cc_pair_id": cc_pair_id,
"index_attempt_id": index_attempt_id,
"batch_num": batch_num,
"doc_count": len(documents),
},
)
with get_session_with_current_tenant() as db_session:
# matches parts of _run_indexing
index_attempt = get_index_attempt(
@@ -1457,6 +1488,25 @@ def _docprocessing_task(
# Clean up this batch after successful processing
storage.delete_batch_by_num(batch_num)
# FIX: Explicitly clear document batch from memory and force garbage collection
# This helps prevent memory accumulation across multiple batches
del documents
gc.collect()
# FIX: Log final memory usage to track problematic tenants/CC pairs
emit_process_memory(
os.getpid(),
"docprocessing",
{
"phase": "after_processing",
"tenant_id": tenant_id,
"cc_pair_id": cc_pair_id,
"index_attempt_id": index_attempt_id,
"batch_num": batch_num,
"chunks_processed": index_pipeline_result.total_chunks,
},
)
elapsed_time = time.monotonic() - start_time
task_logger.info(
f"Completed document batch processing: "
@@ -1464,7 +1514,7 @@ def _docprocessing_task(
f"cc_pair={cc_pair_id} "
f"search_settings={index_attempt.search_settings.id} "
f"batch_num={batch_num} "
f"docs={len(documents)} "
f"docs={len(index_pipeline_result.failures) + index_pipeline_result.total_docs} "
f"chunks={index_pipeline_result.total_chunks} "
f"failures={len(index_pipeline_result.failures)} "
f"elapsed={elapsed_time:.2f}s"

View File

@@ -1,5 +1,6 @@
from datetime import timedelta
from sqlalchemy import func
from sqlalchemy.orm import Session
from onyx.configs.constants import NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
@@ -8,14 +9,44 @@ from onyx.db.models import IndexAttempt
from onyx.db.models import IndexAttemptError
# Always retain at least this many attempts per connector/search settings pair
NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP = 10
def get_old_index_attempts(
db_session: Session, days_to_keep: int = NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
) -> list[IndexAttempt]:
"""Get all index attempts older than the specified number of days."""
"""
Get index attempts older than the specified number of days while retaining
the latest NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP per connector/search settings pair.
"""
cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)
ranked_attempts = (
db_session.query(
IndexAttempt.id.label("attempt_id"),
IndexAttempt.time_created.label("time_created"),
func.row_number()
.over(
partition_by=(
IndexAttempt.connector_credential_pair_id,
IndexAttempt.search_settings_id,
),
order_by=IndexAttempt.time_created.desc(),
)
.label("attempt_rank"),
)
).subquery()
return (
db_session.query(IndexAttempt)
.filter(IndexAttempt.time_created < cutoff_date)
.join(
ranked_attempts,
IndexAttempt.id == ranked_attempts.c.attempt_id,
)
.filter(
ranked_attempts.c.time_created < cutoff_date,
ranked_attempts.c.attempt_rank > NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP,
)
.all()
)
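
The rewritten query keeps the newest NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP rows per (connector_credential_pair_id, search_settings_id) by ranking with row_number() over that partition and only returning rows that are both older than the cutoff and ranked past the retention count. The same rule in plain Python, as a sketch of the intended semantics:

# Sketch of the retention rule: prune attempts that are older than the cutoff
# AND not among the N most recent for their (cc_pair, search_settings) pair.
from collections import defaultdict
from datetime import datetime, timedelta, timezone

NUM_RECENT_TO_KEEP = 10


def prunable_attempts(
    attempts: list[dict], cutoff: datetime, keep: int = NUM_RECENT_TO_KEEP
) -> list[dict]:
    by_pair: dict[tuple, list[dict]] = defaultdict(list)
    for attempt in attempts:
        by_pair[(attempt["cc_pair_id"], attempt["search_settings_id"])].append(attempt)

    prunable = []
    for group in by_pair.values():
        group.sort(key=lambda a: a["time_created"], reverse=True)
        # group[keep:] corresponds to attempt_rank > NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP
        for attempt in group[keep:]:
            if attempt["time_created"] < cutoff:
                prunable.append(attempt)
    return prunable


now = datetime.now(timezone.utc)
attempts = [
    {"cc_pair_id": 1, "search_settings_id": 1, "time_created": now - timedelta(days=d)}
    for d in range(15)
]
cutoff = now - timedelta(days=7)
# Days 10-14 are both old and outside the 10 most recent, so only they are pruned.
assert len(prunable_attempts(attempts, cutoff)) == 5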

View File

@@ -16,6 +16,7 @@ from onyx.background.celery.tasks.kg_processing.kg_indexing import (
from onyx.chat.models import LlmDoc
from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.models import ThreadMessage
from onyx.chat.turn.models import FetchedDocumentCacheEntry
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import MessageType
from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME
@@ -121,6 +122,28 @@ def llm_doc_from_inference_section(inference_section: InferenceSection) -> LlmDo
)
def llm_docs_from_fetched_documents_cache(
fetched_documents_cache: dict[str, "FetchedDocumentCacheEntry"],
) -> list[LlmDoc]:
"""Convert FetchedDocumentCacheEntry objects to LlmDoc objects.
This ensures that citation numbers are properly transferred from the cache
entries to the LlmDoc objects, which is critical for proper citation rendering.
Args:
fetched_documents_cache: Dictionary mapping document IDs to FetchedDocumentCacheEntry
Returns:
List of LlmDoc objects with properly set document_citation_number
"""
llm_docs = []
for cache_value in fetched_documents_cache.values():
llm_doc = llm_doc_from_inference_section(cache_value.inference_section)
llm_doc.document_citation_number = cache_value.document_citation_number
llm_docs.append(llm_doc)
return llm_docs
def saved_search_docs_from_llm_docs(
llm_docs: list[LlmDoc] | None,
) -> list[SavedSearchDoc]:

View File

@@ -10,6 +10,7 @@ from typing import Union
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from sqlalchemy.orm import Session
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
@@ -273,22 +274,54 @@ class PromptConfig(BaseModel):
"""Final representation of the Prompt configuration passed
into the `PromptBuilder` object."""
system_prompt: str
task_prompt: str
default_behavior_system_prompt: str
custom_instructions: str | None
reminder: str
datetime_aware: bool
@classmethod
def from_model(
cls, model: "Persona", prompt_override: PromptOverride | None = None
cls,
model: "Persona",
db_session: Session,
prompt_override: PromptOverride | None = None,
) -> "PromptConfig":
from onyx.db.persona import get_default_behavior_persona
# Get the default persona's system prompt
default_persona = get_default_behavior_persona(db_session)
default_behavior_system_prompt = (
default_persona.system_prompt
if default_persona and default_persona.system_prompt
else ""
)
# Check if this persona is the default assistant
is_default_persona = default_persona and model.id == default_persona.id
# If this persona IS the default assistant, custom_instruction should be None
# Otherwise, it should be the persona's system_prompt
custom_instruction = None
if not is_default_persona:
custom_instruction = model.system_prompt or None
# Handle prompt overrides
override_system_prompt = (
prompt_override.system_prompt if prompt_override else None
)
override_task_prompt = prompt_override.task_prompt if prompt_override else None
# If there's an override, apply it to the appropriate field
if override_system_prompt:
if is_default_persona:
default_behavior_system_prompt = override_system_prompt
else:
custom_instruction = override_system_prompt
return cls(
system_prompt=override_system_prompt or model.system_prompt or "",
task_prompt=override_task_prompt or model.task_prompt or "",
default_behavior_system_prompt=default_behavior_system_prompt,
custom_instructions=custom_instruction,
reminder=override_task_prompt or model.task_prompt or "",
datetime_aware=model.datetime_aware,
)
@@ -357,8 +390,7 @@ class AnswerPostInfo(BaseModel):
tool_result: ToolCallFinalResult | None = None
message_specific_citations: MessageSpecificCitations | None = None
class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)
class ChatBasicResponse(BaseModel):

View File

@@ -78,6 +78,7 @@ from onyx.db.persona import get_persona_by_id
from onyx.db.projects import get_project_instructions
from onyx.db.projects import get_user_files_from_project
from onyx.db.search_settings import get_current_search_settings
from onyx.db.user_file import get_file_ids_by_user_file_ids
from onyx.document_index.factory import get_default_document_index
from onyx.feature_flags.factory import get_default_feature_flag_provider
from onyx.feature_flags.feature_flags_keys import DISABLE_SIMPLE_AGENT_FRAMEWORK
@@ -114,6 +115,7 @@ from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import (
WebSearchTool,
)
from onyx.tools.utils import compute_all_tool_tokens
from onyx.utils.logger import setup_logger
from onyx.utils.long_term_log import LongTermLogger
from onyx.utils.telemetry import mt_cloud_telemetry
@@ -420,10 +422,10 @@ def stream_chat_message_objects(
raise RuntimeError(
"Must specify a set of documents for chat or specify search options"
)
try:
llm, fast_llm = get_llms_for_persona(
persona=persona,
user=user,
llm_override=new_msg_req.llm_override or chat_session.llm_override,
additional_headers=litellm_additional_headers,
long_term_logger=long_term_logger,
@@ -518,11 +520,12 @@ def stream_chat_message_objects(
files = load_all_chat_files(history_msgs, new_msg_req.file_descriptors)
req_file_ids = [f["id"] for f in new_msg_req.file_descriptors]
latest_query_files = [file for file in files if file.file_id in req_file_ids]
user_file_ids: list[UUID] = []
current_message_user_file_ids: list[UUID] = []
persona_user_file_ids: list[UUID] = []
if persona.user_files:
for uf in persona.user_files:
user_file_ids.append(uf.id)
persona_user_file_ids.append(uf.id)
if new_msg_req.current_message_files:
for fd in new_msg_req.current_message_files:
@@ -530,7 +533,7 @@ def stream_chat_message_objects(
if not uid:
continue
try:
user_file_ids.append(UUID(uid))
current_message_user_file_ids.append(UUID(uid))
except (TypeError, ValueError, AttributeError):
logger.warning(
"Skipping invalid user_file_id from current_message_files: %s",
@@ -542,10 +545,10 @@ def stream_chat_message_objects(
# we can just pass them into the prompt directly
(
in_memory_user_files,
user_file_models,
search_tool_override_kwargs_for_user_files,
) = parse_user_files(
user_file_ids=user_file_ids or [],
persona_user_file_ids=persona_user_file_ids,
current_message_user_file_ids=current_message_user_file_ids,
project_id=chat_session.project_id,
db_session=db_session,
persona=persona,
@@ -566,15 +569,20 @@ def stream_chat_message_objects(
]
)
# we don't want to attach project files to the user message
current_message_file_ids = []
if current_message_user_file_ids:
current_message_file_ids = get_file_ids_by_user_file_ids(
current_message_user_file_ids, db_session
)
# we don't want to attach project files and assistant files to the user message
if user_message:
attach_files_to_chat_message(
chat_message=user_message,
files=[
new_file.to_file_descriptor()
for new_file in latest_query_files
if project_file_ids is not None
and (new_file.file_id not in project_file_ids)
if (new_file.file_id in current_message_file_ids)
],
db_session=db_session,
commit=False,
@@ -654,10 +662,11 @@ def stream_chat_message_objects(
prompt_override = new_msg_req.prompt_override or chat_session.prompt_override
if new_msg_req.persona_override_config:
prompt_config = PromptConfig(
system_prompt=new_msg_req.persona_override_config.prompts[
default_behavior_system_prompt=new_msg_req.persona_override_config.prompts[
0
].system_prompt,
task_prompt=new_msg_req.persona_override_config.prompts[0].task_prompt,
custom_instructions=None,
reminder=new_msg_req.persona_override_config.prompts[0].task_prompt,
datetime_aware=new_msg_req.persona_override_config.prompts[
0
].datetime_aware,
@@ -666,10 +675,11 @@ def stream_chat_message_objects(
# Apply prompt override on top of persona-embedded prompt
prompt_config = PromptConfig.from_model(
persona,
db_session=db_session,
prompt_override=prompt_override,
)
else:
prompt_config = PromptConfig.from_model(persona)
prompt_config = PromptConfig.from_model(persona, db_session=db_session)
# Retrieve project-specific instructions if this chat session is associated with a project.
project_instructions: str | None = (
@@ -748,14 +758,14 @@ def stream_chat_message_objects(
]
if not search_tool_override_kwargs_for_user_files and in_memory_user_files:
# we only want to send the user files attached to the current message
yield UserKnowledgeFilePacket(
user_files=[
FileDescriptor(
id=str(file.file_id), type=file.file_type, name=file.filename
)
for file in in_memory_user_files
if project_file_ids is not None
and (file.file_id not in project_file_ids)
if (file.file_id in current_message_file_ids)
]
)
feature_flag_provider = get_default_feature_flag_provider()
@@ -803,6 +813,7 @@ def stream_chat_message_objects(
or get_main_llm_from_tuple(
get_llms_for_persona(
persona=persona,
user=user,
llm_override=(
new_msg_req.llm_override or chat_session.llm_override
),
@@ -884,6 +895,40 @@ def stream_chat_message_objects(
# TODO: Refactor this to live somewhere else
def _reserve_prompt_tokens_for_agent_overhead(
prompt_builder: AnswerPromptBuilder,
primary_llm: LLM,
tools: list[Tool],
prompt_config: PromptConfig,
) -> None:
try:
tokenizer = get_tokenizer(
provider_type=primary_llm.config.model_provider,
model_name=primary_llm.config.model_name,
)
except Exception:
logger.exception("Failed to initialize tokenizer for agent token budgeting.")
return
reserved_tokens = 0
if tools:
try:
reserved_tokens += compute_all_tool_tokens(tools, tokenizer)
except Exception:
logger.exception("Failed to compute tool token budget.")
custom_instructions = prompt_config.custom_instructions
if custom_instructions:
custom_instruction_text = f"Custom Instructions: {custom_instructions}"
reserved_tokens += len(tokenizer.encode(custom_instruction_text))
if reserved_tokens <= 0:
return
prompt_builder.max_tokens = max(0, prompt_builder.max_tokens - reserved_tokens)
def _fast_message_stream(
answer: Answer,
tools: list[Tool],
@@ -898,6 +943,12 @@ def _fast_message_stream(
) -> Generator[Packet, None, None]:
# TODO: clean up this jank
is_responses_api = isinstance(llm_model, OpenAIResponsesModel)
prompt_builder = answer.graph_inputs.prompt_builder
primary_llm = answer.graph_tooling.primary_llm
if prompt_builder and primary_llm:
_reserve_prompt_tokens_for_agent_overhead(
prompt_builder, primary_llm, tools, prompt_config
)
messages = base_messages_to_agent_sdk_msgs(
answer.graph_inputs.prompt_builder.build(), is_responses_api=is_responses_api
)

View File

@@ -13,16 +13,15 @@ from onyx.chat.prompt_builder.citations_prompt import compute_max_llm_input_toke
from onyx.chat.prompt_builder.utils import translate_history_to_basemessages
from onyx.file_store.models import InMemoryChatFile
from onyx.llm.interfaces import LLMConfig
from onyx.llm.llm_provider_options import OPENAI_PROVIDER_NAME
from onyx.llm.models import PreviousMessage
from onyx.llm.utils import build_content_with_imgs
from onyx.llm.utils import check_message_tokens
from onyx.llm.utils import message_to_prompt_and_imgs
from onyx.llm.utils import model_needs_formatting_reenabled
from onyx.llm.utils import model_supports_image_input
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
from onyx.prompts.chat_prompts import CODE_BLOCK_MARKDOWN
from onyx.prompts.chat_prompts import CUSTOM_INSTRUCTIONS_PROMPT
from onyx.prompts.chat_prompts import DEFAULT_SYSTEM_PROMPT
from onyx.prompts.chat_prompts import LONG_CONVERSATION_REMINDER_PROMPT
from onyx.prompts.chat_prompts import TOOL_PERSISTENCE_PROMPT
@@ -38,8 +37,6 @@ from onyx.tools.models import ToolResponse
from onyx.tools.tool import Tool
# TODO: We could do smoother templating than all these sequential
# function calls
def default_build_system_message_v2(
prompt_config: PromptConfig,
llm_config: LLMConfig,
@@ -47,31 +44,15 @@ def default_build_system_message_v2(
tools: Sequence[Tool] | None = None,
should_cite_documents: bool = False,
) -> SystemMessage:
# Check if we should include custom instructions (before date processing)
custom_instructions = prompt_config.system_prompt.strip()
clean_custom_instructions = "".join(custom_instructions.split())
clean_default_system_prompt = "".join(DEFAULT_SYSTEM_PROMPT.split())
should_include_custom_instructions = (
clean_custom_instructions
and clean_custom_instructions != clean_default_system_prompt
system_prompt = (
prompt_config.default_behavior_system_prompt or DEFAULT_SYSTEM_PROMPT
)
# Start with base prompt
system_prompt = DEFAULT_SYSTEM_PROMPT
# See https://simonwillison.net/tags/markdown/ for context on this temporary fix
# for o-series markdown generation
if (
llm_config.model_provider == OPENAI_PROVIDER_NAME
and llm_config.model_name.startswith("o")
):
# See https://simonwillison.net/tags/markdown/ for context on why this is needed
# for OpenAI reasoning models to have correct markdown generation
if model_needs_formatting_reenabled(llm_config.model_name):
system_prompt = CODE_BLOCK_MARKDOWN + system_prompt
if should_include_custom_instructions:
system_prompt += "\n\n## Custom Instructions\n"
system_prompt += CUSTOM_INSTRUCTIONS_PROMPT
system_prompt += custom_instructions
tag_handled_prompt = handle_onyx_date_awareness(
system_prompt,
prompt_config,
@@ -83,28 +64,23 @@ def default_build_system_message_v2(
if memories:
tag_handled_prompt = handle_memories(tag_handled_prompt, memories)
# Add Tools section if tools are provided
if tools:
tag_handled_prompt += "\n\n# Tools\n"
tag_handled_prompt += TOOL_PERSISTENCE_PROMPT
# Detect tool types
has_web_search = any(type(tool).__name__ == "WebSearchTool" for tool in tools)
has_internal_search = any(type(tool).__name__ == "SearchTool" for tool in tools)
# Add search guidance if web search or internal search is provided
if has_web_search or has_internal_search:
from onyx.prompts.chat_prompts import TOOL_DESCRIPTION_SEARCH_GUIDANCE
tag_handled_prompt += "\n" + TOOL_DESCRIPTION_SEARCH_GUIDANCE + "\n"
# Add internal search guidance if internal search is provided
if has_internal_search:
from onyx.prompts.chat_prompts import INTERNAL_SEARCH_GUIDANCE
tag_handled_prompt += "\n" + INTERNAL_SEARCH_GUIDANCE + "\n"
# Add internal search vs web search guidance if both are provided
if has_internal_search and has_web_search:
from onyx.prompts.chat_prompts import (
INTERNAL_SEARCH_VS_WEB_SEARCH_GUIDANCE,
@@ -114,13 +90,11 @@ def default_build_system_message_v2(
for tool in tools:
if type(tool).__name__ == "WebSearchTool":
# Import at runtime to avoid circular dependency
from onyx.tools.tool_implementations_v2.web import (
WEB_SEARCH_LONG_DESCRIPTION,
OPEN_URL_LONG_DESCRIPTION,
)
# Special handling for WebSearchTool - expand to web_search and open_url
tag_handled_prompt += "\n## web_search\n"
tag_handled_prompt += WEB_SEARCH_LONG_DESCRIPTION
tag_handled_prompt += "\n\n## open_url\n"
@@ -135,13 +109,12 @@ def default_build_system_message_v2(
)
tag_handled_prompt += tool.description
# Add citation requirement as second to last section if needed
tag_handled_prompt += "\n# Reminders"
if should_cite_documents:
from onyx.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
tag_handled_prompt += "\n\n" + REQUIRE_CITATION_STATEMENT
# Add the reminders section last
tag_handled_prompt += "\n\n" + LONG_CONVERSATION_REMINDER_PROMPT
return SystemMessage(content=tag_handled_prompt)
@@ -152,14 +125,17 @@ def default_build_system_message(
llm_config: LLMConfig,
memories: list[str] | None = None,
) -> SystemMessage | None:
system_prompt = prompt_config.system_prompt.strip()
# See https://simonwillison.net/tags/markdown/ for context on this temporary fix
# for o-series markdown generation
if (
llm_config.model_provider == OPENAI_PROVIDER_NAME
and llm_config.model_name.startswith("o")
):
# Build system prompt from default behavior and custom instructions
# for backwards compatibility
system_prompt = (
prompt_config.custom_instructions
or prompt_config.default_behavior_system_prompt
)
# See https://simonwillison.net/tags/markdown/ for context on why this is needed
# for OpenAI reasoning models to have correct markdown generation
if model_needs_formatting_reenabled(llm_config.model_name):
system_prompt = CODE_BLOCK_MARKDOWN + system_prompt
tag_handled_prompt = handle_onyx_date_awareness(
system_prompt,
prompt_config,
@@ -192,10 +168,10 @@ def default_build_user_message(
user_prompt = (
CHAT_USER_CONTEXT_FREE_PROMPT.format(
history_block=history_block,
task_prompt=prompt_config.task_prompt,
task_prompt=prompt_config.reminder,
user_query=user_query,
)
if prompt_config.task_prompt
if prompt_config.reminder
else user_query
)

View File

@@ -1,5 +1,6 @@
from langchain.schema.messages import HumanMessage
from langchain.schema.messages import SystemMessage
from sqlalchemy.orm import Session
from onyx.chat.models import LlmDoc
from onyx.chat.models import PromptConfig
@@ -8,8 +9,7 @@ from onyx.context.search.models import InferenceChunk
from onyx.db.models import Persona
from onyx.db.search_settings import get_multilingual_expansion
from onyx.file_store.models import InMemoryChatFile
from onyx.llm.factory import get_llms_for_persona
from onyx.llm.factory import get_main_llm_from_tuple
from onyx.llm.factory import get_llm_config_for_persona
from onyx.llm.interfaces import LLMConfig
from onyx.llm.utils import build_content_with_imgs
from onyx.llm.utils import check_number_of_tokens
@@ -36,8 +36,8 @@ logger = setup_logger()
def get_prompt_tokens(prompt_config: PromptConfig) -> int:
# Note: currently custom prompts do not allow datetime aware, only default prompts
return (
check_number_of_tokens(prompt_config.system_prompt)
+ check_number_of_tokens(prompt_config.task_prompt)
check_number_of_tokens(prompt_config.default_behavior_system_prompt)
+ check_number_of_tokens(prompt_config.reminder)
+ CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT
+ CITATION_STATEMENT_TOKEN_CNT
+ CITATION_REMINDER_TOKEN_CNT
@@ -88,12 +88,14 @@ def compute_max_document_tokens(
def compute_max_document_tokens_for_persona(
persona: Persona,
db_session: Session,
actual_user_input: str | None = None,
) -> int:
# Use the persona directly since prompts are now embedded
# Access to persona is assumed to have been verified already
return compute_max_document_tokens(
prompt_config=PromptConfig.from_model(persona),
llm_config=get_main_llm_from_tuple(get_llms_for_persona(persona)).config,
prompt_config=PromptConfig.from_model(persona, db_session=db_session),
llm_config=get_llm_config_for_persona(persona=persona, db_session=db_session),
actual_user_input=actual_user_input,
)
@@ -106,7 +108,7 @@ def compute_max_llm_input_tokens(llm_config: LLMConfig) -> int:
def build_citations_system_message(
prompt_config: PromptConfig,
) -> SystemMessage:
system_prompt = prompt_config.system_prompt.strip()
system_prompt = prompt_config.default_behavior_system_prompt.strip()
# Citations are always enabled
system_prompt += REQUIRE_CITATION_STATEMENT
tag_handled_prompt = handle_onyx_date_awareness(

View File

@@ -31,10 +31,10 @@ def _build_strong_llm_quotes_prompt(
history_block = HISTORY_BLOCK.format(history_str=history_str)
full_prompt = JSON_PROMPT.format(
system_prompt=prompt.system_prompt,
system_prompt=prompt.default_behavior_system_prompt,
context_block=context_block,
history_block=history_block,
task_prompt=prompt.task_prompt,
task_prompt=prompt.reminder,
user_query=question,
language_hint_or_none=LANGUAGE_HINT.strip() if use_language_hint else "",
).strip()

View File

@@ -101,20 +101,31 @@ def _separate_federated_sections(
def _compute_limit(
prompt_config: PromptConfig,
llm_config: LLMConfig,
question: str,
existing_input_tokens: int,
max_chunks: int | None,
max_window_percentage: float | None,
max_tokens: int | None,
tool_token_count: int,
prompt_config: PromptConfig | None = None,
) -> int:
llm_max_document_tokens = compute_max_document_tokens(
prompt_config=prompt_config,
llm_config=llm_config,
tool_token_count=tool_token_count,
actual_user_input=question,
)
# If prompt_config is provided (backwards compatibility), compute using the old method
if prompt_config is not None:
llm_max_document_tokens = compute_max_document_tokens(
prompt_config=prompt_config,
llm_config=llm_config,
tool_token_count=tool_token_count,
actual_user_input=None, # Will use default estimate
)
else:
# New path: existing_input_tokens is pre-computed total input token count
# This includes system prompt, history, user message, agent turns, etc.
llm_max_document_tokens = (
llm_config.max_input_tokens
- existing_input_tokens
- tool_token_count
- 40 # _MISC_BUFFER from compute_max_document_tokens
)
window_percentage_based_limit = (
max_window_percentage * llm_max_document_tokens
@@ -333,10 +344,10 @@ def _apply_pruning(
def prune_sections(
sections: list[InferenceSection],
section_relevance_list: list[bool] | None,
prompt_config: PromptConfig,
llm_config: LLMConfig,
question: str,
existing_input_tokens: int,
contextual_pruning_config: ContextualPruningConfig,
prompt_config: PromptConfig | None = None,
) -> list[InferenceSection]:
# Assumes the sections are score ordered with highest first
if section_relevance_list is not None:
@@ -357,13 +368,13 @@ def prune_sections(
)
token_limit = _compute_limit(
prompt_config=prompt_config,
llm_config=llm_config,
question=question,
existing_input_tokens=existing_input_tokens,
max_chunks=actual_num_chunks,
max_window_percentage=contextual_pruning_config.max_window_percentage,
max_tokens=contextual_pruning_config.max_tokens,
tool_token_count=contextual_pruning_config.tool_num_tokens,
prompt_config=prompt_config,
)
return _apply_pruning(
@@ -504,19 +515,19 @@ def _merge_sections(sections: list[InferenceSection]) -> list[InferenceSection]:
def prune_and_merge_sections(
sections: list[InferenceSection],
section_relevance_list: list[bool] | None,
prompt_config: PromptConfig,
llm_config: LLMConfig,
question: str,
existing_input_tokens: int,
contextual_pruning_config: ContextualPruningConfig,
prompt_config: PromptConfig | None = None,
) -> list[InferenceSection]:
# Assumes the sections are score ordered with highest first
remaining_sections = prune_sections(
sections=sections,
section_relevance_list=section_relevance_list,
prompt_config=prompt_config,
llm_config=llm_config,
question=question,
existing_input_tokens=existing_input_tokens,
contextual_pruning_config=contextual_pruning_config,
prompt_config=prompt_config,
)
merged_sections = _merge_sections(sections=remaining_sections)
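
The new pruning path no longer re-derives the budget from a PromptConfig; it subtracts the already-counted input tokens and the tool definitions from the model's input window, plus a small fixed buffer, to get the document budget. The arithmetic, as a sketch:

# Sketch of the new document-token budget: window minus everything already committed.
_MISC_BUFFER = 40  # matches the buffer used by compute_max_document_tokens


def document_token_budget(
    max_input_tokens: int, existing_input_tokens: int, tool_token_count: int
) -> int:
    return max_input_tokens - existing_input_tokens - tool_token_count - _MISC_BUFFER


# e.g. a 128k-token window with 6k of prompt/history and 1.5k of tool definitions
assert document_token_budget(128_000, 6_000, 1_500) == 120_460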

View File

@@ -2,22 +2,38 @@
import json
from collections.abc import Sequence
from typing import Annotated
from typing import Union
from pydantic import BaseModel
from pydantic import Field
from pydantic import TypeAdapter
from pydantic import ValidationError
from onyx.agents.agent_sdk.message_types import AgentSDKMessage
from onyx.agents.agent_sdk.message_types import FunctionCallOutputMessage
from onyx.chat.models import DOCUMENT_CITATION_NUMBER_EMPTY_VALUE
from onyx.chat.models import LlmDoc
from onyx.chat.turn.models import ChatTurnContext
from onyx.tools.tool_implementations_v2.tool_result_models import (
LlmInternalSearchResult,
)
from onyx.tools.tool_implementations_v2.tool_result_models import LlmOpenUrlResult
from onyx.tools.tool_implementations_v2.tool_result_models import LlmWebSearchResult
# Create a tagged union type for all tool results
ToolResult = Annotated[
Union[LlmInternalSearchResult, LlmWebSearchResult, LlmOpenUrlResult],
Field(discriminator="type"),
]
# TypeAdapter for parsing tool results
_tool_result_adapter = TypeAdapter(list[ToolResult])
class CitationAssignmentResult(BaseModel):
updated_messages: list[AgentSDKMessage]
num_docs_cited: int
new_docs_cited: int
num_tool_calls_cited: int
new_llm_docs: list[LlmDoc]
def assign_citation_numbers_recent_tool_calls(
@@ -28,9 +44,8 @@ def assign_citation_numbers_recent_tool_calls(
docs_fetched_so_far = ctx.documents_processed_by_citation_context_handler
tool_calls_cited_so_far = ctx.tool_calls_processed_by_citation_context_handler
num_tool_calls_cited = 0
num_docs_cited = 0
new_docs_cited = 0
curr_tool_call_idx = 0
new_llm_docs: list[LlmDoc] = []
for message in agent_turn_messages:
new_message: AgentSDKMessage | None = None
@@ -39,44 +54,71 @@ def assign_citation_numbers_recent_tool_calls(
# Type narrow to FunctionCallOutputMessage after checking the 'type' field
func_call_output_msg: FunctionCallOutputMessage = message # type: ignore[assignment]
content = func_call_output_msg["output"]
try:
raw_list = json.loads(content)
llm_docs = [LlmDoc(**doc) for doc in raw_list]
except (json.JSONDecodeError, TypeError, ValidationError):
llm_docs = []
tool_call_results = _decode_tool_call_result(content)
if llm_docs:
if tool_call_results:
updated_citation_number = False
for doc in llm_docs:
if (
doc.document_citation_number
for result in tool_call_results:
if not (
result.unique_identifier_to_strip_away is not None
and result.document_citation_number
== DOCUMENT_CITATION_NUMBER_EMPTY_VALUE
):
num_docs_cited += 1 # add 1 first so it's 1-indexed
updated_citation_number = True
doc.document_citation_number = (
docs_fetched_so_far + num_docs_cited
continue
updated_citation_number = True
cached_document = ctx.fetched_documents_cache[
result.unique_identifier_to_strip_away
]
if (
cached_document.document_citation_number
== DOCUMENT_CITATION_NUMBER_EMPTY_VALUE
):
new_docs_cited += 1
result.document_citation_number = (
docs_fetched_so_far + new_docs_cited
)
cached_document.document_citation_number = (
result.document_citation_number
)
else:
result.document_citation_number = (
cached_document.document_citation_number
)
if updated_citation_number:
# Create updated function call output message
updated_output_message: FunctionCallOutputMessage = {
"type": "function_call_output",
"call_id": func_call_output_msg["call_id"],
"output": json.dumps(
[doc.model_dump(mode="json") for doc in llm_docs]
[
result.model_dump(
mode="json",
exclude={
"unique_identifier_to_strip_away",
"type",
},
)
for result in tool_call_results
]
),
}
new_message = updated_output_message
num_tool_calls_cited += 1
new_llm_docs.extend(llm_docs)
# Increment counter for ALL function_call_output messages, not just processed ones
curr_tool_call_idx += 1
updated_messages.append(new_message or message)
return CitationAssignmentResult(
updated_messages=updated_messages,
num_docs_cited=num_docs_cited,
new_docs_cited=new_docs_cited,
num_tool_calls_cited=num_tool_calls_cited,
new_llm_docs=new_llm_docs,
)
def _decode_tool_call_result(
content: str,
) -> list[LlmInternalSearchResult | LlmOpenUrlResult | LlmWebSearchResult]:
try:
return _tool_result_adapter.validate_json(content)
except ValidationError:
return []
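The decoder above leans on pydantic v2's discriminated-union support: the literal "type" field picks which result model validates each list element, and any mismatch surfaces as a ValidationError that is swallowed into an empty list. A minimal, self-contained sketch of the same pattern, using two hypothetical result models rather than the real LlmInternalSearchResult/LlmWebSearchResult classes:

import json
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter, ValidationError


# Hypothetical stand-ins for the real tool result models.
class SearchResult(BaseModel):
    type: Literal["internal_search"]
    document_citation_number: int


class WebResult(BaseModel):
    type: Literal["web_search"]
    url: str


# The "type" field routes each JSON object to the matching model.
DemoToolResult = Annotated[Union[SearchResult, WebResult], Field(discriminator="type")]
_demo_adapter = TypeAdapter(list[DemoToolResult])

raw = json.dumps(
    [
        {"type": "internal_search", "document_citation_number": -1},
        {"type": "web_search", "url": "https://example.com"},
    ]
)
results = _demo_adapter.validate_json(raw)  # -> [SearchResult(...), WebResult(...)]

try:
    _demo_adapter.validate_json('[{"type": "unknown"}]')
except ValidationError:
    results = []  # mirrors _decode_tool_call_result's fallback

The discriminator keeps parsing strict: an unknown "type" tag fails validation instead of silently coercing into the wrong model.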

View File

@@ -0,0 +1,48 @@
"""Task prompt context handler for updating task prompts in agent messages."""
from collections.abc import Sequence
from onyx.agents.agent_sdk.message_types import AgentSDKMessage
from onyx.agents.agent_sdk.message_types import InputTextContent
from onyx.agents.agent_sdk.message_types import UserMessage
from onyx.chat.models import PromptConfig
from onyx.prompts.prompt_utils import build_task_prompt_reminders_v2
def maybe_append_reminder(
agent_turn_messages: Sequence[AgentSDKMessage],
prompt_config: PromptConfig,
should_cite_documents: bool,
last_iteration_included_web_search: bool = False,
) -> list[AgentSDKMessage]:
"""Add task prompt reminder as a user message.
This function simply appends the task prompt reminder to the agent turn messages.
The removal of previous user messages (including previous reminders) is handled
by the remove_middle_user_messages context handler.
Args:
agent_turn_messages: Messages from the current agent turn iteration
prompt_config: Configuration containing reminder field
should_cite_documents: Whether citation requirements should be included
Returns:
Updated message list with task prompt reminder appended
"""
reminder_text = build_task_prompt_reminders_v2(
prompt_config,
use_language_hint=False,
should_cite=should_cite_documents,
last_iteration_included_web_search=last_iteration_included_web_search,
)
if not reminder_text:
return list(agent_turn_messages)
text_content: InputTextContent = {
"type": "input_text",
"text": reminder_text,
}
reminder_message: UserMessage = {"role": "user", "content": [text_content]}
return list(agent_turn_messages) + [reminder_message]
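In the agent loop (shown further below), this handler runs right after leftover user messages are stripped from the turn, so only one reminder survives per iteration. A rough sketch of that two-step flow, using made-up message dicts and a stub reminder string in place of the real build_task_prompt_reminders_v2 output:

from typing import Any

# Hypothetical agent-turn messages: a stale reminder plus a tool call output.
agent_turn_messages: list[dict[str, Any]] = [
    {"role": "user", "content": [{"type": "input_text", "text": "old reminder"}]},
    {"type": "function_call_output", "call_id": "c1", "output": "[]"},
]

# Step 1 (done by the caller): drop any user messages left over from prior iterations.
agent_turn_messages = [m for m in agent_turn_messages if m.get("role") != "user"]

# Step 2 (this handler): append the freshly built reminder as a new user message.
reminder_text = "Remember to cite documents."  # stand-in for build_task_prompt_reminders_v2(...)
agent_turn_messages.append(
    {"role": "user", "content": [{"type": "input_text", "text": reminder_text}]}
)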

View File

@@ -1,58 +0,0 @@
"""Task prompt context handler for updating task prompts in agent messages."""
from collections.abc import Sequence
from onyx.agents.agent_sdk.message_types import AgentSDKMessage
from onyx.agents.agent_sdk.message_types import InputTextContent
from onyx.agents.agent_sdk.message_types import UserMessage
from onyx.chat.models import PromptConfig
from onyx.prompts.prompt_utils import build_task_prompt_reminders_v2
def update_task_prompt(
current_user_message: UserMessage,
agent_turn_messages: Sequence[AgentSDKMessage],
prompt_config: PromptConfig,
should_cite_documents: bool,
last_iteration_included_web_search: bool = False,
) -> list[AgentSDKMessage]:
user_query = _extract_user_query(current_user_message)
new_task_prompt_text = build_task_prompt_reminders_v2(
user_query,
prompt_config,
use_language_hint=False,
should_cite=should_cite_documents,
last_iteration_included_web_search=last_iteration_included_web_search,
)
last_user_idx = max(
(i for i, m in enumerate(agent_turn_messages) if m.get("role") == "user"),
default=-1,
)
# Filter out last user message and add new task prompt as user message
filtered_messages: list[AgentSDKMessage] = [
m for i, m in enumerate(agent_turn_messages) if i != last_user_idx
]
text_content: InputTextContent = {
"type": "input_text",
"text": new_task_prompt_text,
}
new_user_message: UserMessage = {"role": "user", "content": [text_content]}
return filtered_messages + [new_user_message]
def _extract_user_query(current_user_message: UserMessage) -> str:
pass
first_content = current_user_message["content"][0]
# User messages contain InputTextContent or ImageContent
# Only InputTextContent has "text" field, ImageContent has "image_url"
if first_content["type"] == "input_text":
# Type narrow - we know it's InputTextContent based on the type check
text_content: InputTextContent = first_content # type: ignore[assignment]
return text_content["text"]
# If it's an image content, return empty string or handle appropriately
return ""

View File

@@ -19,6 +19,7 @@ from onyx.agents.agent_sdk.monkey_patches import (
)
from onyx.agents.agent_sdk.sync_agent_stream_adapter import SyncAgentStream
from onyx.agents.agent_search.dr.enums import ResearchType
from onyx.chat.chat_utils import llm_docs_from_fetched_documents_cache
from onyx.chat.chat_utils import saved_search_docs_from_llm_docs
from onyx.chat.memories import get_memories
from onyx.chat.models import PromptConfig
@@ -33,11 +34,12 @@ from onyx.chat.stream_processing.utils import map_document_id_order_v2
from onyx.chat.turn.context_handler.citation import (
assign_citation_numbers_recent_tool_calls,
)
from onyx.chat.turn.context_handler.task_prompt import update_task_prompt
from onyx.chat.turn.context_handler.reminder import maybe_append_reminder
from onyx.chat.turn.infra.chat_turn_event_stream import unified_event_stream
from onyx.chat.turn.models import AgentToolType
from onyx.chat.turn.models import ChatTurnContext
from onyx.chat.turn.models import ChatTurnDependencies
from onyx.chat.turn.prompts.custom_instruction import build_custom_instructions
from onyx.chat.turn.save_turn import extract_final_answer_from_packets
from onyx.chat.turn.save_turn import save_turn
from onyx.server.query_and_chat.streaming_models import CitationDelta
@@ -62,6 +64,26 @@ if TYPE_CHECKING:
MAX_ITERATIONS = 10
# TODO: We should be able to do this a bit more cleanly since we know the schema
# ahead of time. I'll make sure to do that for when we replace AgentSDKMessage.
def _extract_tokens_from_messages(messages: list[AgentSDKMessage]) -> int:
from onyx.llm.utils import check_number_of_tokens
total_input_text_parts: list[str] = []
for msg in messages:
if isinstance(msg, dict):
content = msg.get("content") or msg.get("output")
if isinstance(content, list):
for item in content:
if isinstance(item, dict):
text = item.get("text")
if text:
total_input_text_parts.append(text)
elif isinstance(content, str):
total_input_text_parts.append(content)
return check_number_of_tokens("\n".join(total_input_text_parts))
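The token counter only looks at "content" (a list of text parts or a plain string) and "output" fields; everything else in a message is ignored. A small sketch that mirrors the extraction logic but returns the joined text, so the contribution of each message shape is easy to see (check_number_of_tokens would then count tokens over this string):

from typing import Any

def _demo_extract_text(messages: list[dict[str, Any]]) -> str:
    # Mirrors _extract_tokens_from_messages, but returns the joined text
    # instead of a token count so the behavior is easy to inspect.
    parts: list[str] = []
    for msg in messages:
        content = msg.get("content") or msg.get("output")
        if isinstance(content, list):
            parts.extend(
                item["text"]
                for item in content
                if isinstance(item, dict) and item.get("text")
            )
        elif isinstance(content, str):
            parts.append(content)
    return "\n".join(parts)

messages = [
    {"role": "user", "content": [{"type": "input_text", "text": "hello"}]},
    {"type": "function_call_output", "call_id": "c1", "output": "tool says hi"},
    {"role": "assistant", "content": [{"type": "output_text", "text": "answer"}]},
]
assert _demo_extract_text(messages) == "hello\ntool says hi\nanswer"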
# TODO -- this can be refactored out and played with in evals + normal demo
def _run_agent_loop(
messages: list[AgentSDKMessage],
@@ -77,9 +99,6 @@ def _run_agent_loop(
from onyx.llm.litellm_singleton.config import initialize_litellm
initialize_litellm()
# Split messages into three parts for clear tracking
# TODO: Think about terminal tool calls like image gen
# in multi turn conversations
chat_history = messages[1:-1]
current_user_message = cast(UserMessage, messages[-1])
agent_turn_messages: list[AgentSDKMessage] = []
@@ -91,6 +110,9 @@ def _run_agent_loop(
dependencies.tools if iteration_count < MAX_ITERATIONS else []
)
memories = get_memories(dependencies.user_or_none, dependencies.db_session)
# TODO: The system is rather prompt-cache efficient except for rebuilding the system prompt.
# The biggest offender is when we hit max iterations and then all the tool calls cannot
# be cached anymore since the system message will differ in that it will have no tools.
langchain_system_message = default_build_system_message_v2(
dependencies.prompt_config,
dependencies.llm.config,
@@ -106,8 +128,15 @@ def _run_agent_loop(
)
],
)
previous_messages = [new_system_prompt] + chat_history + [current_user_message]
custom_instructions = build_custom_instructions(prompt_config)
previous_messages = (
[new_system_prompt]
+ chat_history
+ custom_instructions
+ [current_user_message]
)
current_messages = previous_messages + agent_turn_messages
ctx.current_input_tokens = _extract_tokens_from_messages(current_messages)
if not available_tools:
tool_choice = None
@@ -141,26 +170,33 @@ def _run_agent_loop(
for msg in all_messages_after_stream[len(previous_messages) :]
]
# Apply context handlers in order:
# 1. Remove all user messages in the middle (previous reminders)
agent_turn_messages = [
msg for msg in agent_turn_messages if msg.get("role") != "user"
]
# 2. Add task prompt reminder
last_iteration_included_web_search = any(
tool_call.name == "web_search" for tool_call in tool_call_events
)
agent_turn_messages = list(
update_task_prompt(
current_user_message,
agent_turn_messages,
prompt_config,
ctx.should_cite_documents,
last_iteration_included_web_search,
)
agent_turn_messages = maybe_append_reminder(
agent_turn_messages,
prompt_config,
ctx.should_cite_documents,
last_iteration_included_web_search,
)
# 3. Assign citation numbers to tool call outputs
# Instead of doing this complex parsing from the tool call response,
# I could have just used the ToolCallOutput event from the Agents SDK.
# TODO: When agent framework is gone, I can just use our ToolCallOutput event.
citation_result = assign_citation_numbers_recent_tool_calls(
agent_turn_messages, ctx
)
agent_turn_messages = list(citation_result.updated_messages)
ctx.ordered_fetched_documents.extend(citation_result.new_llm_docs)
ctx.documents_processed_by_citation_context_handler += (
citation_result.num_docs_cited
citation_result.new_docs_cited
)
ctx.tool_calls_processed_by_citation_context_handler += (
citation_result.num_tool_calls_cited
@@ -201,6 +237,7 @@ def _fast_chat_turn_core(
chat_session_id,
dependencies.redis_client,
)
ctx = starter_context or ChatTurnContext(
run_dependencies=dependencies,
chat_session_id=chat_session_id,
@@ -246,8 +283,7 @@ def _fast_chat_turn_core(
iteration_instructions=ctx.iteration_instructions,
global_iteration_responses=ctx.global_iteration_responses,
final_answer=final_answer,
unordered_fetched_inference_sections=ctx.unordered_fetched_inference_sections,
ordered_fetched_documents=ctx.ordered_fetched_documents,
fetched_documents_cache=ctx.fetched_documents_cache,
)
dependencies.emitter.emit(
Packet(ind=ctx.current_run_step, obj=OverallStop(type="stop"))
@@ -284,10 +320,11 @@ def _process_stream(
) -> tuple[RunResultStreaming, list["ResponseFunctionToolCall"]]:
from litellm import ResponseFunctionToolCall
mapping = map_document_id_order_v2(ctx.ordered_fetched_documents)
if ctx.ordered_fetched_documents:
llm_docs = llm_docs_from_fetched_documents_cache(ctx.fetched_documents_cache)
mapping = map_document_id_order_v2(llm_docs)
if llm_docs:
processor = CitationProcessor(
context_docs=ctx.ordered_fetched_documents,
context_docs=llm_docs,
doc_id_to_rank_map=mapping,
stop_stream=None,
)
@@ -424,8 +461,11 @@ def _default_packet_translation(
needs_start = has_had_message_start(packet_history, ctx.current_run_step)
if needs_start:
ctx.current_run_step += 1
llm_docs_for_message_start = llm_docs_from_fetched_documents_cache(
ctx.fetched_documents_cache
)
retrieved_search_docs = saved_search_docs_from_llm_docs(
ctx.ordered_fetched_documents
llm_docs_for_message_start
)
packets.append(
Packet(

View File

@@ -13,13 +13,13 @@ from agents import LocalShellTool
from agents import Model
from agents import ModelSettings
from agents import WebSearchTool
from pydantic import BaseModel
from redis.client import Redis
from sqlalchemy.orm import Session
from onyx.agents.agent_search.dr.enums import ResearchType
from onyx.agents.agent_search.dr.models import IterationAnswer
from onyx.agents.agent_search.dr.models import IterationInstructions
from onyx.chat.models import LlmDoc
from onyx.chat.models import PromptConfig
from onyx.chat.turn.infra.emitter import Emitter
from onyx.context.search.models import InferenceSection
@@ -55,6 +55,11 @@ class ChatTurnDependencies:
prompt_config: PromptConfig
class FetchedDocumentCacheEntry(BaseModel):
inference_section: InferenceSection
document_citation_number: int
@dataclass
class ChatTurnContext:
"""Context class to hold search tool and other dependencies"""
@@ -73,13 +78,15 @@ class ChatTurnContext:
should_cite_documents: bool = False
documents_processed_by_citation_context_handler: int = 0
tool_calls_processed_by_citation_context_handler: int = 0
unordered_fetched_inference_sections: list[InferenceSection] = dataclasses.field(
default_factory=list
fetched_documents_cache: dict[str, FetchedDocumentCacheEntry] = dataclasses.field(
default_factory=dict
)
ordered_fetched_documents: list[LlmDoc] = dataclasses.field(default_factory=list)
citations: list[CitationInfo] = dataclasses.field(default_factory=list)
# Used to ignore packets that are streamed back by Agents SDK, but should
# not be emitted to the frontend (e.g. out of order packets)
# TODO: remove this once Agents SDK fixes the bug with Anthropic reasoning
current_output_index: int | None = None
# Token count of all current input context (system, history, user message, agent turns, etc.)
# Updated dynamically as the conversation progresses through tool calls
current_input_tokens: int = 0

View File

@@ -0,0 +1 @@
"""Prompt utilities for chat turns."""

View File

@@ -0,0 +1,40 @@
"""Custom instruction context handler for adding custom instructions to agent messages."""
from onyx.agents.agent_sdk.message_types import InputTextContent
from onyx.agents.agent_sdk.message_types import UserMessage
from onyx.chat.models import PromptConfig
def build_custom_instructions(
prompt_config: PromptConfig,
) -> list[UserMessage]:
"""Add custom instructions as a user message if present in prompt_config.
This function adds a user message containing custom instructions before
the task prompt reminder. Custom instructions are only added if they
exist in the prompt_config.
Args:
prompt_config: Configuration containing custom_instruction field
Returns:
List containing the custom instruction user message, or an empty list if no custom instructions are configured
"""
if not prompt_config.custom_instructions:
return []
custom_instruction_text = (
f"Custom Instructions: {prompt_config.custom_instructions}"
)
text_content: InputTextContent = {
"type": "input_text",
"text": custom_instruction_text,
}
custom_instruction_message: UserMessage = {
"role": "user",
"content": [text_content],
}
return [custom_instruction_message]
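The agent loop splices this list directly into the prompt, between the chat history and the current user message (previous_messages = [system] + history + custom_instructions + [user]). A toy illustration with hypothetical message dicts; when no custom instructions are configured the helper returns [] and the concatenation is a no-op:

from typing import Any

# Hypothetical stand-ins for the real message objects.
system_prompt: dict[str, Any] = {"role": "system", "content": "You are a helpful assistant."}
chat_history: list[dict[str, Any]] = [{"role": "user", "content": "earlier question"}]
current_user_message: dict[str, Any] = {"role": "user", "content": "current question"}

# What build_custom_instructions would return for a configured persona.
custom_instructions: list[dict[str, Any]] = [
    {"role": "user", "content": [{"type": "input_text", "text": "Custom Instructions: be terse"}]}
]

previous_messages = [system_prompt] + chat_history + custom_instructions + [current_user_message]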

View File

@@ -15,9 +15,8 @@ from onyx.agents.agent_search.dr.sub_agents.image_generation.models import (
GeneratedImageFullResult,
)
from onyx.agents.agent_search.dr.utils import convert_inference_sections_to_search_docs
from onyx.chat.models import LlmDoc
from onyx.chat.turn.models import FetchedDocumentCacheEntry
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import InferenceSection
from onyx.db.chat import create_search_doc_from_inference_section
from onyx.db.chat import update_db_session_with_messages
from onyx.db.models import ChatMessage__SearchDoc
@@ -35,50 +34,41 @@ def save_turn(
chat_session_id: UUID,
research_type: ResearchType,
final_answer: str,
unordered_fetched_inference_sections: list[InferenceSection],
ordered_fetched_documents: list[LlmDoc],
fetched_documents_cache: dict[str, FetchedDocumentCacheEntry],
iteration_instructions: list[IterationInstructions],
global_iteration_responses: list[IterationAnswer],
# TODO: figure out better way to pass these dependencies
model_name: str,
model_provider: str,
) -> None:
# first, insert the search_docs
search_docs = [
create_search_doc_from_inference_section(
inference_section=doc,
is_internet=doc.center_chunk.source_type == DocumentSource.WEB,
# Create search docs from inference sections and build mapping
citation_number_to_search_doc_id: dict[int, int] = {}
search_docs = []
for cache_entry in fetched_documents_cache.values():
search_doc = create_search_doc_from_inference_section(
inference_section=cache_entry.inference_section,
is_internet=cache_entry.inference_section.center_chunk.source_type
== DocumentSource.WEB,
db_session=db_session,
commit=False,
)
for doc in unordered_fetched_inference_sections
]
search_docs.append(search_doc)
citation_number_to_search_doc_id[cache_entry.document_citation_number] = (
search_doc.id
)
# then, map_search_docs to message
# Map search_docs to message
_insert_chat_message_search_doc_pair(
message_id, [search_doc.id for search_doc in search_docs], db_session
message_id, [doc.id for doc in search_docs], db_session
)
# lastly, insert the citations
citation_dict: dict[int, int] = {}
# Build citations dict using cited doc numbers from the final answer
cited_doc_nrs = _extract_citation_numbers(final_answer)
if search_docs:
# Create mapping: citation_number -> document_id
citation_to_doc_id = {
doc.document_citation_number: doc.document_id
for doc in ordered_fetched_documents
if doc.document_citation_number is not None
}
# Create mapping: document_id -> search_doc.id
doc_id_to_search_doc_id = {doc.document_id: doc.id for doc in search_docs}
# Chain the lookups: cited_doc_nr -> document_id -> search_doc.id
citation_dict = {
cited_doc_nr: doc_id_to_search_doc_id[citation_to_doc_id[cited_doc_nr]]
for cited_doc_nr in cited_doc_nrs
if cited_doc_nr in citation_to_doc_id
and citation_to_doc_id[cited_doc_nr] in doc_id_to_search_doc_id
}
citation_dict: dict[int, int] = {
cited_doc_nr: citation_number_to_search_doc_id[cited_doc_nr]
for cited_doc_nr in cited_doc_nrs
if cited_doc_nr in citation_number_to_search_doc_id
}
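For illustration, a toy walk-through of this comprehension with hypothetical IDs and a stand-in for _extract_citation_numbers (whose implementation is not shown here and is assumed to pull "[n]"-style markers out of the answer):

import re

def _demo_extract_citation_numbers(answer: str) -> list[int]:
    # Hypothetical stand-in for _extract_citation_numbers.
    return [int(n) for n in re.findall(r"\[(\d+)\]", answer)]

# Hypothetical mapping built while iterating fetched_documents_cache:
# citation number assigned during the turn -> SearchDoc primary key.
citation_number_to_search_doc_id = {1: 101, 2: 102, 3: 103}

final_answer = "Pricing is documented internally [1] and on the website [3][7]."
cited_doc_nrs = _demo_extract_citation_numbers(final_answer)  # [1, 3, 7]

# Same filtering as above: unknown citation numbers (7 here) are dropped.
citation_dict = {
    nr: citation_number_to_search_doc_id[nr]
    for nr in cited_doc_nrs
    if nr in citation_number_to_search_doc_id
}
assert citation_dict == {1: 101, 3: 103}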
llm_tokenizer = get_tokenizer(
model_name=model_name,
provider_type=model_provider,

View File

@@ -3,12 +3,11 @@ from uuid import UUID
from sqlalchemy.orm import Session
from onyx.db.models import Persona
from onyx.db.models import UserFile
from onyx.db.projects import get_user_files_from_project
from onyx.db.user_file import update_last_accessed_at_for_user_files
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import get_user_files_as_user
from onyx.file_store.utils import load_in_memory_chat_files
from onyx.file_store.utils import validate_user_files_ownership
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.utils.logger import setup_logger
@@ -17,37 +16,40 @@ logger = setup_logger()
def parse_user_files(
user_file_ids: list[UUID],
persona_user_file_ids: list[UUID],
current_message_user_file_ids: list[UUID],
db_session: Session,
persona: Persona,
actual_user_input: str,
project_id: int | None,
# should only be None if auth is disabled
user_id: UUID | None,
) -> tuple[list[InMemoryChatFile], list[UserFile], SearchToolOverrideKwargs | None]:
) -> tuple[list[InMemoryChatFile], SearchToolOverrideKwargs | None]:
"""
Parse user files and project into in-memory chat files and create search tool override kwargs.
Only creates SearchToolOverrideKwargs if token overflow occurs.
Args:
user_file_ids: List of user file IDs to load
persona_user_file_ids: List of user file IDs attached to the persona
current_message_user_file_ids: List of user file IDs from the current message
db_session: Database session
persona: Persona to calculate available tokens
actual_user_input: User's input message for token calculation
project_id: Project ID to load associated files
user_id: User ID for file ownership validation and LLM access
Returns:
Tuple of (
loaded user files,
search tool override kwargs if token overflow occurs
)
"""
# Return empty results if no files or project specified
if not user_file_ids and not project_id:
return [], [], None
if (
not persona_user_file_ids
and not current_message_user_file_ids
and not project_id
):
return [], None
project_user_file_ids = []
@@ -60,7 +62,9 @@ def parse_user_files(
)
# Combine user-provided and project-derived user file IDs
combined_user_file_ids = user_file_ids + project_user_file_ids or []
combined_user_file_ids = (
persona_user_file_ids + current_message_user_file_ids + project_user_file_ids
)
# Load user files from the database into memory
user_files = load_in_memory_chat_files(
@@ -68,14 +72,15 @@ def parse_user_files(
db_session,
)
user_file_models = get_user_files_as_user(
combined_user_file_ids,
# current message files should be owned by the user
validate_user_files_ownership(
current_message_user_file_ids,
user_id,
db_session,
)
# Update last accessed at for the user files which are used in the chat
if user_file_ids or project_user_file_ids:
if combined_user_file_ids:
# update_last_accessed_at_for_user_files expects list[UUID]
update_last_accessed_at_for_user_files(
combined_user_file_ids,
@@ -96,8 +101,10 @@ def parse_user_files(
)
# Calculate available tokens for documents based on prompt, user input, etc.
# Access to persona is assumed to have been verified already
available_tokens = compute_max_document_tokens_for_persona(
persona=persona,
db_session=db_session,
actual_user_input=actual_user_input,
)
uploaded_context_cap = int(available_tokens * 0.5)
@@ -113,7 +120,7 @@ def parse_user_files(
# we can just pass them into the prompt directly
if have_enough_tokens:
# No search tool override needed - files can be passed directly
return user_files, user_file_models, None
return user_files, None
# Token overflow - need to use search tool
override_kwargs = SearchToolOverrideKwargs(
@@ -121,10 +128,10 @@ def parse_user_files(
alternate_db_session=None,
retrieved_sections_callback=None,
skip_query_analysis=have_enough_tokens,
user_file_ids=user_file_ids or [],
user_file_ids=current_message_user_file_ids + persona_user_file_ids or [],
project_id=(
project_id if persona.is_default_persona else None
), # if the persona is not default, we don't want to use the project files
)
return user_files, user_file_models, override_kwargs
return user_files, override_kwargs

View File

@@ -794,11 +794,19 @@ ENTERPRISE_EDITION_ENABLED = (
os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() == "true"
)
# Azure DALL-E Configurations
AZURE_DALLE_API_VERSION = os.environ.get("AZURE_DALLE_API_VERSION")
AZURE_DALLE_API_KEY = os.environ.get("AZURE_DALLE_API_KEY")
AZURE_DALLE_API_BASE = os.environ.get("AZURE_DALLE_API_BASE")
AZURE_DALLE_DEPLOYMENT_NAME = os.environ.get("AZURE_DALLE_DEPLOYMENT_NAME")
# Azure Image Configurations
AZURE_IMAGE_API_VERSION = os.environ.get("AZURE_IMAGE_API_VERSION") or os.environ.get(
"AZURE_DALLE_API_VERSION"
)
AZURE_IMAGE_API_KEY = os.environ.get("AZURE_IMAGE_API_KEY") or os.environ.get(
"AZURE_DALLE_API_KEY"
)
AZURE_IMAGE_API_BASE = os.environ.get("AZURE_IMAGE_API_BASE") or os.environ.get(
"AZURE_DALLE_API_BASE"
)
AZURE_IMAGE_DEPLOYMENT_NAME = os.environ.get(
"AZURE_IMAGE_DEPLOYMENT_NAME"
) or os.environ.get("AZURE_DALLE_DEPLOYMENT_NAME")
# configurable image model
IMAGE_MODEL_NAME = os.environ.get("IMAGE_MODEL_NAME", "gpt-image-1")
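Each AZURE_IMAGE_* setting falls back to its legacy AZURE_DALLE_* counterpart via a plain "or" expression, so existing deployments keep working while new ones can switch names. A quick sketch of the resolution order using hypothetical endpoint URLs:

import os

# Legacy-only configuration: the fallback value is used.
os.environ["AZURE_DALLE_API_BASE"] = "https://legacy.openai.azure.com"
api_base = os.environ.get("AZURE_IMAGE_API_BASE") or os.environ.get("AZURE_DALLE_API_BASE")
assert api_base == "https://legacy.openai.azure.com"

# New name set as well: it takes precedence over the legacy name.
os.environ["AZURE_IMAGE_API_BASE"] = "https://new.openai.azure.com"
api_base = os.environ.get("AZURE_IMAGE_API_BASE") or os.environ.get("AZURE_DALLE_API_BASE")
assert api_base == "https://new.openai.azure.com"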

View File

@@ -1,4 +1,6 @@
from base64 import urlsafe_b64decode
from collections.abc import Callable
from collections.abc import Iterator
from typing import Any
from typing import cast
from typing import Dict
@@ -13,9 +15,14 @@ from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.google_utils.google_auth import get_google_creds
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from onyx.connectors.google_utils.google_utils import (
execute_paginated_retrieval_with_max_pages,
)
from onyx.connectors.google_utils.google_utils import execute_single_retrieval
from onyx.connectors.google_utils.google_utils import PAGE_TOKEN_KEY
from onyx.connectors.google_utils.resources import get_admin_service
from onyx.connectors.google_utils.resources import get_gmail_service
from onyx.connectors.google_utils.resources import GmailService
from onyx.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
@@ -23,14 +30,16 @@ from onyx.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_S
from onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS
from onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE
from onyx.connectors.google_utils.shared_constants import USER_FIELDS
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import ImageSection
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
@@ -60,6 +69,8 @@ EMAIL_FIELDS = [
MAX_MESSAGE_BODY_BYTES = 10 * 1024 * 1024 # 10MB cap to keep large threads safe
PAGES_PER_CHECKPOINT = 1
add_retries = retry_builder(tries=50, max_delay=30)
@@ -170,8 +181,12 @@ def _get_message_body(payload: dict[str, Any]) -> str:
return "".join(message_body_chunks)
def _build_document_link(thread_id: str) -> str:
return f"https://mail.google.com/mail/u/0/#inbox/{thread_id}"
def message_to_section(message: Dict[str, Any]) -> tuple[TextSection, dict[str, str]]:
link = f"https://mail.google.com/mail/u/0/#inbox/{message['id']}"
link = _build_document_link(message["id"])
payload = message.get("payload", {})
headers = payload.get("headers", [])
@@ -251,6 +266,8 @@ def thread_to_document(
if not semantic_identifier:
semantic_identifier = "(no subject)"
# NOTE: we're choosing to unconditionally include perm sync info
# (external_access) as it doesn't cost much space
return Document(
id=id,
semantic_identifier=semantic_identifier,
@@ -270,7 +287,59 @@ def thread_to_document(
)
class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def _full_thread_from_id(
thread_id: str,
user_email: str,
gmail_service: GmailService,
) -> Document | ConnectorFailure | None:
try:
thread = next(
execute_single_retrieval(
retrieval_function=gmail_service.users().threads().get,
list_key=None,
userId=user_email,
fields=THREAD_FIELDS,
id=thread_id,
continue_on_404_or_403=True,
),
None,
)
if thread is None:
raise ValueError(f"Thread {thread_id} not found")
return thread_to_document(thread, user_email)
except Exception as e:
return ConnectorFailure(
failed_document=DocumentFailure(
document_id=thread_id, document_link=_build_document_link(thread_id)
),
failure_message=f"Failed to retrieve thread {thread_id}",
exception=e,
)
def _slim_thread_from_id(
thread_id: str,
user_email: str,
gmail_service: GmailService,
) -> SlimDocument:
return SlimDocument(
id=thread_id,
external_access=ExternalAccess(
external_user_emails={user_email},
external_user_group_ids=set(),
is_public=False,
),
)
class GmailCheckpoint(ConnectorCheckpoint):
user_emails: list[str] = [] # stack of user emails to process
page_token: str | None = None
class GmailConnector(
SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GmailCheckpoint]
):
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
self.batch_size = batch_size
@@ -346,79 +415,39 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
return [self.primary_admin_email]
raise
except Exception:
raise
def _fetch_threads(
self,
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
) -> GenerateDocumentsOutput:
query = _build_time_range_query(time_range_start, time_range_end)
doc_batch = []
for user_email in self._get_all_user_emails():
gmail_service = get_gmail_service(self.creds, user_email)
try:
for thread in execute_paginated_retrieval(
retrieval_function=gmail_service.users().threads().list,
list_key="threads",
userId=user_email,
fields=THREAD_LIST_FIELDS,
q=query,
continue_on_404_or_403=True,
):
full_threads = execute_single_retrieval(
retrieval_function=gmail_service.users().threads().get,
list_key=None,
userId=user_email,
fields=THREAD_FIELDS,
id=thread["id"],
continue_on_404_or_403=True,
)
# full_threads is an iterator containing a single thread
# so we need to convert it to a list and grab the first element
full_thread = list(full_threads)[0]
doc = thread_to_document(full_thread, user_email)
if doc is None:
continue
doc_batch.append(doc)
if len(doc_batch) > self.batch_size:
yield doc_batch
doc_batch = []
except HttpError as e:
if _is_mail_service_disabled_error(e):
logger.warning(
"Skipping Gmail sync for %s because the mailbox is disabled.",
user_email,
)
continue
raise
if doc_batch:
yield doc_batch
def _fetch_slim_threads(
def _fetch_threads_impl(
self,
user_email: str,
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
page_token: str | None = None,
set_page_token: Callable[[str | None], None] = lambda x: None,
is_slim: bool = False,
) -> Iterator[Document | ConnectorFailure] | GenerateSlimDocumentOutput:
query = _build_time_range_query(time_range_start, time_range_end)
doc_batch = []
for user_email in self._get_all_user_emails():
logger.info(f"Fetching slim threads for user: {user_email}")
gmail_service = get_gmail_service(self.creds, user_email)
try:
for thread in execute_paginated_retrieval(
retrieval_function=gmail_service.users().threads().list,
list_key="threads",
userId=user_email,
fields=THREAD_LIST_FIELDS,
q=query,
continue_on_404_or_403=True,
):
doc_batch.append(
slim_doc_batch: list[SlimDocument] = []
logger.info(
f"Fetching {'slim' if is_slim else 'full'} threads for user: {user_email}"
)
gmail_service = get_gmail_service(self.creds, user_email)
try:
for thread in execute_paginated_retrieval_with_max_pages(
max_num_pages=PAGES_PER_CHECKPOINT,
retrieval_function=gmail_service.users().threads().list,
list_key="threads",
userId=user_email,
fields=THREAD_LIST_FIELDS,
q=query,
continue_on_404_or_403=True,
**({PAGE_TOKEN_KEY: page_token} if page_token else {}),
):
# if a page token is returned, set it and leave the function
if isinstance(thread, str):
set_page_token(thread)
return
if is_slim:
slim_doc_batch.append(
SlimDocument(
id=thread["id"],
external_access=ExternalAccess(
@@ -428,46 +457,141 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
),
)
)
if len(doc_batch) > SLIM_BATCH_SIZE:
yield doc_batch
doc_batch = []
if callback:
if callback.should_stop():
raise RuntimeError(
"retrieve_all_slim_docs_perm_sync: Stop signal detected"
)
callback.progress("retrieve_all_slim_docs_perm_sync", 1)
except HttpError as e:
if _is_mail_service_disabled_error(e):
logger.warning(
"Skipping slim Gmail sync for %s because the mailbox is disabled.",
user_email,
if len(slim_doc_batch) >= SLIM_BATCH_SIZE:
yield slim_doc_batch
slim_doc_batch = []
else:
result = _full_thread_from_id(
thread["id"], user_email, gmail_service
)
continue
raise
if result is not None:
yield result
if callback:
tag = (
"retrieve_all_slim_docs_perm_sync"
if is_slim
else "gmail_retrieve_all_docs"
)
if callback.should_stop():
raise RuntimeError(f"{tag}: Stop signal detected")
if doc_batch:
yield doc_batch
callback.progress(tag, 1)
if slim_doc_batch:
yield slim_doc_batch
def load_from_state(self) -> GenerateDocumentsOutput:
# done with user
set_page_token(None)
except HttpError as e:
if _is_mail_service_disabled_error(e):
logger.warning(
"Skipping Gmail sync for %s because the mailbox is disabled.",
user_email,
)
return
raise
def _fetch_threads(
self,
user_email: str,
page_token: str | None = None,
set_page_token: Callable[[str | None], None] = lambda x: None,
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> Iterator[Document | ConnectorFailure]:
yield from cast(
Iterator[Document | ConnectorFailure],
self._fetch_threads_impl(
user_email,
time_range_start,
time_range_end,
callback,
page_token,
set_page_token,
False,
),
)
def _fetch_slim_threads(
self,
user_email: str,
page_token: str | None = None,
set_page_token: Callable[[str | None], None] = lambda x: None,
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
yield from cast(
GenerateSlimDocumentOutput,
self._fetch_threads_impl(
user_email,
time_range_start,
time_range_end,
callback,
page_token,
set_page_token,
True,
),
)
def _load_from_checkpoint(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: GmailCheckpoint,
) -> CheckpointOutput[GmailCheckpoint]:
if not checkpoint.user_emails:
checkpoint.user_emails = self._get_all_user_emails()
try:
yield from self._fetch_threads()
def set_page_token(page_token: str | None) -> None:
checkpoint.page_token = page_token
yield from self._fetch_threads(
checkpoint.user_emails[-1],
checkpoint.page_token,
set_page_token,
start,
end,
callback=None,
)
if checkpoint.page_token is None:
# we're done with this user
checkpoint.user_emails.pop()
if len(checkpoint.user_emails) == 0:
checkpoint.has_more = False
return checkpoint
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
try:
yield from self._fetch_threads(start, end)
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
def load_from_checkpoint(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: GmailCheckpoint,
) -> CheckpointOutput[GmailCheckpoint]:
return self._load_from_checkpoint(
start=start,
end=end,
checkpoint=checkpoint,
)
def load_from_checkpoint_with_perm_sync(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: GmailCheckpoint,
) -> CheckpointOutput[GmailCheckpoint]:
# NOTE: we're choosing to unconditionally include perm sync info
# (external_access) as it doesn't cost much space
return self._load_from_checkpoint(
start=start,
end=end,
checkpoint=checkpoint,
)
def retrieve_all_slim_docs_perm_sync(
self,
@@ -476,12 +600,31 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
try:
yield from self._fetch_slim_threads(start, end, callback=callback)
pt_dict: dict[str, str | None] = {PAGE_TOKEN_KEY: None}
def set_page_token(page_token: str | None) -> None:
pt_dict[PAGE_TOKEN_KEY] = page_token
for user_email in self._get_all_user_emails():
yield from self._fetch_slim_threads(
user_email,
pt_dict[PAGE_TOKEN_KEY],
set_page_token,
start,
end,
callback=callback,
)
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
def build_dummy_checkpoint(self) -> GmailCheckpoint:
return GmailCheckpoint(has_more=True)
def validate_checkpoint_json(self, checkpoint_json: str) -> GmailCheckpoint:
return GmailCheckpoint.model_validate_json(checkpoint_json)
if __name__ == "__main__":
pass
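The checkpoint treats user_emails as a stack: each load_from_checkpoint call pulls at most PAGES_PER_CHECKPOINT pages for the user on top, stores the returned page token, pops the user once its token comes back as None, and flips has_more off when the stack empties. A simplified, hypothetical state-machine sketch of that progression (not the real connector runner):

from dataclasses import dataclass, field

@dataclass
class DemoCheckpoint:
    # Simplified stand-in for GmailCheckpoint: a stack of users plus a page cursor.
    user_emails: list[str] = field(default_factory=list)
    page_token: str | None = None
    has_more: bool = True

def demo_step(checkpoint: DemoCheckpoint, next_token: str | None) -> DemoCheckpoint:
    # One "load_from_checkpoint" call: record the page token returned for the
    # top-of-stack user, and pop that user once there are no more pages.
    checkpoint.page_token = next_token
    if checkpoint.page_token is None:
        checkpoint.user_emails.pop()
        if not checkpoint.user_emails:
            checkpoint.has_more = False
    return checkpoint

cp = DemoCheckpoint(user_emails=["b@example.com", "a@example.com"])
cp = demo_step(cp, "page-2")  # a@example.com still has pages left
cp = demo_step(cp, None)      # a@example.com finished, popped from the stack
cp = demo_step(cp, None)      # b@example.com finished, stack empty
assert cp.has_more is False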

View File

@@ -41,6 +41,7 @@ from onyx.connectors.google_drive.file_retrieval import (
)
from onyx.connectors.google_drive.file_retrieval import get_files_in_shared_drive
from onyx.connectors.google_drive.file_retrieval import get_root_folder_id
from onyx.connectors.google_drive.file_retrieval import has_link_only_permission
from onyx.connectors.google_drive.models import DriveRetrievalStage
from onyx.connectors.google_drive.models import GoogleDriveCheckpoint
from onyx.connectors.google_drive.models import GoogleDriveFileType
@@ -164,6 +165,7 @@ class GoogleDriveConnector(
my_drive_emails: str | None = None,
shared_folder_urls: str | None = None,
specific_user_emails: str | None = None,
exclude_domain_link_only: bool = False,
batch_size: int = INDEX_BATCH_SIZE,
# OLD PARAMETERS
folder_paths: list[str] | None = None,
@@ -232,6 +234,7 @@ class GoogleDriveConnector(
self._specific_user_emails = _extract_str_list_from_comma_str(
specific_user_emails
)
self.exclude_domain_link_only = exclude_domain_link_only
self._primary_admin_email: str | None = None
@@ -968,21 +971,54 @@ class GoogleDriveConnector(
)
for file in drive_files:
document_id = onyx_document_id_from_drive_file(file.drive_file)
logger.debug(
f"Updating checkpoint for file: {file.drive_file.get('name')}. "
f"Seen: {document_id in checkpoint.all_retrieved_file_ids}"
)
checkpoint.completion_map[file.user_email].update(
drive_file = file.drive_file or {}
completion = checkpoint.completion_map[file.user_email]
completed_until = completion.completed_until
modified_time = drive_file.get(GoogleFields.MODIFIED_TIME.value)
if isinstance(modified_time, str):
try:
completed_until = datetime.fromisoformat(modified_time).timestamp()
except ValueError:
logger.warning(
"Invalid modifiedTime for file '%s' (stage=%s, user=%s).",
drive_file.get("id"),
file.completion_stage,
file.user_email,
)
completion.update(
stage=file.completion_stage,
completed_until=datetime.fromisoformat(
file.drive_file[GoogleFields.MODIFIED_TIME.value]
).timestamp(),
completed_until=completed_until,
current_folder_or_drive_id=file.parent_id,
)
if document_id not in checkpoint.all_retrieved_file_ids:
checkpoint.all_retrieved_file_ids.add(document_id)
if file.error is not None or not drive_file:
yield file
continue
try:
document_id = onyx_document_id_from_drive_file(drive_file)
except KeyError as exc:
logger.warning(
"Drive file missing id/webViewLink (stage=%s user=%s). Skipping.",
file.completion_stage,
file.user_email,
)
if file.error is None:
file.error = exc # type: ignore[assignment]
yield file
continue
logger.debug(
f"Updating checkpoint for file: {drive_file.get('name')}. "
f"Seen: {document_id in checkpoint.all_retrieved_file_ids}"
)
if document_id in checkpoint.all_retrieved_file_ids:
continue
checkpoint.all_retrieved_file_ids.add(document_id)
yield file
def _manage_oauth_retrieval(
self,
@@ -1106,7 +1142,7 @@ class GoogleDriveConnector(
"""
field_type = (
DriveFileFieldType.WITH_PERMISSIONS
if include_permissions
if include_permissions or self.exclude_domain_link_only
else DriveFileFieldType.STANDARD
)
@@ -1172,6 +1208,10 @@ class GoogleDriveConnector(
start=start,
end=end,
):
if self.exclude_domain_link_only and has_link_only_permission(
retrieved_file.drive_file
):
continue
if retrieved_file.error is None:
files_batch.append(retrieved_file)
continue
@@ -1276,6 +1316,10 @@ class GoogleDriveConnector(
):
if file.error is not None:
raise file.error
if self.exclude_domain_link_only and has_link_only_permission(
file.drive_file
):
continue
if doc := build_slim_document(
self.creds,
file.drive_file,

View File

@@ -36,7 +36,7 @@ class DriveFileFieldType(Enum):
PERMISSION_FULL_DESCRIPTION = (
"permissions(id, emailAddress, type, domain, permissionDetails)"
"permissions(id, emailAddress, type, domain, allowFileDiscovery, permissionDetails)"
)
FILE_FIELDS = (
"nextPageToken, files(mimeType, id, name, "
@@ -69,6 +69,23 @@ def generate_time_range_filter(
return time_range_filter
LINK_ONLY_PERMISSION_TYPES = {"domain", "anyone"}
def has_link_only_permission(file: GoogleDriveFileType) -> bool:
"""
Return True if any permission requires a direct link to access
(allowFileDiscovery is explicitly false for supported types).
"""
permissions = file.get("permissions") or []
for permission in permissions:
if permission.get("type") not in LINK_ONLY_PERMISSION_TYPES:
continue
if permission.get("allowFileDiscovery") is False:
return True
return False
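A short usage example with illustrative permission payloads (field shapes follow the Drive permissions list; the file dicts themselves are made up):

from onyx.connectors.google_drive.file_retrieval import has_link_only_permission

discoverable_file = {
    "permissions": [{"type": "domain", "allowFileDiscovery": True}]
}
link_only_file = {
    "permissions": [
        {"type": "user", "emailAddress": "owner@example.com"},
        {"type": "domain", "allowFileDiscovery": False},
    ]
}

# Discoverable domain shares are kept; link-only domain shares are flagged.
assert has_link_only_permission(discoverable_file) is False
assert has_link_only_permission(link_only_file) is True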
def _get_folders_in_parent(
service: Resource,
parent_id: str | None = None,

View File

@@ -105,14 +105,39 @@ class TeamsConnector(
if self.graph_client is None:
raise ConnectorMissingCredentialError("Teams credentials not loaded.")
# Check if any requested teams have special characters that need client-side filtering
has_special_chars = _has_odata_incompatible_chars(self.requested_team_list)
if has_special_chars:
logger.info(
"Some requested team names contain special characters (&, (, )) that require "
"client-side filtering during data retrieval."
)
# Minimal validation: just check if we can access the teams endpoint
timeout = 10 # Short timeout for basic validation
try:
# Minimal call to confirm we can retrieve Teams
# make sure it doesn't take forever, since this is a synchronous call
found_teams = run_with_timeout(
timeout=10,
func=_collect_all_teams,
graph_client=self.graph_client,
requested=self.requested_team_list,
# For validation, do a lightweight check instead of full team search
logger.info(
f"Requested team count: {len(self.requested_team_list) if self.requested_team_list else 0}, "
f"Has special chars: {has_special_chars}"
)
validation_query = self.graph_client.teams.get().top(1)
run_with_timeout(
timeout=timeout,
func=lambda: validation_query.execute_query(),
)
logger.info(
"Teams validation successful - Access to teams endpoint confirmed"
)
except TimeoutError as e:
raise ConnectorValidationError(
f"Timeout while validating Teams access (waited {timeout}s). "
f"This may indicate network issues or authentication problems. "
f"Error: {e}"
)
except ClientRequestException as e:
@@ -147,12 +172,6 @@ class TeamsConnector(
f"Unexpected error during Teams validation: {e}"
)
if not found_teams:
raise ConnectorValidationError(
"No Teams found for the given credentials. "
"Either there are no Teams in this tenant, or your app does not have permission to view them."
)
# impls for CheckpointedConnector
def build_dummy_checkpoint(self) -> TeamsCheckpoint:
@@ -233,8 +252,8 @@ class TeamsConnector(
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
_end: SecondsSinceUnixEpoch | None = None,
_callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
start = start or 0
@@ -245,7 +264,9 @@ class TeamsConnector(
for team in teams:
if not team.id:
logger.warn(f"Expected a team with an id, instead got no id: {team=}")
logger.warning(
f"Expected a team with an id, instead got no id: {team=}"
)
continue
channels = _collect_all_channels_from_team(
@@ -254,7 +275,7 @@ class TeamsConnector(
for channel in channels:
if not channel.id:
logger.warn(
logger.warning(
f"Expected a channel with an id, instead got no id: {channel=}"
)
continue
@@ -290,6 +311,70 @@ class TeamsConnector(
slim_doc_buffer = []
def _escape_odata_string(name: str) -> str:
"""Escape special characters for OData string literals.
Uses proper OData v4 string literal escaping:
- Single quotes: ' becomes ''
- Other characters are handled by using contains() instead of eq for problematic cases
"""
# Escape single quotes for OData syntax (replace ' with '')
escaped = name.replace("'", "''")
return escaped
def _has_odata_incompatible_chars(team_names: list[str] | None) -> bool:
"""Check if any team name contains characters that break Microsoft Graph OData filters.
The Microsoft Graph Teams API has limited OData support. Characters like
&, (, and ) cause parsing errors and require client-side filtering instead.
"""
if not team_names:
return False
return any(char in name for name in team_names for char in ["&", "(", ")"])
def _can_use_odata_filter(
team_names: list[str] | None,
) -> tuple[bool, list[str], list[str]]:
"""Determine which teams can use OData filtering vs client-side filtering.
Microsoft Graph /teams endpoint OData limitations:
- Only supports basic 'eq' operators in filters
- No 'contains', 'startswith', or other advanced operators
- Special characters (&, (, )) break OData parsing
Returns:
tuple: (can_use_odata, safe_names, problematic_names)
"""
if not team_names:
return False, [], []
safe_names = []
problematic_names = []
for name in team_names:
if any(char in name for char in ["&", "(", ")"]):
problematic_names.append(name)
else:
safe_names.append(name)
return bool(safe_names), safe_names, problematic_names
def _build_simple_odata_filter(safe_names: list[str]) -> str | None:
"""Build simple OData filter using only 'eq' operators for safe names."""
if not safe_names:
return None
filter_parts = []
for name in safe_names:
escaped_name = _escape_odata_string(name)
filter_parts.append(f"displayName eq '{escaped_name}'")
return " or ".join(filter_parts)
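Since only eq comparisons joined by "or" are emitted, and single quotes are doubled per OData escaping, the output for a couple of hypothetical team names looks like this (assuming the helpers above are in scope):

names = ["Engineering", "O'Brien's Team"]
odata_filter = _build_simple_odata_filter(names)
assert odata_filter == "displayName eq 'Engineering' or displayName eq 'O''Brien''s Team'"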
def _construct_semantic_identifier(channel: Channel, top_message: Message) -> str:
top_message_user_name: str
@@ -299,7 +384,7 @@ def _construct_semantic_identifier(channel: Channel, top_message: Message) -> st
user_display_name if user_display_name else "Unknown User"
)
else:
logger.warn(f"Message {top_message=} has no `from.user` field")
logger.warning(f"Message {top_message=} has no `from.user` field")
top_message_user_name = "Unknown User"
top_message_content = top_message.body.content or ""
@@ -392,33 +477,72 @@ def _collect_all_teams(
graph_client: GraphClient,
requested: list[str] | None = None,
) -> list[Team]:
"""Collect teams from Microsoft Graph using appropriate filtering strategy.
For teams with special characters (&, (, )), uses client-side filtering
with paginated search. For teams without special characters, uses efficient
OData server-side filtering.
Args:
graph_client: Authenticated Microsoft Graph client
requested: List of team names to find, or None for all teams
Returns:
List of Team objects matching the requested names
"""
teams: list[Team] = []
next_url: str | None = None
# Build OData filter for requested teams
# Only escape single quotes for OData syntax - the library handles URL encoding
filter = None
use_filter = bool(requested)
if use_filter and requested:
filter_parts = []
for name in requested:
# Escape single quotes for OData syntax (replace ' with '')
# The office365 library will handle URL encoding of the entire filter
escaped_name = name.replace("'", "''")
filter_parts.append(f"displayName eq '{escaped_name}'")
filter = " or ".join(filter_parts)
# Determine filtering strategy based on Microsoft Graph limitations
if not requested:
# No specific teams requested - return empty list (avoid fetching all teams)
logger.info("No specific teams requested - returning empty list")
return []
_, safe_names, problematic_names = _can_use_odata_filter(requested)
if problematic_names and not safe_names:
# ALL requested teams have special characters - cannot use OData filtering
logger.info(
f"All requested team names contain special characters (&, (, )) which require "
f"client-side filtering. Using basic /teams endpoint with pagination. "
f"Teams: {problematic_names}"
)
# Use unfiltered query with pagination limit to avoid fetching too many teams
use_client_side_filtering = True
odata_filter = None
elif problematic_names and safe_names:
# Mixed scenario - need to fetch more teams to find the problematic ones
logger.info(
f"Mixed team types: will use client-side filtering for all. "
f"Safe names: {safe_names}, Special char names: {problematic_names}"
)
use_client_side_filtering = True
odata_filter = None
elif safe_names:
# All names are safe - use OData filtering
logger.info(f"Using OData filtering for all requested teams: {safe_names}")
use_client_side_filtering = False
odata_filter = _build_simple_odata_filter(safe_names)
else:
# No valid names
return []
# Track pagination to avoid fetching too many teams for client-side filtering
max_pages = 200
page_count = 0
while True:
try:
if filter:
query = graph_client.teams.get().filter(filter)
# Add header to work around Microsoft Graph API ampersand bug
query.before_execute(lambda req: _add_prefer_header(request=req))
if use_client_side_filtering:
# Use basic /teams endpoint with top parameter to limit results per page
query = graph_client.teams.get().top(50) # Limit to 50 teams per page
else:
query = graph_client.teams.get_all(
# explicitly needed because of incorrect type definitions provided by the `office365` library
page_loaded=lambda _: None
)
# Use OData filter with only 'eq' operators
query = graph_client.teams.get().filter(odata_filter)
# Add header to work around Microsoft Graph API issues
query.before_execute(lambda req: _add_prefer_header(request=req))
if next_url:
url = next_url
@@ -428,17 +552,19 @@ def _collect_all_teams(
team_collection = query.execute_query()
except (ClientRequestException, ValueError) as e:
# If OData filter fails, fallback to client-side filtering
if use_filter:
# If OData filter fails, fall back to client-side filtering
if not use_client_side_filtering and odata_filter:
logger.warning(
f"OData filter failed with {type(e).__name__}: {e}. "
f"Falling back to client-side filtering."
f"OData filter failed: {e}. Falling back to client-side filtering."
)
use_filter = False
filter = None
use_client_side_filtering = True
odata_filter = None
teams = []
next_url = None
page_count = 0
continue
# If client-side approach also fails, re-raise
logger.error(f"Teams query failed: {e}")
raise
filtered_teams = (
@@ -448,6 +574,32 @@ def _collect_all_teams(
)
teams.extend(filtered_teams)
# For client-side filtering, check if we found all requested teams or hit page limit
if use_client_side_filtering:
page_count += 1
found_team_names = {
team.display_name for team in teams if team.display_name
}
requested_set = set(requested)
# Log progress every 10 pages to avoid excessive logging
if page_count % 10 == 0:
logger.info(
f"Searched {page_count} pages, found {len(found_team_names)} matching teams so far"
)
# Stop if we found all requested teams or hit the page limit
if requested_set.issubset(found_team_names):
logger.info(f"Found all requested teams after {page_count} pages")
break
elif page_count >= max_pages:
logger.warning(
f"Reached maximum page limit ({max_pages}) while searching for teams. "
f"Found: {found_team_names & requested_set}, "
f"Missing: {requested_set - found_team_names}"
)
break
if not team_collection.has_next:
break
@@ -461,6 +613,63 @@ def _collect_all_teams(
return teams
def _normalize_team_name(name: str) -> str:
"""Normalize team name for flexible matching."""
if not name:
return ""
# Convert to lowercase and strip whitespace for case-insensitive matching
return name.lower().strip()
def _matches_requested_team(
team_display_name: str, requested: list[str] | None
) -> bool:
"""Check if team display name matches any of the requested team names.
Uses flexible matching to handle slight variations in team names.
"""
if not requested or not team_display_name:
return (
not requested
) # If no teams requested, match all; if no name, don't match
normalized_team_name = _normalize_team_name(team_display_name)
for requested_name in requested:
normalized_requested = _normalize_team_name(requested_name)
# Exact match after normalization
if normalized_team_name == normalized_requested:
return True
# Flexible matching - check if team name contains all significant words
# This helps with slight variations in formatting
team_words = set(normalized_team_name.split())
requested_words = set(normalized_requested.split())
# If the requested name has special characters, split on those too
for char in ["&", "(", ")"]:
if char in normalized_requested:
# Split on special characters and add words
parts = normalized_requested.replace(char, " ").split()
requested_words.update(parts)
# Remove very short words that aren't meaningful
meaningful_requested_words = {
word for word in requested_words if len(word) >= 3
}
# Check if team name contains most of the meaningful words
if (
meaningful_requested_words
and len(meaningful_requested_words & team_words)
>= len(meaningful_requested_words) * 0.7
):
return True
return False
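A few illustrative examples of the matching behavior (assuming _matches_requested_team is in scope): names match exactly after lowercasing and stripping, and otherwise a team matches when its name contains at least 70% of the meaningful (3+ character) words from the requested name:

assert _matches_requested_team("Engineering & Platform", ["engineering & platform"]) is True
assert _matches_requested_team("Platform Engineering Guild", ["Platform Engineering"]) is True
assert _matches_requested_team("Sales", ["Engineering & Platform"]) is False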
def _filter_team(
team: Team,
requested: list[str] | None = None,
@@ -469,7 +678,7 @@ def _filter_team(
Returns true if:
- Team is not expired / deleted
- Team has a display-name and ID
- Team display-name is in the requested teams list
- Team display-name matches any of the requested teams (with flexible matching)
Otherwise, returns false.
"""
@@ -477,7 +686,7 @@ def _filter_team(
if not team.id or not team.display_name:
return False
if requested and team.display_name not in requested:
if not _matches_requested_team(team.display_name, requested):
return False
props = team.properties

View File

@@ -1,20 +1,26 @@
import json
import re
import time
from datetime import datetime
from datetime import timedelta
from typing import Any
from langchain_core.messages import HumanMessage
from pydantic import ValidationError
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from sqlalchemy.orm import Session
from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import TextSection
from onyx.context.search.federated.models import SlackMessage
from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
from onyx.context.search.federated.slack_search_utils import build_slack_queries
from onyx.context.search.federated.slack_search_utils import ChannelTypeString
from onyx.context.search.federated.slack_search_utils import get_channel_type
from onyx.context.search.federated.slack_search_utils import is_recency_query
from onyx.context.search.federated.slack_search_utils import should_include_message
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import SearchQuery
from onyx.db.document import DocumentSource
@@ -22,15 +28,15 @@ from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.document_index_utils import (
get_multipass_config,
)
from onyx.federated_connectors.slack.models import SlackEntities
from onyx.indexing.chunker import Chunker
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.models import DocAwareChunk
from onyx.llm.factory import get_default_llms
from onyx.llm.interfaces import LLM
from onyx.llm.utils import message_to_string
from onyx.onyxbot.slack.models import ChannelType
from onyx.onyxbot.slack.models import SlackContext
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
from onyx.redis.redis_pool import get_redis_client
from onyx.server.federated.models import FederatedConnectorDetail
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time
@@ -40,6 +46,182 @@ logger = setup_logger()
HIGHLIGHT_START_CHAR = "\ue000"
HIGHLIGHT_END_CHAR = "\ue001"
CHANNEL_TYPES = ["public_channel", "im", "mpim", "private_channel"]
CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24 # 24 hours
SLACK_THREAD_CONTEXT_WINDOW = 3 # Number of messages before matched message to include
CHANNEL_METADATA_MAX_RETRIES = 3 # Maximum retry attempts for channel metadata fetching
CHANNEL_METADATA_RETRY_DELAY = 1 # Initial retry delay in seconds (exponential backoff)
def fetch_and_cache_channel_metadata(
access_token: str, team_id: str, include_private: bool = True
) -> dict[str, dict[str, Any]]:
"""
Fetch ALL channel metadata in one API call and cache it.
Returns a dict mapping channel_id -> metadata including name, type, etc.
This replaces multiple conversations.info calls with a single conversations.list.
Note: We ALWAYS fetch all channel types (including private) and cache them together.
This ensures a single cache entry per team, avoiding duplicate API calls.
"""
# Use tenant-specific Redis client
redis_client = get_redis_client()
# (tenant_id prefix is added automatically by TenantRedis)
cache_key = f"slack_federated_search:{team_id}:channels:metadata"
try:
cached = redis_client.get(cache_key)
if cached:
logger.info(f"Channel metadata cache HIT for team {team_id}")
cached_str: str = (
cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
)
cached_data: dict[str, dict[str, Any]] = json.loads(cached_str)
logger.info(f"Loaded {len(cached_data)} channels from cache")
if not include_private:
filtered = {
k: v
for k, v in cached_data.items()
if v.get("type") != "private_channel"
}
logger.info(f"Filtered to {len(filtered)} channels (exclude private)")
return filtered
return cached_data
except Exception as e:
logger.warning(f"Error reading from channel metadata cache: {e}")
# Cache miss - fetch from Slack API with retry logic
logger.info(f"Channel metadata cache MISS for team {team_id} - fetching from API")
slack_client = WebClient(token=access_token)
channel_metadata: dict[str, dict[str, Any]] = {}
# Retry logic with exponential backoff
last_exception = None
for attempt in range(CHANNEL_METADATA_MAX_RETRIES):
try:
# ALWAYS fetch all channel types including private
channel_types = ",".join(CHANNEL_TYPES)
# Fetch all channels in one call
cursor = None
channel_count = 0
while True:
response = slack_client.conversations_list(
types=channel_types,
exclude_archived=True,
limit=1000,
cursor=cursor,
)
response.validate()
# Cast response.data to dict for type checking
response_data: dict[str, Any] = response.data # type: ignore
for ch in response_data.get("channels", []):
channel_id = ch.get("id")
if not channel_id:
continue
# Determine channel type
channel_type_enum = get_channel_type(channel_info=ch)
channel_type = channel_type_enum.value
channel_metadata[channel_id] = {
"name": ch.get("name", ""),
"type": channel_type,
"is_private": ch.get("is_private", False),
"is_member": ch.get("is_member", False),
}
channel_count += 1
cursor = response_data.get("response_metadata", {}).get("next_cursor")
if not cursor:
break
logger.info(f"Fetched {channel_count} channels for team {team_id}")
# Cache the results
try:
redis_client.set(
cache_key,
json.dumps(channel_metadata),
ex=CHANNEL_METADATA_CACHE_TTL,
)
logger.info(
f"Cached {channel_count} channels for team {team_id} (TTL: {CHANNEL_METADATA_CACHE_TTL}s, key: {cache_key})"
)
except Exception as e:
logger.warning(f"Error caching channel metadata: {e}")
return channel_metadata
except SlackApiError as e:
last_exception = e
if attempt < CHANNEL_METADATA_MAX_RETRIES - 1:
retry_delay = CHANNEL_METADATA_RETRY_DELAY * (2**attempt)
logger.warning(
f"Failed to fetch channel metadata (attempt {attempt + 1}/{CHANNEL_METADATA_MAX_RETRIES}): {e}. "
f"Retrying in {retry_delay}s..."
)
time.sleep(retry_delay)
else:
logger.error(
f"Failed to fetch channel metadata after {CHANNEL_METADATA_MAX_RETRIES} attempts: {e}"
)
    # If all retries were exhausted, surface the failure as a SlackApiError
if last_exception:
raise SlackApiError(
f"Channel metadata fetching failed after {CHANNEL_METADATA_MAX_RETRIES} attempts",
last_exception.response,
)
return {}
def get_available_channels(
access_token: str, team_id: str, include_private: bool = False
) -> list[str]:
"""Fetch list of available channel names using cached metadata."""
metadata = fetch_and_cache_channel_metadata(access_token, team_id, include_private)
return [meta["name"] for meta in metadata.values() if meta["name"]]
def _extract_channel_data_from_entities(
entities: dict[str, Any] | None,
channel_metadata_dict: dict[str, dict[str, Any]] | None,
) -> list[str] | None:
"""Extract available channels list from metadata based on entity configuration.
Args:
entities: Entity filter configuration dict
channel_metadata_dict: Pre-fetched channel metadata dictionary
Returns:
List of available channel names, or None if not needed
"""
if not entities or not channel_metadata_dict:
return None
try:
parsed_entities = SlackEntities(**entities)
# Only extract if we have exclusions or channel filters
if parsed_entities.exclude_channels or parsed_entities.channels:
# Extract channel names from metadata dict
return [
meta["name"]
for meta in channel_metadata_dict.values()
if meta["name"]
and (
parsed_entities.include_private_channels
or meta.get("type") != ChannelTypeString.PRIVATE_CHANNEL.value
)
]
except ValidationError:
logger.debug("Failed to parse entities for channel data extraction")
return None
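# Illustrative sketch of the same type-based filter on a tiny in-memory metadata dict
# (hypothetical channel IDs; assumes ChannelTypeString.PRIVATE_CHANNEL.value is
# "private_channel", matching its use elsewhere in this module). With private channels
# excluded, only the public channel name survives.
def _example_channel_name_filter(include_private: bool = False) -> list[str]:
    demo_metadata = {
        "C0000000001": {"name": "general", "type": "public_channel"},
        "G0000000001": {"name": "leads", "type": "private_channel"},
    }
    return [
        meta["name"]
        for meta in demo_metadata.values()
        if meta["name"]
        and (
            include_private
            or meta.get("type") != ChannelTypeString.PRIVATE_CHANNEL.value
        )
    ]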
def _should_skip_channel(
channel_id: str,
@@ -48,26 +230,23 @@ def _should_skip_channel(
access_token: str,
include_dm: bool,
) -> bool:
"""
Determine if a channel should be skipped if in bot context. When an allowed_private_channel is passed in,
all other private channels are filtered out except that specific one.
"""
"""Bot context filtering: skip private channels unless explicitly allowed."""
if bot_token and not include_dm:
try:
# Use bot token if available (has full permissions), otherwise fall back to user token
token_to_use = bot_token or access_token
channel_client = WebClient(token=token_to_use)
channel_info = channel_client.conversations_info(channel=channel_id)
if isinstance(channel_info.data, dict):
channel_data = channel_info.data.get("channel", {})
channel_type = get_channel_type(channel_info=channel_data)
is_private_or_dm = channel_type in [
ChannelType.PRIVATE_CHANNEL,
ChannelType.IM,
ChannelType.MPIM,
]
if is_private_or_dm and channel_id != allowed_private_channel:
return True
except Exception as e:
logger.warning(
@@ -77,50 +256,6 @@ def _should_skip_channel(
return False
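# Summary of the skip decision above (hypothetical IDs): with a bot token and
# include_dm=False, a private channel, IM, or MPIM such as "G0000000001" is skipped
# unless it matches allowed_private_channel; public channels always pass, and any
# lookup failure falls through to "do not skip".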
def build_slack_queries(query: SearchQuery, llm: LLM) -> list[str]:
# get time filter
time_filter = ""
time_cutoff = query.filters.time_cutoff
if time_cutoff is not None:
# slack after: is exclusive, so we need to subtract one day
time_cutoff = time_cutoff - timedelta(days=1)
time_filter = f" after:{time_cutoff.strftime('%Y-%m-%d')}"
# use llm to generate slack queries (use original query to use same keywords as the user)
prompt = SLACK_QUERY_EXPANSION_PROMPT.format(query=query.original_query)
try:
msg = HumanMessage(content=prompt)
response = llm.invoke([msg])
rephrased_queries = message_to_string(response).split("\n")
except Exception as e:
logger.error(f"Error expanding query: {e}")
rephrased_queries = [query.query]
return [
rephrased_query.strip() + time_filter
for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
]
def _is_public_channel(channel_info: dict[str, Any]) -> bool:
"""Check if a channel is public based on its info"""
# The channel_info structure has a nested 'channel' object
channel = channel_info.get("channel", {})
is_channel = channel.get("is_channel", False)
is_private = channel.get("is_private", False)
is_group = channel.get("is_group", False)
is_mpim = channel.get("is_mpim", False)
is_im = channel.get("is_im", False)
# A public channel is: a channel that is NOT private, NOT a group, NOT mpim, NOT im
is_public = (
is_channel and not is_private and not is_group and not is_mpim and not is_im
)
return is_public
def query_slack(
query_string: str,
original_query: SearchQuery,
@@ -129,17 +264,52 @@ def query_slack(
allowed_private_channel: str | None = None,
bot_token: str | None = None,
include_dm: bool = False,
entities: dict[str, Any] | None = None,
available_channels: list[str] | None = None,
) -> list[SlackMessage]:
# query slack
# Check if query has channel override (user specified channels in query)
has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")
if has_channel_override:
# Remove the marker and use the query as-is (already has channel filters)
final_query = query_string.replace("__CHANNEL_OVERRIDE__", "").strip()
else:
# Normal flow: build channel filters from entity config
channel_filter = ""
if entities:
channel_filter = build_channel_query_filter(entities, available_channels)
final_query = query_string
if channel_filter:
# Add channel filter to query
final_query = f"{query_string} {channel_filter}"
logger.info(f"Final query to slack: {final_query}")
# Detect if query asks for most recent results
sort_by_time = is_recency_query(original_query.query)
slack_client = WebClient(token=access_token)
try:
search_params: dict[str, Any] = {
"query": final_query,
"count": limit,
"highlight": True,
}
# Sort by timestamp for recency-focused queries, otherwise by relevance
if sort_by_time:
search_params["sort"] = "timestamp"
search_params["sort_dir"] = "desc"
response = slack_client.search_messages(**search_params)
response.validate()
messages: dict[str, Any] = response.get("messages", {})
matches: list[dict[str, Any]] = messages.get("matches", [])
logger.info(f"Successfully used search_messages, found {len(matches)} messages")
logger.info(f"Slack search found {len(matches)} messages")
except SlackApiError as slack_error:
logger.error(f"Slack API error in search_messages: {slack_error}")
logger.error(
@@ -327,11 +497,26 @@ def get_contextualized_thread_text(message: SlackMessage, access_token: str) ->
if not message_id_idx:
return thread_text
# Include a few messages BEFORE the matched message for context
# This helps understand what the matched message is responding to
start_idx = max(
1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW
) # Start after thread starter
# Add ellipsis if we're skipping messages between thread starter and context window
if start_idx > 1:
thread_text += "\n..."
# Add context messages before the matched message
for i in range(start_idx, message_id_idx):
msg_text = messages[i].get("text", "")
msg_sender = messages[i].get("user", "")
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
# Add the matched message itself
msg_text = messages[message_id_idx].get("text", "")
msg_sender = messages[message_id_idx].get("user", "")
thread_text += f"\n<@{msg_sender}>: {msg_text}"
thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
# add the following replies to the thread text
len_replies = 0
@@ -356,7 +541,13 @@ def get_contextualized_thread_text(message: SlackMessage, access_token: str) ->
profile: dict[str, Any] = response.get("profile", {})
name: str | None = profile.get("real_name") or profile.get("email")
except SlackApiError as e:
logger.error(f"Slack API error in get_contextualized_thread_text: {e}")
# user_not_found is common for deleted users, bots, etc. - not critical
if "user_not_found" in str(e):
logger.debug(
f"User {userid} not found in Slack workspace (likely deleted/deactivated)"
)
else:
logger.warning(f"Could not fetch profile for user {userid}: {e}")
continue
if not name:
continue
@@ -379,18 +570,84 @@ def slack_retrieval(
query: SearchQuery,
access_token: str,
db_session: Session,
connector: FederatedConnectorDetail | None = None,
entities: dict[str, Any] | None = None,
limit: int | None = None,
slack_event_context: SlackContext | None = None,
    bot_token: str | None = None,  # Bot token for enhanced permissions
team_id: str | None = None,
) -> list[InferenceChunk]:
"""
Main entry point for Slack federated search with entity filtering.
Applies entity filtering including:
- Channel selection and exclusion
- Date range extraction and enforcement
- DM/private channel filtering
- Multi-layer caching
Args:
query: Search query object
access_token: User OAuth access token
db_session: Database session
connector: Federated connector detail (unused, kept for backwards compat)
entities: Connector-level config (entity filtering configuration)
limit: Maximum number of results
slack_event_context: Context when called from Slack bot
bot_token: Bot token for enhanced permissions
team_id: Slack team/workspace ID
Returns:
List of InferenceChunk objects
"""
# Use connector-level config
entities = entities or {}
if not entities:
logger.info("No entity configuration found, using defaults")
else:
logger.info(f"Using entity configuration: {entities}")
# Extract limit from entity config if not explicitly provided
query_limit = limit
if entities:
try:
parsed_entities = SlackEntities(**entities)
if limit is None:
query_limit = parsed_entities.max_messages_per_query
logger.info(f"Using max_messages_per_query from config: {query_limit}")
except Exception as e:
logger.warning(f"Error parsing entities for limit: {e}")
if limit is None:
query_limit = 100 # Fallback default
elif limit is None:
query_limit = 100 # Default when no entities and no limit provided
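    # Examples of the limit resolution above: limit=None with max_messages_per_query=50
    # gives query_limit=50; an explicit limit (e.g. 20) always wins; with no entities
    # and no limit, query_limit falls back to 100.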
# Pre-fetch channel metadata from Redis cache and extract available channels
# This avoids repeated Redis lookups during parallel search execution
available_channels = None
channel_metadata_dict = None
if team_id:
# Always fetch all channel types (include_private=True) to ensure single cache entry
channel_metadata_dict = fetch_and_cache_channel_metadata(
access_token, team_id, include_private=True
)
# Extract available channels list if needed for pattern matching
available_channels = _extract_channel_data_from_entities(
entities, channel_metadata_dict
)
# Query slack with entity filtering
_, fast_llm = get_default_llms()
query_strings = build_slack_queries(query, fast_llm, entities, available_channels)
# Determine filtering based on entities OR context (bot)
include_dm = False
allowed_private_channel = None
# Bot context overrides (if entities not specified)
if slack_event_context and not entities:
channel_type = slack_event_context.channel_type
if channel_type == ChannelType.IM: # DM with user
include_dm = True
@@ -400,24 +657,94 @@ def slack_retrieval(
f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
)
    # Build search tasks
    search_tasks = [
        (
            query_slack,
            (
                query_string,
                query,
                access_token,
                query_limit,
                allowed_private_channel,
                bot_token,
                include_dm,
                entities,
                available_channels,
            ),
        )
        for query_string in query_strings
    ]

    # If include_dm is True, add additional searches without channel filters
    # This allows searching DMs/group DMs while still searching the specified channels
    if entities and entities.get("include_dm"):
        # Create a minimal entities dict that won't add channel filters
        # This ensures we search ALL conversations (DMs, group DMs, private channels)
        # BUT we still want to exclude channels specified in exclude_channels
        dm_entities = {
            "include_dm": True,
            "include_private_channels": entities.get("include_private_channels", False),
            "default_search_days": entities.get("default_search_days", 30),
            "search_all_channels": True,
            "channels": None,
            "exclude_channels": entities.get(
                "exclude_channels"
            ),  # ALWAYS apply exclude_channels
        }
        for query_string in query_strings:
            search_tasks.append(
                (
                    query_slack,
                    (
                        query_string,
                        query,
                        access_token,
                        query_limit,
                        allowed_private_channel,
                        bot_token,
                        include_dm,
                        dm_entities,
                        available_channels,
                    ),
                )
            )

    # Execute searches in parallel
    results = run_functions_tuples_in_parallel(search_tasks)
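    # With the task list above there are len(query_strings) searches by default, and
    # twice as many when include_dm is enabled: each expanded query runs once with the
    # configured channel filters and once against DMs/group DMs (dm_entities).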
# Merge and post-filter results
slack_messages, docid_to_message = merge_slack_messages(results)
# Post-filter by channel type (DM, private channel, etc.)
# NOTE: We must post-filter because Slack's search.messages API only supports
# filtering by channel NAME (via in:#channel syntax), not by channel TYPE.
# There's no way to specify "only public channels" or "exclude DMs" in the query.
if entities and team_id:
# Use pre-fetched channel metadata to avoid cache misses
# Pass it directly instead of relying on Redis cache
filtered_messages = []
removed_count = 0
for msg in slack_messages:
# Pass pre-fetched metadata to avoid cache lookups
channel_type = get_channel_type(
channel_id=msg.channel_id,
channel_metadata=channel_metadata_dict,
)
if should_include_message(channel_type, entities):
filtered_messages.append(msg)
else:
removed_count += 1
if removed_count > 0:
logger.info(
f"Post-filtering removed {removed_count} messages: "
f"{len(slack_messages)} -> {len(filtered_messages)}"
)
slack_messages = filtered_messages
slack_messages = slack_messages[: limit or len(slack_messages)]
if not slack_messages:
return []
@@ -437,6 +764,9 @@ def slack_retrieval(
highlighted_texts.update(slack_message.highlighted_texts)
sorted_highlighted_texts = sorted(highlighted_texts, key=len)
# For queries without highlights (e.g., empty recency queries), we should keep all chunks
has_highlights = len(sorted_highlighted_texts) > 0
# convert slack messages to index documents
index_docs: list[IndexingDocument] = []
for slack_message in slack_messages:
@@ -475,24 +805,36 @@ def slack_retrieval(
chunks = chunker.chunk(index_docs)
# prune chunks without any highlighted texts
# BUT: for recency queries without keywords, keep all chunks
relevant_chunks: list[DocAwareChunk] = []
chunkid_to_match_highlight: dict[str, str] = {}
    if not has_highlights:
        # No highlighted terms - keep all chunks (recency query)
        for chunk in chunks:
            chunk_id = f"{chunk.source_document.id}__{chunk.chunk_id}"
            relevant_chunks.append(chunk)
            chunkid_to_match_highlight[chunk_id] = chunk.content  # No highlighting
            if limit and len(relevant_chunks) >= limit:
                break
    else:
        # Prune chunks that don't contain highlighted terms
        for chunk in chunks:
            match_highlight = chunk.content
            for highlight in sorted_highlighted_texts:  # faster than re sub
                match_highlight = match_highlight.replace(
                    highlight, f"<hi>{highlight}</hi>"
                )
            # if nothing got replaced, the chunk is irrelevant
            if len(match_highlight) == len(chunk.content):
                continue
            chunk_id = f"{chunk.source_document.id}__{chunk.chunk_id}"
            relevant_chunks.append(chunk)
            chunkid_to_match_highlight[chunk_id] = match_highlight
            if limit and len(relevant_chunks) >= limit:
                break
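    # Illustrative example of the pruning test above (hypothetical text): replacing
    # "deploy" with "<hi>deploy</hi>" in "deploy failed on prod" lengthens the string,
    # so that chunk is kept; a chunk containing none of the highlighted terms keeps
    # its original length and is skipped.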
# convert to inference chunks
top_chunks: list[InferenceChunk] = []